From ee56aa0008feb3038b6faebfe75d20ab23a85016 Mon Sep 17 00:00:00 2001 From: t0is Date: Wed, 2 Apr 2025 04:41:29 +0200 Subject: [PATCH] add id loader --- docker-compose.yml | 18 +++++- docker/id_loader/Dockerfile | 18 ++++++ Dockerfile => docker/syncer/Dockerfile | 0 gdrive_id_loader.py | 90 ++++++++++++++++++++++++++ rclone.conf | 2 +- requirements.txt | 1 + 6 files changed, 127 insertions(+), 2 deletions(-) create mode 100644 docker/id_loader/Dockerfile rename Dockerfile => docker/syncer/Dockerfile (100%) create mode 100644 gdrive_id_loader.py create mode 100644 requirements.txt diff --git a/docker-compose.yml b/docker-compose.yml index 9005b9e..f56f5da 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,3 +1,9 @@ +networks: + mariadb: + external: true + name: mariadb + + services: drive-sync: image: t0is/transcriptor-gdrive-sync:latest @@ -5,4 +11,14 @@ services: # Set your target Google Drive folder ID here. - DRIVE_FOLDER_ID=1qjq9XEC19g6LGw6fwcZXSQYgOO2YuAOA volumes: - - /shared/transcriptor/clips:/shared/transcriptor/clips \ No newline at end of file + - /shared/transcriptor/clips:/shared/transcriptor/clips + + drive-id-loader: + image: t0is/transcriptor-gdrive-sync:id_loader + environment: + # Set your target Google Drive folder ID here. + - DRIVE_FOLDER_ID=1qjq9XEC19g6LGw6fwcZXSQYgOO2YuAOA + volumes: + - /shared/transcriptor/clips:/shared/transcriptor/clips + networks: + - mariadb diff --git a/docker/id_loader/Dockerfile b/docker/id_loader/Dockerfile new file mode 100644 index 0000000..89d57b5 --- /dev/null +++ b/docker/id_loader/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.9-slim + +WORKDIR /app + +# Install required system packages including MariaDB development headers and gcc +RUN apt-get update && \ + apt-get install -y curl rclone bash unzip libmariadb-dev gcc && \ + rm -rf /var/lib/apt/lists/* + +# Copy requirements file (if you have one) and install Python dependencies +COPY requirements.txt . 
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the application code and the rclone configuration
+COPY gdrive_id_loader.py .
+COPY rclone.conf /root/.config/rclone/rclone.conf
+# Default command
+CMD ["python", "-u", "gdrive_id_loader.py"]
\ No newline at end of file
diff --git a/Dockerfile b/docker/syncer/Dockerfile
similarity index 100%
rename from Dockerfile
rename to docker/syncer/Dockerfile
diff --git a/gdrive_id_loader.py b/gdrive_id_loader.py
new file mode 100644
index 0000000..1d7bac5
--- /dev/null
+++ b/gdrive_id_loader.py
@@ -0,0 +1,157 @@
+"""Store Google Drive file IDs for local clips in the MariaDB ``clips`` table.
+
+For every file under LOCAL_DIR the script asks rclone for the file's share
+link on the REMOTE_NAME remote, pulls the Drive file ID out of that link and
+writes it to ``clips.gdrive_file_id``.
+
+Database settings come from DB_HOST, DB_USER, DB_PASS, DB_NAME and DB_PORT.
+DB_PASS is required; the other variables have defaults.
+"""
+import os
+import subprocess
+import re
+import sys
+from urllib.parse import parse_qs, urlparse
+
+import mariadb
+
+# Configuration – update these values as needed.
+LOCAL_DIR = "/shared/transcriptor/clips"  # Local folder where clips are stored
+REMOTE_NAME = "gdrive"  # rclone remote name for Google Drive
+
+# Escape character for LIKE patterns; it must match the ESCAPE clause in the
+# query built by update_database().
+LIKE_ESCAPE = "!"
+
+# Path-style Drive links carry the ID after "/d/": /file/d/FILE_ID[/view]
+PATH_ID_RE = re.compile(r"/d/([^/]+)")
+
+# The password is read from the environment only, so that no credential is
+# stored in the repository or baked into the image.
+db_password = os.environ.get("DB_PASS")
+if not db_password:
+    print("Error connecting to MariaDB: DB_PASS environment variable is not set")
+    sys.exit(1)
+
+# Connect to the MariaDB database
+try:
+    conn = mariadb.connect(
+        host=os.environ.get("DB_HOST", "192.168.0.187"),
+        user=os.environ.get("DB_USER", "t0is"),
+        password=db_password,
+        database=os.environ.get("DB_NAME", "transcriptor"),
+        port=int(os.environ.get("DB_PORT", 3306)),
+    )
+    cursor = conn.cursor()
+except mariadb.Error as e:
+    print(f"Error connecting to MariaDB: {e}")
+    sys.exit(1)
+
+
+def get_rclone_link(relative_path):
+    """
+    Return the share link rclone reports for a file on the remote.
+
+    Runs ``rclone link`` on ``REMOTE_NAME:relative_path``. rclone creates a
+    public link if the file does not have one yet, so this call shares the file.
+
+    Returns the link, or None when rclone fails or prints nothing.
+    """
+    remote_path = f"{REMOTE_NAME}:{relative_path}"
+    try:
+        result = subprocess.run(
+            ["rclone", "link", remote_path],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            check=True,
+        )
+    except subprocess.CalledProcessError as e:
+        print(f"Error obtaining rclone link for {remote_path}: {e.stderr}")
+        return None
+    # rclone prints the link on the last line of its output.
+    output_lines = result.stdout.strip().splitlines()
+    return output_lines[-1].strip() if output_lines else None
+
+
+def extract_file_id(link):
+    """
+    Extract the Google Drive file ID from a share URL.
+
+    Recognised URL formats:
+        https://drive.google.com/file/d/FILE_ID/view?usp=sharing
+        https://drive.google.com/open?id=FILE_ID
+
+    The second form is what rclone's Google Drive backend returns from
+    ``rclone link`` (verify against your rclone version).
+
+    Returns the ID, or None when the link is empty or contains no ID.
+    """
+    if not link:
+        return None
+    parsed = urlparse(link)
+    match = PATH_ID_RE.search(parsed.path)
+    if match:
+        return match.group(1)
+    ids = parse_qs(parsed.query).get("id")
+    return ids[0] if ids else None
+
+
+def escape_like(value):
+    """Escape LIKE wildcards in value so that it only matches literally."""
+    for char in (LIKE_ESCAPE, "%", "_"):
+        value = value.replace(char, LIKE_ESCAPE + char)
+    return value
+
+
+def update_database(filename, file_id):
+    """
+    Store file_id in gdrive_file_id for the clips rows matching filename.
+
+    Rows are matched when their filename column contains the base name of
+    filename. "%" and "_" in the base name are escaped: unescaped, a name such
+    as "clip_1.mp4" would also match "clipA1.mp4" and overwrite its ID.
+    Adjust the query as needed for your schema.
+    """
+    base = os.path.basename(filename)
+    query = "UPDATE clips SET gdrive_file_id = ? WHERE filename LIKE ? ESCAPE '!'"
+    like_pattern = f"%{escape_like(base)}%"
+    try:
+        cursor.execute(query, (file_id, like_pattern))
+        conn.commit()
+    except mariadb.Error as e:
+        print(f"Database update failed for {base}: {e}")
+        return
+    if cursor.rowcount > 0:
+        print(f"Updated {base} with file_id: {file_id}")
+    else:
+        # Zero affected rows: nothing matched, or the ID was already stored.
+        print(f"No clips row changed for {base}")
+
+
+def main():
+    """Walk LOCAL_DIR recursively and record the Drive ID of every file."""
+    for root, _dirs, files in os.walk(LOCAL_DIR):
+        for file in files:
+            full_path = os.path.join(root, file)
+            # The path relative to LOCAL_DIR is used as the path on the remote.
+            rel_path = os.path.relpath(full_path, LOCAL_DIR)
+            print(f"Processing file: {full_path} (relative: {rel_path})")
+            link = get_rclone_link(rel_path)
+            if not link:
+                print(f"No link generated for file: {full_path}")
+                continue
+            file_id = extract_file_id(link)
+            if not file_id:
+                print(f"Could not extract file ID from link: {link}")
+                continue
+            update_database(rel_path, file_id)
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    finally:
+        # Release the database handles even when main() raises.
+        cursor.close()
+        conn.close()
\ No newline at end of file
diff --git a/rclone.conf b/rclone.conf
index 0b3013f..016120e 100644
--- a/rclone.conf
+++ b/rclone.conf
@@ -3,6 +3,6 @@ type = drive
 client_id = 830644885820-9792hiic15cmglcbjg5nl210im3m7m6r.apps.googleusercontent.com
 client_secret = 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
 scope = drive
-token = 
{"access_token":"ya29.a0AeXRPp6RfjrFgD1T1RpH5lMnj9sUc55ePQo0THDsX5OCagZ-u7NXcu0scNuBBvzim9lUvhi8L0x4UQ2g9HuvfdTnP8ydd3T00PxK89pXFUdRMKKajDHIAJw2Upg8d-dtZQnLPjERyqNtx6sls5DJxM7i2l4Ezg2utctrAlf5aCgYKAaASARMSFQHGX2MiE4vJaQGY_rrfT5oXAtwVoA0175","token_type":"Bearer","refresh_token":"1//09gDVl-tGofbeCgYIARAAGAkSNwF-L9IrXic-IxKKryOSicpnI8nX-lzEfe43dcgiCQ4f4KPnQqzSi3VfDHQ5vSqziAJGlQgjw3k","expiry":"2025-03-21T16:39:18.807206+01:00"} +token = {"access_token":"ya29.a0AeXRPp5DaLsik9K5sYbr-xRi89lGpkpum_lk74JvBA5ua9DusiFfIVxxBqEjvMlRBmBrRQHx31a4TzQ51GsQQBKnObJPPDNKzTb7KXhOLzAGjT0zBITzQ9K2-ystJNfKv1nMGfDiVje11iOvp3UsynFcBjJj2CS4Qp8rJgqLaCgYKAcASARMSFQHGX2MiykOVhvsUmJ8n1S1TqaOskg0175","token_type":"Bearer","refresh_token":"1//098qddus5NnV-CgYIARAAGAkSNwF-L9Ir4Pm0HUGMFopHsJ4yxSfM1-jjIXaU0AhOyYFriHmf7qVsVPQ6Mxm1ZB5NhcAPiSUp9t4","expiry":"2025-04-01T18:10:24.926353+02:00"} team_drive = diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..45f92cd --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +mariadb \ No newline at end of file