#!/usr/bin/env python3
"""Backfill Google Drive file IDs for clips recorded in the database.

For every row in ``clips`` whose ``gdrive_file_id`` is NULL, the stored
``filename`` is resolved folder-by-folder under ``DRIVE_ROOT_FOLDER_ID`` and,
when the file is found, its Drive ID is written back to the row.
"""
import os
import sys
from typing import Optional

import mariadb
from google.oauth2 import service_account
from googleapiclient.discovery import build

# ---------------- Configuration ----------------
# Local directory where clips are stored.
LOCAL_DIR = "/shared/transcriptor/clips"
# Google Drive root folder ID where rclone is syncing your files.
DRIVE_ROOT_FOLDER_ID = "1qjq9XEC19g6LGw6fwcZXSQYgOO2YuAOA"
# Path to your service account credentials JSON file.
SERVICE_ACCOUNT_FILE = "service_account.json"
# Leading component of clips.filename that is stripped before the Drive lookup.
CLIPS_PREFIX = "clips/"
# MIME type Google Drive uses to mark folders.
FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"

# ---------------- Google Drive API Setup ----------------
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']

try:
    credentials = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE, scopes=SCOPES
    )
    drive_service = build('drive', 'v3', credentials=credentials)
except Exception as e:
    print(f"Error setting up Google Drive API: {e}")
    sys.exit(1)

# ---------------- Database Connection ----------------
# NOTE(security): the fallback credentials below are committed in source.
# They are kept so existing deployments keep working, but they should be
# supplied through DB_HOST / DB_USER / DB_PASS / DB_NAME and rotated.
try:
    conn = mariadb.connect(
        host=os.environ.get("DB_HOST", "192.168.0.187"),
        user=os.environ.get("DB_USER", "t0is"),
        password=os.environ.get("DB_PASS", "Silenceisgolden555"),
        database=os.environ.get("DB_NAME", "transcriptor"),
        port=int(os.environ.get("DB_PORT", 3306))
    )
    cursor = conn.cursor()
except mariadb.Error as e:
    print(f"Error connecting to MariaDB: {e}")
    sys.exit(1)

# Successful folder lookups, keyed by (parent_id, folder_name). Clips in the
# same directory share every folder lookup, so this avoids repeating the same
# Drive API calls for each clip. Misses and errors are never cached.
_folder_id_cache = {}


# ---------------- Helper Functions ----------------
def _escape_query_value(value: str) -> str:
    """Escape backslashes and single quotes for a Drive ``q`` string literal."""
    # Backslashes first, otherwise the ones added for quotes get doubled.
    return value.replace("\\", "\\\\").replace("'", "\\'")


def _first_match_id(query: str) -> Optional[str]:
    """Run a Drive ``files.list`` query and return the first match's ID.

    Returns None when the response contains no files. Any exception raised by
    the API client propagates to the caller.
    """
    results = drive_service.files().list(
        q=query,
        fields="nextPageToken, files(id, name)",
        includeItemsFromAllDrives=True,
        supportsAllDrives=True
    ).execute()
    files = results.get('files', [])
    return files[0]['id'] if files else None


def search_folder(folder_name, parent_id):
    """
    Searches for a folder with the given name under the specified parent_id.

    Returns the folder ID if found, otherwise None. API errors are printed
    and also reported as None.
    """
    cache_key = (parent_id, folder_name)
    cached_id = _folder_id_cache.get(cache_key)
    if cached_id is not None:
        return cached_id

    query = (
        f"'{_escape_query_value(parent_id)}' in parents and "
        f"name='{_escape_query_value(folder_name)}' and "
        f"mimeType='{FOLDER_MIME_TYPE}' and "
        f"trashed=false"
    )
    try:
        folder_id = _first_match_id(query)
    except Exception as e:
        print(f"Error searching for folder '{folder_name}' under parent '{parent_id}': {e}")
        return None

    if folder_id is not None:
        _folder_id_cache[cache_key] = folder_id
    return folder_id


def search_file(file_name, parent_id):
    """
    Searches for a file (non-folder) with the given name under the specified parent_id.

    Returns the file ID if found, otherwise None. API errors are printed
    and also reported as None.
    """
    query = (
        f"'{_escape_query_value(parent_id)}' in parents and "
        f"name='{_escape_query_value(file_name)}' and "
        f"mimeType!='{FOLDER_MIME_TYPE}' and "
        f"trashed=false"
    )
    try:
        return _first_match_id(query)
    except Exception as e:
        print(f"Error searching for file '{file_name}' under parent '{parent_id}': {e}")
        return None


def get_drive_file_id(relative_path):
    """
    Given a relative file path (e.g., "agraelus/from_vod/07-03-25/clip_2399595117_3500.mp4"),
    traverse the folder structure on Google Drive starting at DRIVE_ROOT_FOLDER_ID
    and return the file's ID if found.

    Returns None when the path is empty, when any folder on the way is
    missing, or when the file itself is not found.
    """
    # The stored paths use forward slashes regardless of the host OS, so split
    # on "/" rather than os.sep. Empty components (leading, trailing or doubled
    # slashes) are dropped so they are never sent to Drive as names.
    parts = [part for part in relative_path.split("/") if part]
    if not parts:
        return None

    # All parts except the last are folder names.
    *folders, file_name = parts
    parent_id = DRIVE_ROOT_FOLDER_ID

    # Traverse the folder structure.
    for folder in folders:
        folder_id = search_folder(folder, parent_id)
        if not folder_id:
            print(f"Folder '{folder}' not found under parent '{parent_id}'.")
            return None
        parent_id = folder_id

    # Now search for the file within the final folder.
    return search_file(file_name, parent_id)


def update_database(clip_id, file_id):
    """
    Updates the clip record in the database with the Google Drive file ID.

    The update is committed immediately; database errors are printed and
    not re-raised.
    """
    query = "UPDATE clips SET gdrive_file_id = ? WHERE id = ?"
    try:
        cursor.execute(query, (file_id, clip_id))
        conn.commit()
        print(f"Updated clip id {clip_id} with file_id: {file_id}")
    except mariadb.Error as e:
        print(f"Database update failed for clip id {clip_id}: {e}")


# ---------------- Main Process ----------------
def main():
    """Resolve and store the Drive file ID for every clip that lacks one."""
    try:
        cursor.execute("SELECT id, filename FROM clips where gdrive_file_id is null")
        clips = cursor.fetchall()
    except mariadb.Error as e:
        print(f"Database query failed: {e}")
        return

    for clip_id, filename in clips:
        print(f"Processing clip id {clip_id}: {filename}")

        if not filename:
            # A NULL/empty filename cannot be resolved; skip it instead of crashing.
            print(f"Clip id {clip_id} has no filename; skipping.")
            continue

        # Strip only the leading "clips/" component. A blanket str.replace
        # would also mangle later components such as "from_clips/".
        if filename.startswith(CLIPS_PREFIX):
            relative_path = filename[len(CLIPS_PREFIX):]
        else:
            relative_path = filename

        drive_file_id = get_drive_file_id(relative_path)
        if drive_file_id:
            update_database(clip_id, drive_file_id)
        else:
            print(f"Google Drive file ID not found for clip id {clip_id}.")


if __name__ == "__main__":
    try:
        main()
    finally:
        # Release the database resources even if main() raised.
        cursor.close()
        conn.close()