transcriptor-gdrive-sync/gdrive_id_loader.py
2025-04-02 05:39:44 +02:00

156 lines
5.1 KiB
Python

#!/usr/bin/env python3
import os
import sys
import mariadb
from google.oauth2 import service_account
from googleapiclient.discovery import build
# ---------------- Configuration ----------------
# Directory on the local machine that holds the clip files.
# (Not referenced by any other code visible in this file.)
LOCAL_DIR = "/shared/transcriptor/clips"

# ID of the Google Drive folder that rclone syncs the clips into; every
# Drive lookup in this script starts from this folder.
DRIVE_ROOT_FOLDER_ID = "1qjq9XEC19g6LGw6fwcZXSQYgOO2YuAOA"

# Service-account key file (JSON) used to authenticate against the Drive API.
# The path is relative, so it is resolved against the current working directory.
SERVICE_ACCOUNT_FILE = "service_account.json"
# ---------------- Google Drive API Setup ----------------
# Read-only scope: the code visible in this file only lists files on Drive.
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']

try:
    credentials = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE,
        scopes=SCOPES,
    )
    drive_service = build('drive', 'v3', credentials=credentials)
except Exception as exc:
    # Without a working Drive client nothing else in the script can run,
    # so report the problem and stop with a non-zero exit status.
    print(f"Error setting up Google Drive API: {exc}")
    sys.exit(1)
# ---------------- Database Connection ----------------
# Every connection setting is read from the environment (DB_HOST, DB_USER,
# DB_PASS, DB_NAME, DB_PORT) and falls back to a built-in default.
# NOTE(review): the fallbacks include what looks like a real user name and
# password committed to source control - consider rotating them and requiring
# DB_USER / DB_PASS to be supplied through the environment.
_db_params = {
    "host": os.environ.get("DB_HOST", "192.168.0.187"),
    "user": os.environ.get("DB_USER", "t0is"),
    "password": os.environ.get("DB_PASS", "Silenceisgolden555"),
    "database": os.environ.get("DB_NAME", "transcriptor"),
    "port": int(os.environ.get("DB_PORT", 3306)),
}

try:
    conn = mariadb.connect(**_db_params)
    cursor = conn.cursor()
except mariadb.Error as exc:
    # The script cannot do anything useful without the database.
    print(f"Error connecting to MariaDB: {exc}")
    sys.exit(1)
# ---------------- Helper Functions ----------------
def search_folder(folder_name, parent_id):
    """
    Find a folder named ``folder_name`` directly under ``parent_id`` on Google Drive.

    Args:
        folder_name: Exact name of the folder to look for.
        parent_id: Drive ID of the folder to search in.

    Returns:
        The ID of the first matching, non-trashed folder, or None when no
        folder matches or the Drive API call fails (the error is printed).
    """
    # Drive query literals are single-quoted, so a backslash or an apostrophe
    # inside the name has to be backslash-escaped; otherwise a name such as
    # "Valentine's Day" produces a malformed query.
    escaped_name = folder_name.replace("\\", "\\\\").replace("'", "\\'")
    query = (
        f"'{parent_id}' in parents and "
        f"name='{escaped_name}' and "
        f"mimeType='application/vnd.google-apps.folder' and "
        f"trashed=false"
    )
    try:
        page_token = None
        while True:
            results = drive_service.files().list(
                q=query,
                fields="nextPageToken, files(id, name)",
                includeItemsFromAllDrives=True,
                supportsAllDrives=True,
                pageToken=page_token,
            ).execute()
            files = results.get('files', [])
            if files:
                return files[0]['id']
            # files.list may return a partial or empty page before the end of
            # the listing, so keep following nextPageToken until a match is
            # found or there are no more pages.
            page_token = results.get('nextPageToken')
            if not page_token:
                break
    except Exception as e:
        print(f"Error searching for folder '{folder_name}' under parent '{parent_id}': {e}")
    return None
def search_file(file_name, parent_id):
    """
    Find a file (anything that is not a folder) named ``file_name`` directly
    under ``parent_id`` on Google Drive.

    Args:
        file_name: Exact name of the file to look for.
        parent_id: Drive ID of the folder to search in.

    Returns:
        The ID of the first matching, non-trashed file, or None when no file
        matches or the Drive API call fails (the error is printed).
    """
    # Drive query literals are single-quoted, so a backslash or an apostrophe
    # inside the name has to be backslash-escaped; otherwise a name such as
    # "it's_a_clip.mp4" produces a malformed query.
    escaped_name = file_name.replace("\\", "\\\\").replace("'", "\\'")
    query = (
        f"'{parent_id}' in parents and "
        f"name='{escaped_name}' and "
        f"mimeType!='application/vnd.google-apps.folder' and "
        f"trashed=false"
    )
    try:
        page_token = None
        while True:
            results = drive_service.files().list(
                q=query,
                fields="nextPageToken, files(id, name)",
                includeItemsFromAllDrives=True,
                supportsAllDrives=True,
                pageToken=page_token,
            ).execute()
            files = results.get('files', [])
            if files:
                return files[0]['id']
            # files.list may return a partial or empty page before the end of
            # the listing, so keep following nextPageToken until a match is
            # found or there are no more pages.
            page_token = results.get('nextPageToken')
            if not page_token:
                break
    except Exception as e:
        print(f"Error searching for file '{file_name}' under parent '{parent_id}': {e}")
    return None
def get_drive_file_id(relative_path):
    """
    Resolve a clip's relative path to its Google Drive file ID.

    The path (e.g. "agraelus/from_vod/07-03-25/clip_2399595117_3500.mp4") is
    walked one folder at a time starting at DRIVE_ROOT_FOLDER_ID; the last
    component is then looked up as a file inside the final folder.

    Args:
        relative_path: Path of the clip relative to the Drive root folder.
            Both "/" and the platform separator are accepted.

    Returns:
        The Drive file ID, or None when the path is empty, names no file
        (e.g. ends with a separator), or any folder or the file itself
        cannot be found.
    """
    if not relative_path:
        return None
    # Normalise to "/" so paths work regardless of the platform separator.
    normalized = relative_path.replace(os.sep, "/")
    # All components except the last are folder names.
    *folders, file_name = normalized.split("/")
    if not file_name:
        # str.split never returns an empty list, so an empty or
        # separator-terminated path shows up as an empty last component.
        return None
    # Leading or doubled separators produce empty components; searching Drive
    # for a folder named '' can never succeed, so drop them.
    folders = [name for name in folders if name]
    parent_id = DRIVE_ROOT_FOLDER_ID
    # Traverse the folder structure.
    for folder in folders:
        folder_id = search_folder(folder, parent_id)
        if not folder_id:
            print(f"Folder '{folder}' not found under parent '{parent_id}'.")
            return None
        parent_id = folder_id
    # Now search for the file within the final folder.
    return search_file(file_name, parent_id)
def update_database(clip_id, file_id):
    """
    Store ``file_id`` in the gdrive_file_id column of the clip row whose id
    is ``clip_id`` and commit the change.

    A database error is reported on stdout instead of being raised; nothing
    is returned in either case.
    """
    sql = "UPDATE clips SET gdrive_file_id = ? WHERE id = ?"
    params = (file_id, clip_id)
    try:
        cursor.execute(sql, params)
        conn.commit()
    except mariadb.Error as err:
        print(f"Database update failed for clip id {clip_id}: {err}")
    else:
        print(f"Updated clip id {clip_id} with file_id: {file_id}")
# ---------------- Main Process ----------------
def main():
    """
    Fill in gdrive_file_id for every clip that does not have one yet.

    Reads (id, filename) for all rows of ``clips`` whose gdrive_file_id is
    NULL, resolves each filename to a Google Drive file ID and stores it with
    update_database(). Progress and failures are printed; nothing is returned.
    """
    try:
        cursor.execute("SELECT id, filename FROM clips where gdrive_file_id is null")
        clips = cursor.fetchall()
    except mariadb.Error as e:
        print(f"Database query failed: {e}")
        return
    prefix = 'clips/'
    for clip_id, filename in clips:
        print(f"Processing clip id {clip_id}: {filename}")
        if not filename:
            # A NULL/empty filename cannot be resolved; skip the row instead
            # of aborting the whole run.
            print(f"Clip id {clip_id} has no filename; skipping.")
            continue
        # Strip only the leading "clips/". str.replace would also remove the
        # text from later components (e.g. "clips/bestclips/x.mp4" would
        # become "bestx.mp4") and corrupt the path.
        if filename.startswith(prefix):
            relative_path = filename[len(prefix):]
        else:
            relative_path = filename
        drive_file_id = get_drive_file_id(relative_path)
        if drive_file_id:
            update_database(clip_id, drive_file_id)
        else:
            print(f"Google Drive file ID not found for clip id {clip_id}.")
if __name__ == "__main__":
    # Release the cursor and the connection even when main() raises, so a
    # failed run does not leave the database session open.
    try:
        main()
    finally:
        cursor.close()
        conn.close()