156 lines
5.1 KiB
Python
156 lines
5.1 KiB
Python
#!/usr/bin/env python3
|
|
import os
|
|
import sys
|
|
import mariadb
|
|
from google.oauth2 import service_account
|
|
from googleapiclient.discovery import build
|
|
|
|
# ---------------- Configuration ----------------
|
|
# Directory on the local machine that holds the clip files.
LOCAL_DIR = "/shared/transcriptor/clips"

# ID of the Google Drive folder that rclone syncs the clips into.
DRIVE_ROOT_FOLDER_ID = "1qjq9XEC19g6LGw6fwcZXSQYgOO2YuAOA"

# JSON key file holding the service-account credentials.
SERVICE_ACCOUNT_FILE = "service_account.json"
|
|
|
|
|
|
# ---------------- Google Drive API Setup ----------------
|
|
# OAuth scope requested for the service account (read-only Drive access).
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']

try:
    # Load the service-account key, then build the Drive v3 client that the
    # rest of the script uses through the module-level `drive_service`.
    credentials = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE,
        scopes=SCOPES,
    )
    drive_service = build('drive', 'v3', credentials=credentials)
except Exception as setup_error:
    # Any failure here is reported and ends the process with status 1.
    print(f"Error setting up Google Drive API: {setup_error}")
    sys.exit(1)
|
|
|
|
# ---------------- Database Connection ----------------
|
|
try:
    # Connection settings are read from the environment, falling back to the
    # defaults below when a variable is unset.
    # NOTE(review): the fallback user/password are committed in source; prefer
    # supplying DB_USER / DB_PASS via the environment and rotating this password.
    conn = mariadb.connect(
        host=os.environ.get("DB_HOST", "192.168.0.187"),
        user=os.environ.get("DB_USER", "t0is"),
        password=os.environ.get("DB_PASS", "Silenceisgolden555"),
        database=os.environ.get("DB_NAME", "transcriptor"),
        # Environment values are strings; int() raises ValueError when
        # DB_PORT is not numeric, which is handled below.
        port=int(os.environ.get("DB_PORT", "3306")),
    )
    cursor = conn.cursor()
except (mariadb.Error, ValueError) as e:
    # Covers both connection failures and an invalid DB_PORT, so the script
    # exits with a readable message instead of a traceback.
    print(f"Error connecting to MariaDB: {e}")
    sys.exit(1)
|
|
|
|
# ---------------- Helper Functions ----------------
|
|
def search_folder(folder_name, parent_id):
    """
    Look up a folder by exact name directly under a Google Drive folder.

    Args:
        folder_name: Name of the folder to find.
        parent_id: ID of the Drive folder to search in.

    Returns:
        The ID of the first matching, non-trashed folder, or None when
        nothing matches or the Drive request fails (the error is printed,
        not raised).
    """
    # Values in a Drive query are single-quoted, so backslashes and single
    # quotes in the name must be backslash-escaped; otherwise a name such as
    # "Valentine's Day" produces a malformed query.
    escaped_name = folder_name.replace("\\", "\\\\").replace("'", "\\'")
    query = (
        f"'{parent_id}' in parents and "
        f"name='{escaped_name}' and "
        "mimeType='application/vnd.google-apps.folder' and "
        "trashed=false"
    )
    try:
        results = drive_service.files().list(
            q=query,
            fields="nextPageToken, files(id, name)",
            includeItemsFromAllDrives=True,
            supportsAllDrives=True,
        ).execute()
        files = results.get('files', [])
        if files:
            # Only the first match is used, even if several share the name.
            return files[0]['id']
    except Exception as e:
        # Best-effort lookup: report the failure and fall through to None.
        print(f"Error searching for folder '{folder_name}' under parent '{parent_id}': {e}")
    return None
|
|
|
|
def search_file(file_name, parent_id):
    """
    Look up a non-folder file by exact name directly under a Drive folder.

    Args:
        file_name: Name of the file to find.
        parent_id: ID of the Drive folder to search in.

    Returns:
        The ID of the first matching, non-trashed file, or None when nothing
        matches or the Drive request fails (the error is printed, not
        raised).
    """
    # Values in a Drive query are single-quoted, so backslashes and single
    # quotes in the name must be backslash-escaped; otherwise a name
    # containing an apostrophe produces a malformed query.
    escaped_name = file_name.replace("\\", "\\\\").replace("'", "\\'")
    query = (
        f"'{parent_id}' in parents and "
        f"name='{escaped_name}' and "
        "mimeType!='application/vnd.google-apps.folder' and "
        "trashed=false"
    )
    try:
        results = drive_service.files().list(
            q=query,
            fields="nextPageToken, files(id, name)",
            includeItemsFromAllDrives=True,
            supportsAllDrives=True,
        ).execute()
        files = results.get('files', [])
        if files:
            # Only the first match is used, even if several share the name.
            return files[0]['id']
    except Exception as e:
        # Best-effort lookup: report the failure and fall through to None.
        print(f"Error searching for file '{file_name}' under parent '{parent_id}': {e}")
    return None
|
|
|
|
def get_drive_file_id(relative_path):
    """
    Resolve a clip's relative path to its Google Drive file ID.

    The path (e.g. "agraelus/from_vod/07-03-25/clip_2399595117_3500.mp4") is
    walked one folder at a time starting at DRIVE_ROOT_FOLDER_ID; the final
    component is then looked up as a file inside the last folder.

    Args:
        relative_path: Path relative to the Drive root folder, separated by
            "/" (the platform separator os.sep is accepted as well).

    Returns:
        The Drive file ID, or None if the path has no components or any
        folder or the file itself cannot be found.
    """
    # str.split never returns an empty list, so empty components are dropped
    # explicitly. This makes the emptiness check below meaningful and
    # tolerates leading, trailing and doubled separators.
    normalized = relative_path.replace(os.sep, "/")
    parts = [part for part in normalized.split("/") if part]
    if not parts:
        return None

    # All parts except the last are folder names.
    *folders, file_name = parts
    parent_id = DRIVE_ROOT_FOLDER_ID

    # Descend through the folder hierarchy, stopping at the first miss.
    for folder in folders:
        folder_id = search_folder(folder, parent_id)
        if not folder_id:
            print(f"Folder '{folder}' not found under parent '{parent_id}'.")
            return None
        parent_id = folder_id

    # Finally look for the file within the innermost folder.
    return search_file(file_name, parent_id)
|
|
|
|
def update_database(clip_id, file_id):
    """
    Store a Google Drive file ID on the clips row with the given id.

    The update is committed right away. A MariaDB error is printed and
    swallowed rather than raised.
    """
    sql = "UPDATE clips SET gdrive_file_id = ? WHERE id = ?"
    params = (file_id, clip_id)
    try:
        cursor.execute(sql, params)
        conn.commit()
    except mariadb.Error as db_error:
        print(f"Database update failed for clip id {clip_id}: {db_error}")
    else:
        print(f"Updated clip id {clip_id} with file_id: {file_id}")
|
|
|
|
# ---------------- Main Process ----------------
|
|
def main():
    """
    Fill in gdrive_file_id for every clip row that does not have one yet.

    Selects all clips whose gdrive_file_id is NULL, resolves each stored
    filename to a Google Drive file ID and writes the ID back to the
    database. Clips that cannot be resolved are reported and skipped. A
    failure of the initial query is printed and ends the run early.
    """
    try:
        cursor.execute("SELECT id, filename FROM clips where gdrive_file_id is null")
        clips = cursor.fetchall()
    except mariadb.Error as e:
        print(f"Database query failed: {e}")
        return

    prefix = 'clips/'
    for clip in clips:
        clip_id = clip[0]
        filename = clip[1]

        print(f"Processing clip id {clip_id}: {filename}")

        # A NULL/empty filename cannot be resolved; report it like any other
        # miss instead of crashing the whole run.
        if not filename:
            print(f"Google Drive file ID not found for clip id {clip_id}.")
            continue

        # Strip only a leading "clips/". Replacing every occurrence would
        # also mangle later components such as "best_clips/".
        if filename.startswith(prefix):
            relative_path = filename[len(prefix):]
        else:
            relative_path = filename

        drive_file_id = get_drive_file_id(relative_path)
        if drive_file_id:
            update_database(clip_id, drive_file_id)
        else:
            print(f"Google Drive file ID not found for clip id {clip_id}.")
|
|
|
|
if __name__ == "__main__":
    try:
        main()
    finally:
        # Release the database cursor and connection even if main() raises.
        cursor.close()
        conn.close()