gdrive api id loader
This commit is contained in:
parent
ee56aa0008
commit
2134b13c21
@ -9,10 +9,11 @@ RUN apt-get update && \
|
||||
|
||||
# Copy requirements file (if you have one) and install Python dependencies
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir mariadb
|
||||
RUN pip3 install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy application code, the rclone config, and the service account credentials
|
||||
COPY gdrive_id_loader.py .
|
||||
COPY rclone.conf /root/.config/rclone/rclone.conf
|
||||
COPY service_account.json .
|
||||
# Default command
|
||||
CMD ["python", "-u", "gdrive_id_loader.py"]
|
@ -1,14 +1,33 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
import subprocess
|
||||
import re
|
||||
import sys
|
||||
import mariadb
|
||||
from google.oauth2 import service_account
|
||||
from googleapiclient.discovery import build
|
||||
|
||||
# Configuration – update these values as needed.
|
||||
LOCAL_DIR = "/shared/transcriptor/clips" # Local folder where clips are stored
|
||||
REMOTE_NAME = "gdrive" # rclone remote name for Google Drive
|
||||
# ---------------- Configuration ----------------
|
||||
# Local directory where clips are stored.
|
||||
LOCAL_DIR = "/shared/transcriptor/clips"
|
||||
|
||||
# Connect to the MariaDB database
|
||||
# Google Drive root folder ID where rclone is syncing your files.
|
||||
DRIVE_ROOT_FOLDER_ID = "1qjq9XEC19g6LGw6fwcZXSQYgOO2YuAOA"
|
||||
|
||||
# Path to your service account credentials JSON file.
|
||||
SERVICE_ACCOUNT_FILE = "service_account.json"
|
||||
|
||||
|
||||
# ---------------- Google Drive API Setup ----------------
|
||||
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
|
||||
try:
|
||||
credentials = service_account.Credentials.from_service_account_file(
|
||||
SERVICE_ACCOUNT_FILE, scopes=SCOPES
|
||||
)
|
||||
drive_service = build('drive', 'v3', credentials=credentials)
|
||||
except Exception as e:
|
||||
print(f"Error setting up Google Drive API: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
# ---------------- Database Connection ----------------
|
||||
try:
|
||||
conn = mariadb.connect(
|
||||
host=os.environ.get("DB_HOST", "192.168.0.187"),
|
||||
@ -22,67 +41,114 @@ except mariadb.Error as e:
|
||||
print(f"Error connecting to MariaDB: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
def get_rclone_link(relative_path):
|
||||
# ---------------- Helper Functions ----------------
|
||||
def search_folder(folder_name, parent_id):
|
||||
"""
|
||||
Uses rclone to generate a shareable link for the file at the given relative path.
|
||||
Searches for a folder with the given name under the specified parent_id.
|
||||
Returns the folder ID if found, otherwise None.
|
||||
"""
|
||||
remote_path = f"{REMOTE_NAME}:{relative_path}"
|
||||
query = (
|
||||
f"'{parent_id}' in parents and "
|
||||
f"name='{folder_name}' and "
|
||||
f"mimeType='application/vnd.google-apps.folder' and "
|
||||
f"trashed=false"
|
||||
)
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["rclone", "link", remote_path],
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
check=True
|
||||
)
|
||||
return result.stdout.strip()
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"Error obtaining rclone link for {remote_path}: {e.stderr}")
|
||||
return None
|
||||
|
||||
def extract_file_id(link):
|
||||
"""
|
||||
Extracts the Google Drive file ID from the shareable URL.
|
||||
Expected URL format: https://drive.google.com/file/d/FILE_ID/view?usp=sharing
|
||||
"""
|
||||
match = re.search(r"/d/([^/]+)/", link)
|
||||
if match:
|
||||
return match.group(1)
|
||||
results = drive_service.files().list(
|
||||
q=query,
|
||||
fields="nextPageToken, files(id, name)",
|
||||
includeItemsFromAllDrives=True,
|
||||
supportsAllDrives=True
|
||||
).execute()
|
||||
files = results.get('files', [])
|
||||
if files:
|
||||
return files[0]['id']
|
||||
except Exception as e:
|
||||
print(f"Error searching for folder '{folder_name}' under parent '{parent_id}': {e}")
|
||||
return None
|
||||
|
||||
def update_database(filename, file_id):
|
||||
def search_file(file_name, parent_id):
|
||||
"""
|
||||
Updates the clips table in the database with the provided Google Drive file ID.
|
||||
This example uses the base name of the file to match the record.
|
||||
Adjust the query as needed for your schema.
|
||||
Searches for a file (non-folder) with the given name under the specified parent_id.
|
||||
Returns the file ID if found, otherwise None.
|
||||
"""
|
||||
base = os.path.basename(filename)
|
||||
query = "UPDATE clips SET gdrive_file_id = ? WHERE filename LIKE ?"
|
||||
like_pattern = f"%{base}%"
|
||||
query = (
|
||||
f"'{parent_id}' in parents and "
|
||||
f"name='{file_name}' and "
|
||||
f"mimeType!='application/vnd.google-apps.folder' and "
|
||||
f"trashed=false"
|
||||
)
|
||||
try:
|
||||
cursor.execute(query, (file_id, like_pattern))
|
||||
conn.commit()
|
||||
print(f"Updated {base} with file_id: {file_id}")
|
||||
except mariadb.Error as e:
|
||||
print(f"Database update failed for {base}: {e}")
|
||||
results = drive_service.files().list(
|
||||
q=query,
|
||||
fields="nextPageToken, files(id, name)",
|
||||
includeItemsFromAllDrives=True,
|
||||
supportsAllDrives=True
|
||||
).execute()
|
||||
files = results.get('files', [])
|
||||
if files:
|
||||
return files[0]['id']
|
||||
except Exception as e:
|
||||
print(f"Error searching for file '{file_name}' under parent '{parent_id}': {e}")
|
||||
return None
|
||||
|
||||
def get_drive_file_id(relative_path):
    """
    Resolve a relative file path to its Google Drive file ID.

    The path (e.g., "agraelus/from_vod/07-03-25/clip_2399595117_3500.mp4") is
    split into components. Every component except the last is treated as a
    folder name and looked up with search_folder(), starting at
    DRIVE_ROOT_FOLDER_ID; the last component is looked up with search_file()
    inside the final folder.

    Args:
        relative_path: Path of the file relative to the Drive root folder.

    Returns:
        The file's ID if every folder and the file itself are found,
        otherwise None (including when the path is empty).
    """
    if not relative_path:
        return None

    # Accept both the local separator and forward slashes, then drop empty
    # components so a leading/trailing or doubled slash does not turn into a
    # lookup for a folder or file named "".
    normalized = relative_path.replace(os.sep, "/")
    parts = [part for part in normalized.split("/") if part]
    if not parts:
        return None

    # All parts except the last are folder names.
    *folders, file_name = parts
    parent_id = DRIVE_ROOT_FOLDER_ID

    # Traverse the folder structure one level at a time.
    for folder in folders:
        folder_id = search_folder(folder, parent_id)
        if not folder_id:
            print(f"Folder '{folder}' not found under parent '{parent_id}'.")
            return None
        parent_id = folder_id

    # Now search for the file within the final folder.
    return search_file(file_name, parent_id)
|
||||
|
||||
def update_database(clip_id, file_id):
    """
    Store a Google Drive file ID on a clip row.

    Runs a parameterized UPDATE against the clips table for the row whose id
    equals clip_id, commits it, and prints the outcome. A mariadb.Error is
    caught and reported rather than propagated.
    """
    sql = "UPDATE clips SET gdrive_file_id = ? WHERE id = ?"
    params = (file_id, clip_id)
    try:
        cursor.execute(sql, params)
        conn.commit()
    except mariadb.Error as err:
        print(f"Database update failed for clip id {clip_id}: {err}")
    else:
        # Only reached when both the execute and the commit succeeded.
        print(f"Updated clip id {clip_id} with file_id: {file_id}")
|
||||
|
||||
# ---------------- Main Process ----------------
|
||||
def main():
|
||||
# Walk through the local directory recursively
|
||||
for root, dirs, files in os.walk(LOCAL_DIR):
|
||||
for file in files:
|
||||
full_path = os.path.join(root, file)
|
||||
# Compute the relative path to preserve folder structure in the remote
|
||||
rel_path = os.path.relpath(full_path, LOCAL_DIR)
|
||||
print(f"Processing file: {full_path} (relative: {rel_path})")
|
||||
link = get_rclone_link(rel_path)
|
||||
if link:
|
||||
file_id = extract_file_id(link)
|
||||
if file_id:
|
||||
update_database(rel_path, file_id)
|
||||
else:
|
||||
print(f"Could not extract file ID from link: {link}")
|
||||
else:
|
||||
print(f"No link generated for file: {full_path}")
|
||||
try:
|
||||
cursor.execute("SELECT id, filename FROM clips where gdrive_file_id is null")
|
||||
clips = cursor.fetchall()
|
||||
except mariadb.Error as e:
|
||||
print(f"Database query failed: {e}")
|
||||
return
|
||||
|
||||
for clip in clips:
|
||||
clip_id = clip[0]
|
||||
filename = clip[1]
|
||||
|
||||
print(f"Processing clip id {clip_id}: {filename}")
|
||||
drive_file_id = get_drive_file_id(filename.replace('clips/', ''))
|
||||
if drive_file_id:
|
||||
update_database(clip_id, drive_file_id)
|
||||
else:
|
||||
print(f"Google Drive file ID not found for clip id {clip_id}.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
@ -1 +1,5 @@
|
||||
mariadb
|
||||
mariadb
|
||||
google-api-python-client
|
||||
google-auth
|
||||
google-auth-httplib2
|
||||
google-auth-oauthlib
|
Loading…
Reference in New Issue
Block a user