gdrive api id loader
This commit is contained in:
parent
ee56aa0008
commit
2134b13c21
@ -9,10 +9,11 @@ RUN apt-get update && \
|
|||||||
|
|
||||||
# Copy requirements file (if you have one) and install Python dependencies
|
# Copy requirements file (if you have one) and install Python dependencies
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
RUN pip install --no-cache-dir mariadb
|
RUN pip3 install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
# Copy application code, the entrypoint script, and channels.json
|
# Copy application code, the entrypoint script, and channels.json
|
||||||
COPY gdrive_id_loader.py .
|
COPY gdrive_id_loader.py .
|
||||||
COPY rclone.conf /root/.config/rclone/rclone.conf
|
COPY rclone.conf /root/.config/rclone/rclone.conf
|
||||||
|
COPY service_account.json .
|
||||||
# Default command
|
# Default command
|
||||||
CMD ["python", "-u", "gdrive_id_loader.py"]
|
CMD ["python", "-u", "gdrive_id_loader.py"]
|
@ -1,14 +1,33 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
import os
|
import os
|
||||||
import subprocess
|
|
||||||
import re
|
|
||||||
import sys
|
import sys
|
||||||
import mariadb
|
import mariadb
|
||||||
|
from google.oauth2 import service_account
|
||||||
|
from googleapiclient.discovery import build
|
||||||
|
|
||||||
# Configuration – update these values as needed.
|
# ---------------- Configuration ----------------
|
||||||
LOCAL_DIR = "/shared/transcriptor/clips" # Local folder where clips are stored
|
# Local directory where clips are stored.
|
||||||
REMOTE_NAME = "gdrive" # rclone remote name for Google Drive
|
LOCAL_DIR = "/shared/transcriptor/clips"
|
||||||
|
|
||||||
# Connect to the MariaDB database
|
# Google Drive root folder ID where rclone is syncing your files.
|
||||||
|
DRIVE_ROOT_FOLDER_ID = "1qjq9XEC19g6LGw6fwcZXSQYgOO2YuAOA"
|
||||||
|
|
||||||
|
# Path to your service account credentials JSON file.
|
||||||
|
SERVICE_ACCOUNT_FILE = "service_account.json"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------- Google Drive API Setup ----------------
# The script only lists files, so the read-only Drive scope is requested.
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']

# Build the Drive v3 client once at import time; the helper functions below
# use the module-level `drive_service`. Any failure here (unreadable key file,
# client construction error) aborts the script with exit status 1.
try:
    credentials = service_account.Credentials.from_service_account_file(
        filename=SERVICE_ACCOUNT_FILE,
        scopes=SCOPES,
    )
    drive_service = build(
        serviceName='drive',
        version='v3',
        credentials=credentials,
    )
except Exception as exc:
    print(f"Error setting up Google Drive API: {exc}")
    sys.exit(1)
|
||||||
|
|
||||||
|
# ---------------- Database Connection ----------------
|
||||||
try:
|
try:
|
||||||
conn = mariadb.connect(
|
conn = mariadb.connect(
|
||||||
host=os.environ.get("DB_HOST", "192.168.0.187"),
|
host=os.environ.get("DB_HOST", "192.168.0.187"),
|
||||||
@ -22,67 +41,114 @@ except mariadb.Error as e:
|
|||||||
print(f"Error connecting to MariaDB: {e}")
|
print(f"Error connecting to MariaDB: {e}")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
def get_rclone_link(relative_path):
|
# ---------------- Helper Functions ----------------
|
||||||
|
def search_folder(folder_name, parent_id):
|
||||||
"""
|
"""
|
||||||
Uses rclone to generate a shareable link for the file at the given relative path.
|
Searches for a folder with the given name under the specified parent_id.
|
||||||
|
Returns the folder ID if found, otherwise None.
|
||||||
"""
|
"""
|
||||||
remote_path = f"{REMOTE_NAME}:{relative_path}"
|
query = (
|
||||||
|
f"'{parent_id}' in parents and "
|
||||||
|
f"name='{folder_name}' and "
|
||||||
|
f"mimeType='application/vnd.google-apps.folder' and "
|
||||||
|
f"trashed=false"
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
result = subprocess.run(
|
results = drive_service.files().list(
|
||||||
["rclone", "link", remote_path],
|
q=query,
|
||||||
stdout=subprocess.PIPE,
|
fields="nextPageToken, files(id, name)",
|
||||||
stderr=subprocess.PIPE,
|
includeItemsFromAllDrives=True,
|
||||||
text=True,
|
supportsAllDrives=True
|
||||||
check=True
|
).execute()
|
||||||
)
|
files = results.get('files', [])
|
||||||
return result.stdout.strip()
|
if files:
|
||||||
except subprocess.CalledProcessError as e:
|
return files[0]['id']
|
||||||
print(f"Error obtaining rclone link for {remote_path}: {e.stderr}")
|
except Exception as e:
|
||||||
return None
|
print(f"Error searching for folder '{folder_name}' under parent '{parent_id}': {e}")
|
||||||
|
|
||||||
def extract_file_id(link):
|
|
||||||
"""
|
|
||||||
Extracts the Google Drive file ID from the shareable URL.
|
|
||||||
Expected URL format: https://drive.google.com/file/d/FILE_ID/view?usp=sharing
|
|
||||||
"""
|
|
||||||
match = re.search(r"/d/([^/]+)/", link)
|
|
||||||
if match:
|
|
||||||
return match.group(1)
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def update_database(filename, file_id):
|
def search_file(file_name, parent_id):
|
||||||
"""
|
"""
|
||||||
Updates the clips table in the database with the provided Google Drive file ID.
|
Searches for a file (non-folder) with the given name under the specified parent_id.
|
||||||
This example uses the base name of the file to match the record.
|
Returns the file ID if found, otherwise None.
|
||||||
Adjust the query as needed for your schema.
|
|
||||||
"""
|
"""
|
||||||
base = os.path.basename(filename)
|
query = (
|
||||||
query = "UPDATE clips SET gdrive_file_id = ? WHERE filename LIKE ?"
|
f"'{parent_id}' in parents and "
|
||||||
like_pattern = f"%{base}%"
|
f"name='{file_name}' and "
|
||||||
|
f"mimeType!='application/vnd.google-apps.folder' and "
|
||||||
|
f"trashed=false"
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
cursor.execute(query, (file_id, like_pattern))
|
results = drive_service.files().list(
|
||||||
conn.commit()
|
q=query,
|
||||||
print(f"Updated {base} with file_id: {file_id}")
|
fields="nextPageToken, files(id, name)",
|
||||||
except mariadb.Error as e:
|
includeItemsFromAllDrives=True,
|
||||||
print(f"Database update failed for {base}: {e}")
|
supportsAllDrives=True
|
||||||
|
).execute()
|
||||||
|
files = results.get('files', [])
|
||||||
|
if files:
|
||||||
|
return files[0]['id']
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error searching for file '{file_name}' under parent '{parent_id}': {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def get_drive_file_id(relative_path):
    """
    Resolve a clip's relative path to its Google Drive file ID.

    Given a relative file path (e.g., "agraelus/from_vod/07-03-25/clip_2399595117_3500.mp4"),
    traverse the folder structure on Google Drive starting at DRIVE_ROOT_FOLDER_ID and
    return the file's ID if found.

    Args:
        relative_path: Path of the file relative to the Drive root folder.
            Components may be separated by "/" or by the local os.sep.

    Returns:
        The Drive file ID, or None when the path is empty, an intermediate
        folder is missing, or the file itself is not found.
    """
    if not relative_path:
        return None

    # The caller passes "/"-separated paths; also accept the local separator
    # so the function behaves the same regardless of the host platform.
    normalized = relative_path.replace(os.sep, "/")

    # str.split never returns an empty list, so filter explicitly: empty
    # components (leading, trailing or doubled slashes) and "." would
    # otherwise be looked up on Drive as folder names and fail the traversal.
    parts = [part for part in normalized.split("/") if part and part != "."]
    if not parts:
        return None

    # All parts except the last are folder names.
    *folders, file_name = parts
    parent_id = DRIVE_ROOT_FOLDER_ID

    # Walk down one folder level at a time, stopping at the first miss.
    for folder in folders:
        folder_id = search_folder(folder, parent_id)
        if not folder_id:
            print(f"Folder '{folder}' not found under parent '{parent_id}'.")
            return None
        parent_id = folder_id

    # Now search for the file within the final folder.
    return search_file(file_name, parent_id)
|
||||||
|
|
||||||
|
def update_database(clip_id, file_id):
    """
    Store a Google Drive file ID on one row of the clips table.

    The row is selected by its primary-key value `clip_id`; the statement is
    parameterized and committed immediately. A mariadb.Error is reported on
    stdout and swallowed so the caller can continue with the next clip.
    """
    sql = "UPDATE clips SET gdrive_file_id = ? WHERE id = ?"
    params = (file_id, clip_id)
    try:
        cursor.execute(sql, params)
        conn.commit()
    except mariadb.Error as err:
        print(f"Database update failed for clip id {clip_id}: {err}")
    else:
        # Reached only when both the execute and the commit succeeded.
        print(f"Updated clip id {clip_id} with file_id: {file_id}")
|
||||||
|
|
||||||
|
# ---------------- Main Process ----------------
|
||||||
def main():
|
def main():
|
||||||
# Walk through the local directory recursively
|
try:
|
||||||
for root, dirs, files in os.walk(LOCAL_DIR):
|
cursor.execute("SELECT id, filename FROM clips where gdrive_file_id is null")
|
||||||
for file in files:
|
clips = cursor.fetchall()
|
||||||
full_path = os.path.join(root, file)
|
except mariadb.Error as e:
|
||||||
# Compute the relative path to preserve folder structure in the remote
|
print(f"Database query failed: {e}")
|
||||||
rel_path = os.path.relpath(full_path, LOCAL_DIR)
|
return
|
||||||
print(f"Processing file: {full_path} (relative: {rel_path})")
|
|
||||||
link = get_rclone_link(rel_path)
|
for clip in clips:
|
||||||
if link:
|
clip_id = clip[0]
|
||||||
file_id = extract_file_id(link)
|
filename = clip[1]
|
||||||
if file_id:
|
|
||||||
update_database(rel_path, file_id)
|
print(f"Processing clip id {clip_id}: {filename}")
|
||||||
else:
|
drive_file_id = get_drive_file_id(filename.replace('clips/', ''))
|
||||||
print(f"Could not extract file ID from link: {link}")
|
if drive_file_id:
|
||||||
else:
|
update_database(clip_id, drive_file_id)
|
||||||
print(f"No link generated for file: {full_path}")
|
else:
|
||||||
|
print(f"Google Drive file ID not found for clip id {clip_id}.")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
@ -1 +1,5 @@
|
|||||||
mariadb
|
mariadb
|
||||||
|
google-api-python-client
|
||||||
|
google-auth
|
||||||
|
google-auth-httplib2
|
||||||
|
google-auth-oauthlib
|
Loading…
Reference in New Issue
Block a user