"""Download audio for pending Twitch VODs listed in the MariaDB ``videos`` table.

Each run selects the videos that are neither downloaded, processed, nor
currently being downloaded, claims them one at a time through the
``data_downloading`` flag, and fetches their audio with ``yt-dlp``.
"""

import os
import subprocess
from datetime import datetime, time, timedelta
from zoneinfo import ZoneInfo

import mariadb
import requests


# ---------------------------
# Twitch API Helper Functions
# ---------------------------
def get_access_token() -> str:
    """Request an app access token from Twitch (client-credentials grant).

    The client id and secret are read from the ``TWITCH_CLIENT_ID`` and
    ``TWITCH_CLIENT_SECRET`` environment variables (empty string if unset).

    Returns:
        The ``access_token`` field of the JSON response.

    Raises:
        requests.HTTPError: If Twitch answers with an error status.
        requests.Timeout: If Twitch does not answer within the timeout.
    """
    url = "https://id.twitch.tv/oauth2/token"
    payload = {
        "client_id": os.environ.get("TWITCH_CLIENT_ID", ""),
        "client_secret": os.environ.get("TWITCH_CLIENT_SECRET", ""),
        "grant_type": "client_credentials",
    }
    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.post(url, data=payload, timeout=30)
    response.raise_for_status()
    return response.json()["access_token"]


# ---------------------------
# VOD Processing Functions
# ---------------------------
def download_vod_audio(vod_url: str, output_filename: str) -> None:
    """Download the audio track of a VOD as MP3 using ``yt-dlp``.

    If ``output_filename`` already exists the download is skipped.

    Args:
        vod_url: URL of the VOD handed to ``yt-dlp``.
        output_filename: Destination path of the MP3 file.

    Raises:
        subprocess.CalledProcessError: If ``yt-dlp`` exits with a non-zero
            status.
    """
    if os.path.exists(output_filename):
        print(f"{output_filename} already exists. Skipping download.")
        return

    command = [
        "yt-dlp",
        "--cookies", "cookies.txt",  # path is relative to the working directory
        "-f", "worst",  # only the audio is kept, so the lowest quality suffices
        "--extract-audio",
        "--audio-format", "mp3",
        "-o", output_filename,
        vod_url,
    ]
    subprocess.run(command, check=True)
    print(f"Downloaded audio from VOD to {output_filename}")


# ---------------------------
# Database Interaction Functions
# ---------------------------
def get_pending_videos(db) -> list:
    """Return the videos that still need their audio downloaded.

    A video is pending when ``data_downloaded``, ``processed`` and
    ``data_downloading`` are all 0. The ``channels`` table is joined to
    obtain the channel name.

    Args:
        db: An open database connection.

    Returns:
        A list of dicts with the keys ``id``, ``url``, ``external_id`` and
        ``channel_name``.
    """
    query = """
        SELECT v.id, v.url, v.external_id, c.channel_name
        FROM videos v
        JOIN channels c ON v.channel_id = c.id
        WHERE v.data_downloaded = 0 AND v.processed = 0 and v.data_downloading = 0
    """
    cursor = db.cursor()
    try:
        cursor.execute(query)
        columns = [col[0] for col in cursor.description]
        return [dict(zip(columns, row)) for row in cursor.fetchall()]
    finally:
        # Close the cursor even when the query fails.
        cursor.close()


def db_set_col(db, video_id, column, value=True):
    """Set a single column of a row in ``videos`` and commit.

    When ``column`` is ``"data_downloaded"`` the ``download_end`` timestamp
    is set to ``NOW()`` in the same statement. ``updated_at`` is not written
    explicitly by this function.

    Args:
        db: An open database connection.
        video_id: Primary key (``id``) of the video row.
        column: Name of the column to update, e.g. ``"data_downloaded"``.
        value: New value for the column.

    Raises:
        ValueError: If ``column`` is not a plain identifier made of letters,
            digits and underscores.
    """
    # The column name cannot be bound as a parameter, so it is interpolated
    # into the statement; refuse anything that is not a bare identifier.
    if not isinstance(column, str) or not column.replace("_", "").isalnum():
        raise ValueError(f"Invalid column name: {column!r}")

    if column == "data_downloaded":
        query = f"UPDATE videos SET {column} = %s, download_end=NOW() WHERE id = %s"
    else:
        query = f"UPDATE videos SET {column} = %s WHERE id = %s"

    cursor = db.cursor()
    try:
        cursor.execute(query, (value, video_id))
        db.commit()
    finally:
        cursor.close()


def try_lock_video(db, video_id) -> bool:
    """Try to claim a video by flipping ``data_downloading`` from 0 to 1.

    The conditional UPDATE only matches while the flag is still 0, so it
    changes exactly one row when the claim succeeds and none otherwise.
    ``updated_at`` and ``download_start`` are set to ``NOW()`` on success.

    Args:
        db: An open database connection.
        video_id: Primary key (``id``) of the video row.

    Returns:
        True if the lock was acquired, False otherwise.
    """
    query = """
        UPDATE videos
        SET data_downloading = 1, updated_at = NOW(), download_start = NOW()
        WHERE id = %s AND data_downloading = 0
    """
    cursor = db.cursor()
    try:
        cursor.execute(query, (video_id,))
        # rowcount is the DB-API attribute; affected_rows is deprecated.
        affected = cursor.rowcount
        db.commit()
    finally:
        cursor.close()
    return affected == 1


# ---------------------------
# Main Functionality
# ---------------------------
def _process_video(db, video) -> None:
    """Claim one pending video, download its audio and update its flags."""
    video_id = video["id"]
    vod_url = video["url"]
    channel_name = video["channel_name"]

    # Output file path: audio/<channel_name>/vod_<external_id>.mp3
    output_dir = os.path.join("audio", channel_name)
    output_filename = os.path.join(output_dir, f"vod_{video['external_id']}.mp3")

    print(f"\nProcessing Video ID: {video_id}, Channel: {channel_name}, URL: {vod_url}")

    # NOTE(review): the lock only checks data_downloading, so a video that
    # another container finished after the pending list was read can still
    # be claimed here; the existing-file check then skips the download.
    if not try_lock_video(db, video_id):
        print(f"Video ID {video_id} is already being downloaded by another container. Skipping.")
        return

    try:
        os.makedirs(output_dir, exist_ok=True)
        download_vod_audio(vod_url, output_filename)
        # 'processed' is left untouched; it is meant to be set once later
        # processing is complete.
        db_set_col(db, video_id, "data_downloaded", True)
    except Exception as e:
        # Per-video boundary: report the failure and move on to the next one.
        print(f"Error processing video ID {video_id}: {e}")
    finally:
        # Always release the lock so a failed video can be retried.
        db_set_col(db, video_id, "data_downloading", False)


def main():
    """Connect to MariaDB and download the audio of every pending video.

    Connection settings come from the ``DB_HOST``, ``DB_USER``, ``DB_PASS``,
    ``DB_NAME`` and ``DB_PORT`` environment variables.
    """
    # SECURITY: the fallback values below embed real-looking credentials in
    # the source. They are kept so existing deployments keep working, but
    # they should be supplied through the environment and removed from here.
    try:
        db = mariadb.connect(
            host=os.environ.get("DB_HOST", "192.168.0.187"),
            user=os.environ.get("DB_USER", "t0is"),
            password=os.environ.get("DB_PASS", "Silenceisgolden555"),
            database=os.environ.get("DB_NAME", "transcriptor"),
            port=int(os.environ.get("DB_PORT", 3306)),
        )
    except mariadb.Error as err:
        print(f"Error connecting to MariaDB: {err}")
        return

    try:
        pending_videos = get_pending_videos(db)
        if not pending_videos:
            print("No pending videos to process.")
            return

        for video in pending_videos:
            _process_video(db, video)
    finally:
        # Close the connection on every exit path, including errors.
        db.close()


if __name__ == "__main__":
    main()