"""Download the audio of pending VODs listed in a MariaDB database using yt-dlp."""
import os
import re
import subprocess
from datetime import datetime, time, timedelta
from zoneinfo import ZoneInfo

import mariadb
import requests
# ---------------------------
# Twitch API Helper Functions
# ---------------------------
def get_access_token(timeout=10):
    """
    Requests an OAuth access token from Twitch using the client_credentials grant.
    The client ID and secret are read from the TWITCH_CLIENT_ID and
    TWITCH_CLIENT_SECRET environment variables (empty string when unset).

    Args:
        timeout: Seconds to wait for the token endpoint before giving up.

    Returns:
        The "access_token" value from the JSON response.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If no response arrives within `timeout` seconds.
        KeyError: If the response JSON has no "access_token" field.
    """
    url = "https://id.twitch.tv/oauth2/token"
    payload = {
        "client_id": os.environ.get("TWITCH_CLIENT_ID", ""),
        "client_secret": os.environ.get("TWITCH_CLIENT_SECRET", ""),
        "grant_type": "client_credentials",
    }
    # requests applies no timeout by default; without one a stalled
    # connection would block the caller indefinitely.
    response = requests.post(url, data=payload, timeout=timeout)
    response.raise_for_status()
    return response.json()["access_token"]
# ---------------------------
# VOD Processing Functions
# ---------------------------
def download_vod_audio(vod_url, output_filename):
    """
    Downloads the audio from a VOD using yt-dlp and converts it to MP3.
    If the output file already exists, the download is skipped.

    Args:
        vod_url: URL of the VOD, passed to yt-dlp as its only positional argument.
        output_filename: Path handed to yt-dlp's -o option.

    Raises:
        subprocess.CalledProcessError: If yt-dlp exits with a non-zero status.
        FileNotFoundError: If the yt-dlp executable cannot be found.
    """
    if os.path.exists(output_filename):
        print(f"{output_filename} already exists. Skipping download.")
        return
    command = [
        "yt-dlp",
        "--cookies", "cookies.txt",
        "-f", "worst",
        "--extract-audio",
        "--audio-format", "mp3",
        "-o", output_filename,
        # "--" ends option parsing, so a URL value that begins with "-"
        # can never be interpreted as a yt-dlp flag.
        "--",
        vod_url,
    ]
    subprocess.run(command, check=True)
    print(f"Downloaded audio from VOD to {output_filename}")
# ---------------------------
# Database Interaction Functions
# ---------------------------
def get_pending_videos(db):
    """
    Retrieves videos that are not yet downloaded, processed, or being downloaded.
    Joins the channels table to also fetch the channel_name.

    Args:
        db: Open database connection exposing the DB-API cursor() method.

    Returns:
        A list of dicts, one per matching row, keyed by the selected column
        names (id, url, channel_name). Empty when nothing is pending.
    """
    query = """
        SELECT v.id, v.url, c.channel_name
        FROM videos v
        JOIN channels c ON v.channel_id = c.id
        WHERE v.data_downloaded = 0 AND v.processed = 0 AND v.data_downloading = 0
    """
    cursor = db.cursor()
    try:
        cursor.execute(query)
        # cursor.description holds one entry per result column; item 0 is its name.
        columns = [col[0] for col in cursor.description]
        return [dict(zip(columns, row)) for row in cursor.fetchall()]
    finally:
        # Release the cursor even when the query or fetch raises.
        cursor.close()
# Matches a bare SQL identifier: letters, digits and underscores, not starting with a digit.
_SQL_IDENTIFIER = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")


def db_set_col(db, video_id, column, value=True):
    """
    Updates the specified column (e.g. data_downloaded) for the video and commits.
    Only the named column is written; this statement does not set updated_at.
    NOTE(review): an earlier docstring said updated_at was refreshed here --
    confirm whether the schema does that automatically.

    Args:
        db: Open database connection.
        video_id: Value matched against videos.id.
        column: Name of the videos column to update; must be a bare identifier.
        value: New value for the column (defaults to True).

    Raises:
        ValueError: If column is not a plain SQL identifier.
    """
    # Column names cannot be bound as query parameters, so the name is
    # interpolated into the statement; reject anything that is not a bare
    # identifier to keep arbitrary SQL out of the query.
    if not isinstance(column, str) or not _SQL_IDENTIFIER.fullmatch(column):
        raise ValueError(f"Invalid column name: {column!r}")

    query = f"UPDATE videos SET {column} = %s WHERE id = %s"
    cursor = db.cursor()
    try:
        cursor.execute(query, (value, video_id))
        db.commit()
    finally:
        # Release the cursor even when the update or commit raises.
        cursor.close()
def try_lock_video(db, video_id):
    """
    Attempts to atomically set the data_downloading flag to 1 only if it is currently 0.
    The UPDATE affects one row only if the video isn't already flagged as downloading;
    updated_at is set to NOW() in the same statement.

    Args:
        db: Open database connection.
        video_id: Value matched against videos.id.

    Returns:
        True if the lock was acquired (exactly one row was updated), else False.
    """
    query = """
        UPDATE videos
        SET data_downloading = 1, updated_at = NOW()
        WHERE id = %s AND data_downloading = 0
    """
    cursor = db.cursor()
    try:
        cursor.execute(query, (video_id,))
        db.commit()
        # Exactly one affected row means this caller flipped the flag.
        return cursor.rowcount == 1
    finally:
        # Release the cursor even when the update or commit raises.
        cursor.close()
# ---------------------------
# Main Functionality
# ---------------------------
def main():
    """
    Downloads audio for every pending video, one video at a time.

    Connects to MariaDB using the DB_HOST, DB_USER, DB_PASS, DB_NAME and DB_PORT
    environment variables, fetches the pending videos, and for each one takes
    the data_downloading lock, downloads the audio to
    audio/<channel_name>/vod_<id>.mp3, marks it data_downloaded, and finally
    clears the lock. A failure on one video is printed and the loop continues.
    """
    # Connect to the MariaDB database using credentials from environment variables.
    try:
        db = mariadb.connect(
            host=os.environ.get("DB_HOST", "mariadb"),
            user=os.environ.get("DB_USER", "t0is"),
            # SECURITY NOTE(review): a real-looking password is hard-coded as
            # the fallback. It is kept so existing deployments keep working,
            # but it should be rotated and supplied only through DB_PASS.
            password=os.environ.get("DB_PASS", "Silenceisgolden555"),
            database=os.environ.get("DB_NAME", "transcriptor"),
            port=int(os.environ.get("DB_PORT", 3306)),
        )
    except mariadb.Error as err:
        print(f"Error connecting to MariaDB: {err}")
        return

    # try/finally so the connection is closed on every exit path, including
    # an exception raised by one of the queries.
    try:
        pending_videos = get_pending_videos(db)
        if not pending_videos:
            print("No pending videos to process.")
            return

        for video in pending_videos:
            video_id = video['id']
            vod_url = video['url']
            channel_name = video['channel_name']

            # get_pending_videos selects only id, url and channel_name, so
            # indexing video['external_id'] raised KeyError for every video.
            # Use external_id when the row carries it, otherwise the video id.
            file_id = video.get('external_id') or video_id

            # Build output file path: e.g., audio/channel_name/vod_{file_id}.mp3
            output_dir = os.path.join("audio", channel_name)
            os.makedirs(output_dir, exist_ok=True)
            output_filename = os.path.join(output_dir, f"vod_{file_id}.mp3")

            print(f"\nProcessing Video ID: {video_id}, Channel: {channel_name}, URL: {vod_url}")

            if not try_lock_video(db, video_id):
                print(f"Video ID {video_id} is already being downloaded by another container. Skipping.")
                continue

            try:
                download_vod_audio(vod_url, output_filename)
                # Update the video as downloaded; you can later update 'processed' when processing is complete.
                db_set_col(db, video_id, "data_downloaded", True)
            except Exception as e:
                # Boundary handler: report the failure and move on to the next video.
                print(f"Error processing video ID {video_id}: {e}")
            finally:
                # Always release the lock so a failed download can be retried later.
                db_set_col(db, video_id, "data_downloading", False)
    finally:
        db.close()
# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
    main()