From 74ca4ac74623c3077a74b446fb8eb3d50232e0ea Mon Sep 17 00:00:00 2001 From: t0is Date: Mon, 10 Mar 2025 17:12:33 +0100 Subject: [PATCH] edits --- docker-compose.yml | 70 ++++++++++++++++----------------- generate-docker-compose.py | 2 +- main.py | 80 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 112 insertions(+), 40 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 38a4c56..fdf665c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,7 +3,7 @@ services: environment: - CHANNEL_NAME=agraelus - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -20,7 +20,7 @@ services: environment: - CHANNEL_NAME=amfikcz - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -37,7 +37,7 @@ services: environment: - CHANNEL_NAME=andrej_kalinin - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -54,7 +54,7 @@ services: environment: - CHANNEL_NAME=arcadebulls - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -71,7 +71,7 @@ services: environment: - CHANNEL_NAME=artix - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -88,7 +88,7 @@ services: environment: - CHANNEL_NAME=astatoro - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -105,7 +105,7 @@ services: environment: - CHANNEL_NAME=avatar0fwar - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -122,7 +122,7 @@ services: environment: - CHANNEL_NAME=batmanova - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -139,7 +139,7 @@ services: environment: - CHANNEL_NAME=bladeito - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -156,7 +156,7 @@ services: environment: - CHANNEL_NAME=claina - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -173,7 +173,7 @@ services: environment: - CHANNEL_NAME=czechcloud - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -190,7 +190,7 @@ services: environment: - CHANNEL_NAME=domovnikofc - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -207,7 +207,7 @@ services: environment: - CHANNEL_NAME=elbowcz146 - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -224,7 +224,7 @@ services: environment: - CHANNEL_NAME=fluffcz - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -241,7 +241,7 @@ services: environment: - CHANNEL_NAME=flyguncz - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -258,7 +258,7 @@ services: environment: - CHANNEL_NAME=freezecz - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -275,7 +275,7 @@ services: environment: - CHANNEL_NAME=heddi2k - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -292,7 +292,7 @@ services: environment: - CHANNEL_NAME=herdyn - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -309,7 +309,7 @@ services: environment: - CHANNEL_NAME=himtheoldboy - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -326,7 +326,7 @@ services: environment: - CHANNEL_NAME=holasovic - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -343,7 +343,7 @@ services: environment: - CHANNEL_NAME=hornakcz - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -360,7 +360,7 @@ services: environment: - CHANNEL_NAME=jorantheviking - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -377,7 +377,7 @@ services: environment: - CHANNEL_NAME=kokiii_ - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -394,7 +394,7 @@ services: environment: - CHANNEL_NAME=liveoliverr - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -411,7 +411,7 @@ services: environment: - CHANNEL_NAME=love_stanislove - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -428,7 +428,7 @@ services: environment: - CHANNEL_NAME=marty_vole - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -445,7 +445,7 @@ services: environment: - CHANNEL_NAME=mullersie - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -462,7 +462,7 @@ services: environment: - CHANNEL_NAME=patrikturi - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -479,7 +479,7 @@ services: environment: - CHANNEL_NAME=resttpowered - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -496,7 +496,7 @@ services: environment: - CHANNEL_NAME=spajkk - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -513,7 +513,7 @@ services: environment: - CHANNEL_NAME=styko - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -530,7 +530,7 @@ services: environment: - CHANNEL_NAME=tensterakdary - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -547,7 +547,7 @@ services: environment: - CHANNEL_NAME=tom__mm - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -564,7 +564,7 @@ services: environment: - CHANNEL_NAME=vvudy - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov @@ -581,7 +581,7 @@ services: environment: - CHANNEL_NAME=xnapycz - CHANNEL_LANGUAGE=cs - - TIMEDELTA_DAYS=6 + - TIMEDELTA_DAYS=7 - TIMEDELTA_DAYS_EXACT=false - CLIP_CREATE_FROM_CHAT=false - TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov diff --git a/generate-docker-compose.py b/generate-docker-compose.py index 01aa6cd..3e425c0 100644 --- a/generate-docker-compose.py +++ b/generate-docker-compose.py @@ -17,7 +17,7 @@ for channel in channels: "environment": [ f"CHANNEL_NAME={channel['name']}", f"CHANNEL_LANGUAGE={channel['language']}", - "TIMEDELTA_DAYS=6", + "TIMEDELTA_DAYS=7", "TIMEDELTA_DAYS_EXACT=false", "CLIP_CREATE_FROM_CHAT=false", "TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov", diff --git a/main.py b/main.py index 059d5cf..0b7eebf 100644 --- a/main.py +++ b/main.py @@ -127,7 +127,13 @@ def download_vod(vod_url, output_filename): if os.path.exists(output_filename): print(f"{output_filename} already exists. Skipping download.") return - command = ["yt-dlp", "--cookies", "cookies.txt", "-o", output_filename, vod_url] + command = [ + "yt-dlp", + "--cookies", "cookies.txt", + "-f", "worst", + "-o", output_filename, + vod_url + ] subprocess.run(command, check=True) print(f"Downloaded VOD to {output_filename}") @@ -279,6 +285,54 @@ def create_clip_from_comment_timestamp(video_file, comment_timestamp, vod): print(f"Clip created: {clip_filename}") return clip_filename + +def seconds_to_timestamp(seconds): + """Convert seconds to HH:MM:SS format.""" + hours = int(seconds // 3600) + minutes = int((seconds % 3600) // 60) + secs = int(seconds % 60) + return f"{hours:02}:{minutes:02}:{secs:02}" + +def download_vod_segment(vod, match_start, duration=60): + """ + Downloads a segment of a VOD using yt-dlp. + + Parameters: + vod_url (str): The URL of the video. + output_filename (str): The desired output filename. + start_seconds (float): Start time in seconds (from faster-whisper). + duration (int): Duration of the segment in seconds (default 60 seconds). + """ + + clip_start = max(match_start - 15, 0) + clip_dir = base_dirs["clips_transcript"] + + vod_datetime = datetime.strptime(vod['created_at'], '%Y-%m-%dT%H:%M:%SZ') + date_folder = vod_datetime.strftime('%d-%m-%y') + + # Create a subfolder inside clip_dir for the date. + clip_date_dir = os.path.join(clip_dir, date_folder) + os.makedirs(clip_date_dir, exist_ok=True) + + clip_filename = os.path.join(clip_date_dir, f"clip_{vod['id']}_{int(clip_start)}.mp4") + + end_seconds = clip_start + duration + start_ts = seconds_to_timestamp(clip_start) + end_ts = seconds_to_timestamp(end_seconds) + + # yt-dlp download sections format: "*HH:MM:SS-HH:MM:SS" + segment = f"*{start_ts}-{end_ts}" + command = [ + "yt-dlp", + "--cookies", "cookies.txt", + "--download-sections", segment, + "-o", clip_filename, + vod["url"] + ] + + subprocess.run(command, check=True) + print(f"Downloaded segment from {start_ts} to {end_ts} into {clip_filename}") + # --------------------------- # Main Processing Pipeline # --------------------------- @@ -297,7 +351,8 @@ def handle_matches_fast(vod, video_filename, segments_data): start = match["start"] text = match["text"] print(f" - At {start:.2f}s: {text}") - create_clip_from_vod(video_filename, start, vod) + # create_clip_from_vod(video_filename, start, vod) + download_vod_segment(vod, start) else: print("faster_whisper -- No mentions of keywords.") @@ -313,6 +368,22 @@ def handle_matches(vod, video_filename, result): else: print(f"No mentions of {SEARCH_KEYWORDS} found in VOD {vod['id']}.") +def download_vod_audio(vod_url, output_filename): + if os.path.exists(output_filename): + print(f"{output_filename} already exists. Skipping download.") + return + command = [ + "yt-dlp", + "--cookies", "cookies.txt", + "-f", "worst", + "--extract-audio", + "--audio-format", "mp3", + "-o", output_filename, + vod_url + ] + subprocess.run(command, check=True) + print(f"Downloaded audio from VOD to {output_filename}") + def main(): print("Obtaining access token...") token = get_access_token() @@ -338,8 +409,9 @@ def main(): chat_log_filename = os.path.join(base_dirs["chat"], f"chat_{vod_id}.json") print(f"\nProcessing VOD: {vod_url}") - download_vod(vod_url, video_filename) - extract_audio(video_filename, audio_filename) + # download_vod(vod_url, video_filename) + # extract_audio(video_filename, audio_filename) + download_vod_audio(vod_url, audio_filename) print("Transcribing audio. This may take some time...") # Pass language and vod_id so that the transcript is saved and reused if available.