From 960f841cb498754c38ce9ab546ff64d6b05272a3 Mon Sep 17 00:00:00 2001 From: t0is Date: Mon, 3 Mar 2025 17:07:51 +0100 Subject: [PATCH] edits --- generate-docker-compose.py | 5 ++- main.py | 85 +++++++++++++++++++++++--------------- 2 files changed, 54 insertions(+), 36 deletions(-) diff --git a/generate-docker-compose.py b/generate-docker-compose.py index aa364c9..2541297 100644 --- a/generate-docker-compose.py +++ b/generate-docker-compose.py @@ -17,8 +17,9 @@ for channel in channels: "environment": [ f"CHANNEL_NAME={channel['name']}", f"CHANNEL_LANGUAGE={channel['language']}", - "TIMEDELTA_DAYS=3", - "TIMEDELTA_DAYS_EXACT=false", + "TIMEDELTA_DAYS=1", + "TIMEDELTA_DAYS_EXACT=true", + "CLIP_CREATE_FROM_CHAT=false", "TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov", "TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es" ], diff --git a/main.py b/main.py index ea39c03..d4d3295 100644 --- a/main.py +++ b/main.py @@ -13,9 +13,10 @@ import json TWITCH_CLIENT_ID = os.environ.get("TWITCH_CLIENT_ID", "") TWITCH_CLIENT_SECRET = os.environ.get("TWITCH_CLIENT_SECRET", "") CHANNEL_NAME = os.environ.get("CHANNEL_NAME", "madmonq") -TIMEDELTA_DAYS = int(os.environ.get("TIMEDELTA_DAYS", "3")) +TIMEDELTA_DAYS = int(os.environ.get("TIMEDELTA_DAYS", "1")) TIMEDELTA_DAYS_EXACT = os.environ.get("TIMEDELTA_DAYS_EXACT", "false").lower() in ("true", "1", "yes") -CHANNEL_LANGUAGE = os.environ.get("CHANNEL_LANGUAGE", "en") +CLIP_CREATE_FROM_CHAT = os.environ.get("CLIP_CREATE_FROM_CHAT", "false").lower() in ("true", "1", "yes") +CHANNEL_LANGUAGE = os.environ.get("CHANNEL_LANGUAGE", "cs") SEARCH_KEYWORDS = [ "madmonq", "madmonge", @@ -184,12 +185,20 @@ def scrape_chat_log(vod_id, output_filename): except subprocess.CalledProcessError as e: print(f"Error downloading chat log for VOD {vod_id}: {e}") -def create_clip_from_vod(video_file, match_start, vod_id): +def create_clip_from_vod(video_file, match_start, vod): clip_start = max(match_start - 15, 0) clip_duration = 60 # seconds clip_dir = base_dirs["clips_transcript"] - os.makedirs(clip_dir, exist_ok=True) - clip_filename = os.path.join(clip_dir, f"clip_{vod_id}_{int(match_start)}.mp4") + + vod_datetime = datetime.strptime(vod['created_at'], '%Y-%m-%dT%H:%M:%SZ') + date_folder = vod_datetime.strftime('%d-%m-%y') + + # Create a subfolder inside clip_dir for the date. + clip_date_dir = os.path.join(clip_dir, date_folder) + os.makedirs(clip_date_dir, exist_ok=True) + + # Build the clip filename inside the date folder. + clip_filename = os.path.join(clip_date_dir, f"clip_{vod['id']}_{int(match_start)}.mp4") command = [ "ffmpeg", "-ss", str(clip_start), @@ -230,12 +239,19 @@ def find_comments_by_keywords(chat_log, keywords): break # No need to check further keywords for this comment. return matching_comments -def create_clip_from_comment_timestamp(video_file, comment_timestamp, vod_id): +def create_clip_from_comment_timestamp(video_file, comment_timestamp, vod): clip_start = max(comment_timestamp - 15, 0) clip_duration = 60 # seconds clip_dir = base_dirs["clips_chat"] - os.makedirs(clip_dir, exist_ok=True) - clip_filename = os.path.join(clip_dir, f"clip_{vod_id}_{int(comment_timestamp)}.mp4") + vod_datetime = datetime.strptime(vod['created_at'], '%Y-%m-%dT%H:%M:%SZ') + date_folder = vod_datetime.strftime('%d-%m-%y') + + # Create a subfolder inside clip_dir for the date. + clip_date_dir = os.path.join(clip_dir, date_folder) + os.makedirs(clip_date_dir, exist_ok=True) + + # Build the clip filename inside the date folder. + clip_filename = os.path.join(clip_date_dir, f"clip_{vod['id']}_{int(comment_timestamp)}.mp4") command = [ "ffmpeg", "-ss", str(clip_start), @@ -252,7 +268,7 @@ def create_clip_from_comment_timestamp(video_file, comment_timestamp, vod_id): # --------------------------- # Main Processing Pipeline # --------------------------- -def handle_matches_fast(vod_id, video_filename, result): +def handle_matches_fast(vod, video_filename, result): matches_fast = [] for segment in result: segment_text = segment.text.lower() @@ -262,27 +278,27 @@ def handle_matches_fast(vod_id, video_filename, result): break if matches_fast: - print(f"faster_whisper -- Found {len(matches_fast)} mention(s) of {SEARCH_KEYWORDS} in VOD {vod_id}:") + print(f"faster_whisper -- Found {len(matches_fast)} mention(s) of {SEARCH_KEYWORDS} in VOD {vod['id']}:") for match in matches_fast: start = match.start # faster-whisper segment attribute text = match.text print(f" - At {start:.2f}s: {text}") - create_clip_from_vod(video_filename, start, vod_id) + create_clip_from_vod(video_filename, start, vod) else: print("faster_whisper -- No mentions of keywords.") -def handle_matches(vod_id, video_filename, result): +def handle_matches(vod, video_filename, result): matches = search_transcription(result, SEARCH_KEYWORDS) if matches: - print(f"Found {len(matches)} mention(s) of {SEARCH_KEYWORDS} in VOD {vod_id}:") + print(f"Found {len(matches)} mention(s) of {SEARCH_KEYWORDS} in VOD {vod['id']}:") for match in matches: start = match["start"] text = match["text"] print(f" - At {start:.2f}s: {text}") - create_clip_from_vod(video_filename, start, vod_id) + create_clip_from_vod(video_filename, start, vod) else: - print(f"No mentions of {SEARCH_KEYWORDS} found in VOD {vod_id}.") + print(f"No mentions of {SEARCH_KEYWORDS} found in VOD {vod['id']}.") @@ -329,31 +345,32 @@ def main(): print("Transcribing audio. This may take some time...") result = transcribe_audio_fast(audio_filename, MODEL_NAME) - scrape_chat_log(vod_id, chat_log_filename) + if CLIP_CREATE_FROM_CHAT: + scrape_chat_log(vod_id, chat_log_filename) # Search transcript for keywords # handle_matches(vod_id, video_filename, result) handle_matches_fast(vod_id, video_filename, result) - # Load chat log from file - try: - with open(chat_log_filename, "r", encoding="utf-8") as f: - chat_log = json.load(f) - except Exception as e: - print(f"Error loading chat log: {e}") - chat_log = [] + if CLIP_CREATE_FROM_CHAT: + # Load chat log from file + try: + with open(chat_log_filename, "r", encoding="utf-8") as f: + chat_log = json.load(f) + except Exception as e: + print(f"Error loading chat log: {e}") + chat_log = [] - - # Search chat log using an array of keywords (using the same keywords as for transcript) - comment_matches = find_comments_by_keywords(chat_log, SEARCH_KEYWORDS) - if comment_matches: - for comment in comment_matches: - # Try to get the timestamp from the "offset" field (or fallback to "content_offset_seconds") - timestamp = comment["content_offset_seconds"] - print(f"Found a matching comment at {timestamp} seconds.") - create_clip_from_comment_timestamp(video_filename, timestamp, vod_id) - else: - print("No matching comments found.") + # Search chat log using an array of keywords (using the same keywords as for transcript) + comment_matches = find_comments_by_keywords(chat_log, SEARCH_KEYWORDS) + if comment_matches: + for comment in comment_matches: + # Try to get the timestamp from the "offset" field (or fallback to "content_offset_seconds") + timestamp = comment["content_offset_seconds"] + print(f"Found a matching comment at {timestamp} seconds.") + create_clip_from_comment_timestamp(video_filename, timestamp, vod) + else: + print("No matching comments found.") if __name__ == "__main__": main() \ No newline at end of file