edits

2025-03-03 17:07:51 +01:00 · 2025-03-03 17:07:51 +01:00 · 960f841cb4
commit 960f841cb4
parent 2160bef026
2 changed files with 54 additions and 36 deletions
--- a/generate-docker-compose.py
+++ b/generate-docker-compose.py
@@ -17,8 +17,9 @@ for channel in channels:
        "environment": [
            f"CHANNEL_NAME={channel['name']}",
            f"CHANNEL_LANGUAGE={channel['language']}",
-            "TIMEDELTA_DAYS=3",
-            "TIMEDELTA_DAYS_EXACT=false",
+            "TIMEDELTA_DAYS=1",
+            "TIMEDELTA_DAYS_EXACT=true",
+            "CLIP_CREATE_FROM_CHAT=false",
            "TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov",
            "TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es"
        ],
--- a/main.py
+++ b/main.py
@@ -13,9 +13,10 @@ import json
 TWITCH_CLIENT_ID = os.environ.get("TWITCH_CLIENT_ID", "")
 TWITCH_CLIENT_SECRET = os.environ.get("TWITCH_CLIENT_SECRET", "")
 CHANNEL_NAME = os.environ.get("CHANNEL_NAME", "madmonq")
-TIMEDELTA_DAYS = int(os.environ.get("TIMEDELTA_DAYS", "3"))
+TIMEDELTA_DAYS = int(os.environ.get("TIMEDELTA_DAYS", "1"))
 TIMEDELTA_DAYS_EXACT = os.environ.get("TIMEDELTA_DAYS_EXACT", "false").lower() in ("true", "1", "yes")
-CHANNEL_LANGUAGE = os.environ.get("CHANNEL_LANGUAGE", "en")
+CLIP_CREATE_FROM_CHAT = os.environ.get("CLIP_CREATE_FROM_CHAT", "false").lower() in ("true", "1", "yes")
+CHANNEL_LANGUAGE = os.environ.get("CHANNEL_LANGUAGE", "cs")
 SEARCH_KEYWORDS = [
  "madmonq",
  "madmonge",
@@ -184,12 +185,20 @@ def scrape_chat_log(vod_id, output_filename):
    except subprocess.CalledProcessError as e:
        print(f"Error downloading chat log for VOD {vod_id}: {e}")

-def create_clip_from_vod(video_file, match_start, vod_id):
+def create_clip_from_vod(video_file, match_start, vod):
    clip_start = max(match_start - 15, 0)
    clip_duration = 60  # seconds
    clip_dir = base_dirs["clips_transcript"]
-    os.makedirs(clip_dir, exist_ok=True)
-    clip_filename = os.path.join(clip_dir, f"clip_{vod_id}_{int(match_start)}.mp4")
+
+    vod_datetime = datetime.strptime(vod['created_at'], '%Y-%m-%dT%H:%M:%SZ')
+    date_folder = vod_datetime.strftime('%d-%m-%y')
+
+    # Create a subfolder inside clip_dir for the date.
+    clip_date_dir = os.path.join(clip_dir, date_folder)
+    os.makedirs(clip_date_dir, exist_ok=True)
+
+    # Build the clip filename inside the date folder.
+    clip_filename = os.path.join(clip_date_dir, f"clip_{vod['id']}_{int(match_start)}.mp4")
    command = [
        "ffmpeg",
        "-ss", str(clip_start),
@@ -230,12 +239,19 @@ def find_comments_by_keywords(chat_log, keywords):
                break  # No need to check further keywords for this comment.
    return matching_comments

-def create_clip_from_comment_timestamp(video_file, comment_timestamp, vod_id):
+def create_clip_from_comment_timestamp(video_file, comment_timestamp, vod):
    clip_start = max(comment_timestamp - 15, 0)
    clip_duration = 60  # seconds
    clip_dir = base_dirs["clips_chat"]
-    os.makedirs(clip_dir, exist_ok=True)
-    clip_filename = os.path.join(clip_dir, f"clip_{vod_id}_{int(comment_timestamp)}.mp4")
+    vod_datetime = datetime.strptime(vod['created_at'], '%Y-%m-%dT%H:%M:%SZ')
+    date_folder = vod_datetime.strftime('%d-%m-%y')
+
+    # Create a subfolder inside clip_dir for the date.
+    clip_date_dir = os.path.join(clip_dir, date_folder)
+    os.makedirs(clip_date_dir, exist_ok=True)
+
+    # Build the clip filename inside the date folder.
+    clip_filename = os.path.join(clip_date_dir, f"clip_{vod['id']}_{int(comment_timestamp)}.mp4")
    command = [
        "ffmpeg",
        "-ss", str(clip_start),
@@ -252,7 +268,7 @@ def create_clip_from_comment_timestamp(video_file, comment_timestamp, vod_id):
 # ---------------------------
 # Main Processing Pipeline
 # ---------------------------
-def handle_matches_fast(vod_id, video_filename, result):
+def handle_matches_fast(vod, video_filename, result):
    matches_fast = []
    for segment in result:
        segment_text = segment.text.lower()
@@ -262,27 +278,27 @@ def handle_matches_fast(vod_id, video_filename, result):
                break

    if matches_fast:
-        print(f"faster_whisper -- Found {len(matches_fast)} mention(s) of {SEARCH_KEYWORDS} in VOD {vod_id}:")
+        print(f"faster_whisper -- Found {len(matches_fast)} mention(s) of {SEARCH_KEYWORDS} in VOD {vod['id']}:")
        for match in matches_fast:
            start = match.start  # faster-whisper segment attribute
            text = match.text
            print(f" - At {start:.2f}s: {text}")
-            create_clip_from_vod(video_filename, start, vod_id)
+            create_clip_from_vod(video_filename, start, vod)
    else:
        print("faster_whisper -- No mentions of keywords.")


-def handle_matches(vod_id, video_filename, result):
+def handle_matches(vod, video_filename, result):
    matches = search_transcription(result, SEARCH_KEYWORDS)
    if matches:
-        print(f"Found {len(matches)} mention(s) of {SEARCH_KEYWORDS} in VOD {vod_id}:")
+        print(f"Found {len(matches)} mention(s) of {SEARCH_KEYWORDS} in VOD {vod['id']}:")
        for match in matches:
            start = match["start"]
            text = match["text"]
            print(f" - At {start:.2f}s: {text}")
-            create_clip_from_vod(video_filename, start, vod_id)
+            create_clip_from_vod(video_filename, start, vod)
    else:
-        print(f"No mentions of {SEARCH_KEYWORDS} found in VOD {vod_id}.")
+        print(f"No mentions of {SEARCH_KEYWORDS} found in VOD {vod['id']}.")



@@ -329,31 +345,32 @@ def main():
        print("Transcribing audio. This may take some time...")
        result = transcribe_audio_fast(audio_filename, MODEL_NAME)

-        scrape_chat_log(vod_id, chat_log_filename)
+        if CLIP_CREATE_FROM_CHAT:
+            scrape_chat_log(vod_id, chat_log_filename)

        # Search transcript for keywords
        # handle_matches(vod_id, video_filename, result)
        handle_matches_fast(vod_id, video_filename, result)

-        # Load chat log from file
-        try:
-            with open(chat_log_filename, "r", encoding="utf-8") as f:
-                chat_log = json.load(f)
-        except Exception as e:
-            print(f"Error loading chat log: {e}")
-            chat_log = []
+        if CLIP_CREATE_FROM_CHAT:
+            # Load chat log from file
+            try:
+                with open(chat_log_filename, "r", encoding="utf-8") as f:
+                    chat_log = json.load(f)
+            except Exception as e:
+                print(f"Error loading chat log: {e}")
+                chat_log = []

-
-        # Search chat log using an array of keywords (using the same keywords as for transcript)
-        comment_matches = find_comments_by_keywords(chat_log, SEARCH_KEYWORDS)
-        if comment_matches:
-            for comment in comment_matches:
-                # Try to get the timestamp from the "offset" field (or fallback to "content_offset_seconds")
-                timestamp = comment["content_offset_seconds"]
-                print(f"Found a matching comment at {timestamp} seconds.")
-                create_clip_from_comment_timestamp(video_filename, timestamp, vod_id)
-        else:
-            print("No matching comments found.")
+            # Search chat log using an array of keywords (using the same keywords as for transcript)
+            comment_matches = find_comments_by_keywords(chat_log, SEARCH_KEYWORDS)
+            if comment_matches:
+                for comment in comment_matches:
+                    # Read the clip timestamp from the comment's "content_offset_seconds" field.
+                    timestamp = comment["content_offset_seconds"]
+                    print(f"Found a matching comment at {timestamp} seconds.")
+                    create_clip_from_comment_timestamp(video_filename, timestamp, vod)
+            else:
+                print("No matching comments found.")

 if __name__ == "__main__":
    main()