edits

2025-03-20 16:14:29 +01:00 · 2025-03-20 16:14:29 +01:00 · 47ebcb040b
commit 47ebcb040b
parent 82568705ab
3 changed files with 260 additions and 2445 deletions
--- a/docker-compose.yml
+++ b/docker-compose.yml
--- a/generate-docker-compose.py
+++ b/generate-docker-compose.py
@ -14,18 +14,15 @@ yaml.add_representer(InlineList, inline_list_representer)
 with open("channels.json", "r") as f:
    channels = json.load(f)
-compose = {
+# Instead of multiple services, pass all channels as a JSON string to one container
-    "services": {}
+channels_json_str = json.dumps(channels)
 }
-# For each channel, create a service entry
+compose = {
-for channel in channels:
+    "services": {
-    service_name = f"scanner_{channel['name']}"
+        "transcriptor": {
    compose["services"][service_name] = {
            "image": "t0is/madmonq-transcriptor-image:cuda",
            "environment": [
-            f"CHANNEL_NAME={channel['name']}",
+                f"CHANNELS_JSON={channels_json_str}",
            f"CHANNEL_LANGUAGE={channel['language']}",
                "TIMEDELTA_DAYS=10",
                "TIMEDELTA_DAYS_EXACT=false",
                "CLIP_CREATE_FROM_CHAT=false",
@ -53,6 +50,27 @@ for channel in channels:
                    }
                }
            }
        },
        "downloader": {
            "image": "t0is/madmonq-transcriptor-image:download-only",
            "environment": [
                f"CHANNELS_JSON={channels_json_str}",
                "TIMEDELTA_DAYS=10",
                "TIMEDELTA_DAYS_EXACT=false",
                "CLIP_CREATE_FROM_CHAT=false",
                "TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov",
                "TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es"
            ],
            "volumes": [
                "/shared/transcriptor/clips:/app/clips",
                "/shared/transcriptor/vods:/app/vods",
                "/shared/transcriptor/audio:/app/audio",
                "/shared/transcriptor/chat:/app/chat",
                "/shared/transcriptor/models:/app/models",
                "/shared/transcriptor/transcripts:/app/transcripts"
            ]
        }
    }
 }
 # Write the docker-compose file
--- a/main.py
+++ b/main.py
@ -12,7 +12,6 @@ import json
 # ---------------------------
 TWITCH_CLIENT_ID = os.environ.get("TWITCH_CLIENT_ID", "")
 TWITCH_CLIENT_SECRET = os.environ.get("TWITCH_CLIENT_SECRET", "")
 CHANNEL_NAME = os.environ.get("CHANNEL_NAME", "madmonq")
 TIMEDELTA_DAYS = int(os.environ.get("TIMEDELTA_DAYS", "1"))
 TIMEDELTA_DAYS_EXACT = os.environ.get("TIMEDELTA_DAYS_EXACT", "false").lower() in ("true", "1", "yes")
 CLIP_CREATE_FROM_CHAT = os.environ.get("CLIP_CREATE_FROM_CHAT", "false").lower() in ("true", "1", "yes")
@ -40,19 +39,12 @@ SEARCH_KEYWORDS = [
 ]
 MODEL_NAME = "turbo"  # Whisper model
 # Define base directories for each file category under a folder named after the channel.
 base_dirs = {
    "vods": os.path.join("vods", CHANNEL_NAME),
    "audio": os.path.join("audio", CHANNEL_NAME),
    "transcripts": os.path.join("transcripts", CHANNEL_NAME),
    "chat": os.path.join("chat", CHANNEL_NAME),
    "clips_transcript": os.path.join("clips", CHANNEL_NAME, "from_vod"),
    "clips_chat": os.path.join("clips", CHANNEL_NAME, "from_chat")
 }
-# Create directories if they do not exist.
+channels_str = os.environ.get("CHANNELS_JSON", "[]")
-for path in base_dirs.values():
+try:
-    os.makedirs(path, exist_ok=True)
+    channels = json.loads(channels_str)
 except json.JSONDecodeError:
    raise ValueError("Invalid JSON in CHANNELS_JSON environment variable")
 # ---------------------------
 # Twitch API Helper Functions
@ -390,14 +382,45 @@ def main():
    token = get_access_token()
    print("Access token obtained.")
-    channel_id = get_channel_id(CHANNEL_NAME, token)
+
    for channel in channels:
        try:
            print(f"Channel Name: {channel['name']}, Language: {channel['language']}")
            channel_name = channel['name']
            base_dirs = {
                "vods": os.path.join("vods", channel_name),
                "audio": os.path.join("audio", channel_name),
                "transcripts": os.path.join("transcripts", channel_name),
                "chat": os.path.join("chat", channel_name),
                "clips_transcript": os.path.join("clips", channel_name, "from_vod"),
                "clips_chat": os.path.join("clips", channel_name, "from_chat")
            }
            # Create directories if they do not exist.
            for path in base_dirs.values():
                os.makedirs(path, exist_ok=True)
            # if channel['platform'] == "youtube":
            #     channel_id = get_youtube_channel_id(channel_name, YOUTUBE_API_KEY)
            #     if not channel_id:
            #         print(f"No channel {channel_name} found on YouTube.")
            #         continue
            #     else:
            #         vods = get_youtube_livestream_vods(channel_id, YOUTUBE_API_KEY)
            # else:
            channel_id = get_channel_id(channel_name, token)
            if not channel_id:
-        return
+                print(f"No channel {channel_name} found on Twitch.")
                continue
            vods = get_vods(channel_id, token)
            if not vods:
-        print("No VODs from yesterday found.")
+                print("No VODs found.")
-        return
+                continue
            for vod in vods:
                vod_url = vod["url"]
@ -412,7 +435,10 @@ def main():
                print(f"\nProcessing VOD: {vod_url}")
                # download_vod(vod_url, video_filename)
                # extract_audio(video_filename, audio_filename)
-        download_vod_audio(vod_url, audio_filename)
+                # download_vod_audio(vod_url, audio_filename)
                if not os.path.exists(audio_filename):
                    print(f"{audio_filename} not downloaded yet, skipping...")
                    continue
                print("Transcribing audio. This may take some time...")
                # Pass language and vod_id so that the transcript is saved and reused if available.
@ -441,6 +467,7 @@ def main():
                            create_clip_from_comment_timestamp(video_filename, timestamp, vod)
                    else:
                        print("No matching comments found.")
-
+        except:
            continue
 if __name__ == "__main__":
    main()