chat edits

2025-02-21 15:24:11 +01:00 · 2025-02-21 15:24:11 +01:00 · 94a4084236
commit 94a4084236
parent 109514f14f
1 changed files with 39 additions and 31 deletions
--- a/main.py
+++ b/main.py
@ -116,33 +116,29 @@ def search_transcription(result, keywords):
                    break  # Stop checking further keywords for this segment
    return matches

+
 def scrape_chat_log(vod_id, output_filename):
+    """
+    Uses TwitchDownloaderCLI to download the chat log for a given VOD.
+    The chat log is saved in JSON format to output_filename.
+    """
    if os.path.exists(output_filename):
        print(f"{output_filename} already exists. Skipping chat log scrape.")
        return
-    headers = {
-        "Client-ID": TWITCH_CLIENT_ID,
-        "Accept": "application/vnd.twitchtv.v5+json"
-    }
-    base_url = f"https://api.twitch.tv/v5/videos/{vod_id}/comments"
-    comments = []
-    cursor = None
-    while True:
-        params = {}
-        if cursor:
-            params["cursor"] = cursor
-        response = requests.get(base_url, headers=headers, params=params)
-        if response.status_code != 200:
-            print(f"Error fetching chat comments for VOD {vod_id}: {response.text}")
-            break
-        data = response.json()
-        comments.extend(data.get("comments", []))
-        cursor = data.get("_next")
-        if not cursor:
-            break
-    with open(output_filename, "w", encoding="utf-8") as f:
-        json.dump(comments, f, ensure_ascii=False, indent=4)
-    print(f"Chat log saved to {output_filename}")
+
+    # Build the TwitchDownloaderCLI command.
+    # The command downloads the chat log in JSON format for the specified VOD.
+    command = [
+        "TwitchDownloaderCLI", "chatdownload",
+        "--id", vod_id,
+        "--output", output_filename
+    ]
+
+    try:
+        subprocess.run(command, check=True)
+        print(f"Chat log saved to {output_filename}")
+    except subprocess.CalledProcessError as e:
+        print(f"Error downloading chat log for VOD {vod_id}: {e}")

 def create_clip_from_vod(video_file, match_start, vod_id):
    clip_start = max(match_start - 15, 0)
@ -164,19 +160,30 @@ def create_clip_from_vod(video_file, match_start, vod_id):
    return clip_filename

 def find_comments_by_keywords(chat_log, keywords):
+    """
+    Searches the chat log for any comments containing one of the given keywords.
+    The chat log can be either:
+      - a raw list of comment objects, or
+      - an object with a "comments" key containing the list.
+    Each comment is expected to have:
+      - a "message" object whose "body" key holds the comment text (as a string)
+      - a "content_offset_seconds" key with the timestamp (read by the caller, not here).
+    Returns a list of matching comment objects.
+    """
    matching_comments = []
-    # Ensure chat_log is a list of dictionaries.
+    # If the chat log is wrapped in an object, extract the list.
+    if isinstance(chat_log, dict) and "comments" in chat_log:
+        chat_log = chat_log["comments"]
+
    for comment in chat_log:
        if not isinstance(comment, dict):
            continue
-        message = comment.get("message", {})
-        if not isinstance(message, dict):
-            continue
-        text = message.get("body", "").lower()
+        # Get the message text; it is stored as a string under comment["message"]["body"]
+        message_text = comment['message']['body'].lower()
        for keyword in keywords:
-            if keyword.lower() in text:
+            if keyword.lower() in message_text:
                matching_comments.append(comment)
-                break
+                break  # No need to check further keywords for this comment.
    return matching_comments

 def create_clip_from_comment_timestamp(video_file, comment_timestamp, vod_id):
@ -267,7 +274,8 @@ def main():
        comment_matches = find_comments_by_keywords(chat_log, SEARCH_KEYWORDS)
        if comment_matches:
            for comment in comment_matches:
-                timestamp = comment.get("content_offset_seconds")
+                # Read the comment's timestamp (in seconds) from the "content_offset_seconds" field
+                timestamp = comment["content_offset_seconds"]
                print(f"Found a matching comment at {timestamp} seconds.")
                create_clip_from_comment_timestamp(video_filename, timestamp, vod_id)
        else: