def scrape_chat_log(vod_id, output_filename):
    """Download the chat log of a Twitch VOD with TwitchDownloaderCLI.

    Runs ``TwitchDownloaderCLI chatdownload --id <vod_id> --output
    <output_filename>``. The download is skipped when ``output_filename``
    already exists. Failures are reported on stdout rather than raised, so
    the caller can carry on without a chat log.

    Args:
        vod_id: Identifier of the VOD; converted to ``str`` for the command
            line, so both ints and strings are accepted.
        output_filename: Path the CLI is asked to write the chat log to.
            NOTE(review): the callers expect JSON; presumably the CLI picks
            the format from the file extension -- confirm against its docs.

    Returns:
        None.
    """
    if os.path.exists(output_filename):
        print(f"{output_filename} already exists. Skipping chat log scrape.")
        return

    # subprocess requires every argument to be a string (or path-like); an
    # integer VOD id would otherwise raise TypeError.
    command = [
        "TwitchDownloaderCLI", "chatdownload",
        "--id", str(vod_id),
        "--output", output_filename,
    ]

    try:
        subprocess.run(command, check=True)
    except (OSError, subprocess.CalledProcessError) as e:
        # OSError covers a missing or non-executable TwitchDownloaderCLI;
        # CalledProcessError covers a non-zero exit status.
        print(f"Error downloading chat log for VOD {vod_id}: {e}")
    else:
        print(f"Chat log saved to {output_filename}")


def _comment_text(comment):
    """Return the text of a chat comment, or None if it has no usable text."""
    if not isinstance(comment, dict):
        return None
    message = comment.get("message")
    if isinstance(message, dict):
        message = message.get("body")
    return message if isinstance(message, str) else None


def find_comments_by_keywords(chat_log, keywords):
    """Return the chat comments whose text contains any of the keywords.

    Matching is a case-insensitive substring test.

    Args:
        chat_log: Either a list of comment objects, or a dict holding that
            list under the ``"comments"`` key. ``None`` or an empty value
            yields an empty result.
        keywords: Iterable of keyword strings.

    Each comment is expected to be a dict whose ``"message"`` value is
    either the text itself (a string) or a dict carrying the text under
    ``"body"``. Entries that fit neither shape are skipped instead of
    raising. Timestamp fields are not inspected here; callers read them
    (e.g. ``"content_offset_seconds"``) from the returned objects.

    Returns:
        A list of the matching comment objects, in their original order.
    """
    # If the chat log is wrapped in an object, extract the list.
    if isinstance(chat_log, dict):
        chat_log = chat_log.get("comments")
    if not chat_log:
        return []

    # Lower-case the keywords once instead of once per comment.
    needles = [keyword.lower() for keyword in keywords]

    matching_comments = []
    for comment in chat_log:
        text = _comment_text(comment)
        if text is None:
            continue
        text = text.lower()
        if any(needle in text for needle in needles):
            matching_comments.append(comment)
    return matching_comments