This commit is contained in:
t0is 2025-03-03 17:07:51 +01:00
parent 2160bef026
commit 960f841cb4
2 changed files with 54 additions and 36 deletions

View File

@ -17,8 +17,9 @@ for channel in channels:
"environment": [
f"CHANNEL_NAME={channel['name']}",
f"CHANNEL_LANGUAGE={channel['language']}",
"TIMEDELTA_DAYS=3",
"TIMEDELTA_DAYS_EXACT=false",
"TIMEDELTA_DAYS=1",
"TIMEDELTA_DAYS_EXACT=true",
"CLIP_CREATE_FROM_CHAT=false",
"TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov",
"TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es"
],

85
main.py
View File

@ -13,9 +13,10 @@ import json
TWITCH_CLIENT_ID = os.environ.get("TWITCH_CLIENT_ID", "")
TWITCH_CLIENT_SECRET = os.environ.get("TWITCH_CLIENT_SECRET", "")
CHANNEL_NAME = os.environ.get("CHANNEL_NAME", "madmonq")
TIMEDELTA_DAYS = int(os.environ.get("TIMEDELTA_DAYS", "3"))
TIMEDELTA_DAYS = int(os.environ.get("TIMEDELTA_DAYS", "1"))
TIMEDELTA_DAYS_EXACT = os.environ.get("TIMEDELTA_DAYS_EXACT", "false").lower() in ("true", "1", "yes")
CHANNEL_LANGUAGE = os.environ.get("CHANNEL_LANGUAGE", "en")
CLIP_CREATE_FROM_CHAT = os.environ.get("CLIP_CREATE_FROM_CHAT", "false").lower() in ("true", "1", "yes")
CHANNEL_LANGUAGE = os.environ.get("CHANNEL_LANGUAGE", "cs")
SEARCH_KEYWORDS = [
"madmonq",
"madmonge",
@ -184,12 +185,20 @@ def scrape_chat_log(vod_id, output_filename):
except subprocess.CalledProcessError as e:
print(f"Error downloading chat log for VOD {vod_id}: {e}")
def create_clip_from_vod(video_file, match_start, vod_id):
def create_clip_from_vod(video_file, match_start, vod):
clip_start = max(match_start - 15, 0)
clip_duration = 60 # seconds
clip_dir = base_dirs["clips_transcript"]
os.makedirs(clip_dir, exist_ok=True)
clip_filename = os.path.join(clip_dir, f"clip_{vod_id}_{int(match_start)}.mp4")
vod_datetime = datetime.strptime(vod['created_at'], '%Y-%m-%dT%H:%M:%SZ')
date_folder = vod_datetime.strftime('%d-%m-%y')
# Create a subfolder inside clip_dir for the date.
clip_date_dir = os.path.join(clip_dir, date_folder)
os.makedirs(clip_date_dir, exist_ok=True)
# Build the clip filename inside the date folder.
clip_filename = os.path.join(clip_date_dir, f"clip_{vod['id']}_{int(match_start)}.mp4")
command = [
"ffmpeg",
"-ss", str(clip_start),
@ -230,12 +239,19 @@ def find_comments_by_keywords(chat_log, keywords):
break # No need to check further keywords for this comment.
return matching_comments
def create_clip_from_comment_timestamp(video_file, comment_timestamp, vod_id):
def create_clip_from_comment_timestamp(video_file, comment_timestamp, vod):
clip_start = max(comment_timestamp - 15, 0)
clip_duration = 60 # seconds
clip_dir = base_dirs["clips_chat"]
os.makedirs(clip_dir, exist_ok=True)
clip_filename = os.path.join(clip_dir, f"clip_{vod_id}_{int(comment_timestamp)}.mp4")
vod_datetime = datetime.strptime(vod['created_at'], '%Y-%m-%dT%H:%M:%SZ')
date_folder = vod_datetime.strftime('%d-%m-%y')
# Create a subfolder inside clip_dir for the date.
clip_date_dir = os.path.join(clip_dir, date_folder)
os.makedirs(clip_date_dir, exist_ok=True)
# Build the clip filename inside the date folder.
clip_filename = os.path.join(clip_date_dir, f"clip_{vod['id']}_{int(comment_timestamp)}.mp4")
command = [
"ffmpeg",
"-ss", str(clip_start),
@ -252,7 +268,7 @@ def create_clip_from_comment_timestamp(video_file, comment_timestamp, vod_id):
# ---------------------------
# Main Processing Pipeline
# ---------------------------
def handle_matches_fast(vod_id, video_filename, result):
def handle_matches_fast(vod, video_filename, result):
matches_fast = []
for segment in result:
segment_text = segment.text.lower()
@ -262,27 +278,27 @@ def handle_matches_fast(vod_id, video_filename, result):
break
if matches_fast:
print(f"faster_whisper -- Found {len(matches_fast)} mention(s) of {SEARCH_KEYWORDS} in VOD {vod_id}:")
print(f"faster_whisper -- Found {len(matches_fast)} mention(s) of {SEARCH_KEYWORDS} in VOD {vod['id']}:")
for match in matches_fast:
start = match.start # faster-whisper segment attribute
text = match.text
print(f" - At {start:.2f}s: {text}")
create_clip_from_vod(video_filename, start, vod_id)
create_clip_from_vod(video_filename, start, vod)
else:
print("faster_whisper -- No mentions of keywords.")
def handle_matches(vod, video_filename, result):
    """Create a clip for every transcript match of the search keywords.

    Args:
        vod: VOD metadata mapping. ``vod['id']`` is used in the log output
            and the whole mapping is forwarded to ``create_clip_from_vod``.
        video_filename: Video file handed through to ``create_clip_from_vod``.
        result: Transcription result passed to ``search_transcription``
            together with ``SEARCH_KEYWORDS``.
    """
    matches = search_transcription(result, SEARCH_KEYWORDS)
    if not matches:
        print(f"No mentions of {SEARCH_KEYWORDS} found in VOD {vod['id']}.")
        return

    print(f"Found {len(matches)} mention(s) of {SEARCH_KEYWORDS} in VOD {vod['id']}:")
    for match in matches:
        start = match["start"]
        text = match["text"]
        print(f" - At {start:.2f}s: {text}")
        # Pass the full VOD mapping rather than just its id; the clip helper
        # reads both vod['id'] and vod['created_at'].
        create_clip_from_vod(video_filename, start, vod)
@ -329,31 +345,32 @@ def main():
print("Transcribing audio. This may take some time...")
result = transcribe_audio_fast(audio_filename, MODEL_NAME)
scrape_chat_log(vod_id, chat_log_filename)
if CLIP_CREATE_FROM_CHAT:
scrape_chat_log(vod_id, chat_log_filename)
# Search transcript for keywords
# handle_matches(vod_id, video_filename, result)
handle_matches_fast(vod_id, video_filename, result)
# Load chat log from file
try:
with open(chat_log_filename, "r", encoding="utf-8") as f:
chat_log = json.load(f)
except Exception as e:
print(f"Error loading chat log: {e}")
chat_log = []
if CLIP_CREATE_FROM_CHAT:
# Load chat log from file
try:
with open(chat_log_filename, "r", encoding="utf-8") as f:
chat_log = json.load(f)
except Exception as e:
print(f"Error loading chat log: {e}")
chat_log = []
# Search chat log using an array of keywords (using the same keywords as for transcript)
comment_matches = find_comments_by_keywords(chat_log, SEARCH_KEYWORDS)
if comment_matches:
for comment in comment_matches:
# Read the comment's timestamp from its "content_offset_seconds" field.
timestamp = comment["content_offset_seconds"]
print(f"Found a matching comment at {timestamp} seconds.")
create_clip_from_comment_timestamp(video_filename, timestamp, vod_id)
else:
print("No matching comments found.")
# Search chat log using an array of keywords (using the same keywords as for transcript)
comment_matches = find_comments_by_keywords(chat_log, SEARCH_KEYWORDS)
if comment_matches:
for comment in comment_matches:
# Read the comment's timestamp from its "content_offset_seconds" field.
timestamp = comment["content_offset_seconds"]
print(f"Found a matching comment at {timestamp} seconds.")
create_clip_from_comment_timestamp(video_filename, timestamp, vod)
else:
print("No matching comments found.")
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()