From cbdfa9f76a7c8722952c41113c73d4fe6392a8be Mon Sep 17 00:00:00 2001 From: t0is Date: Wed, 26 Mar 2025 14:23:36 +0100 Subject: [PATCH] insert also clip --- main.py | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/main.py b/main.py index 9d5b9e3..28932d9 100644 --- a/main.py +++ b/main.py @@ -273,7 +273,7 @@ def seconds_to_timestamp(seconds): secs = int(seconds % 60) return f"{hours:02}:{minutes:02}:{secs:02}" -def download_vod_segment(vod, match_start, duration=60): +def download_vod_segment(db, vod, match_start, duration=60): """ Downloads a segment of a VOD using yt-dlp. @@ -312,11 +312,12 @@ def download_vod_segment(vod, match_start, duration=60): subprocess.run(command, check=True) print(f"Downloaded segment from {start_ts} to {end_ts} into {clip_filename}") + insert_clip(db, vod['id'], clip_filename) # --------------------------- # Main Processing Pipeline # --------------------------- -def handle_matches_fast(vod, segments_data): +def handle_matches_fast(db, vod, segments_data): matches_fast = [] for segment in segments_data: segment_text = segment["text"].lower() @@ -332,7 +333,7 @@ def handle_matches_fast(vod, segments_data): text = match["text"] print(f" - At {start:.2f}s: {text}") # create_clip_from_vod(video_filename, start, vod) - download_vod_segment(vod, start) + download_vod_segment(db, vod, start) else: print("faster_whisper -- No mentions of keywords.") @@ -399,8 +400,6 @@ def insert_transcription(db, video_id, filename): db: A MariaDB connection object. video_id (int): The foreign key referencing the videos table. filename (str): The transcription file name. - transcription_start (datetime, optional): The transcription start time. Defaults to now if None. - transcription_finish (datetime, optional): The transcription finish time. Defaults to None. Returns: int: The ID of the inserted transcription record. 
@@ -418,6 +417,31 @@ def insert_transcription(db, video_id, filename): print(f"Inserted transcription for video_id {video_id} with filename '{filename}' (ID: {inserted_id})") return inserted_id +def insert_clip(db, video_id, filename): + """ + Inserts a new clip record into the clips table. + + Parameters: + db: A MariaDB connection object. + video_id (int): The foreign key referencing the videos table. + filename (str): The clip file name. + + Returns: + int: The ID of the inserted clip record. + """ + + cursor = db.cursor() + query = """ + INSERT INTO clips (video_id, filename) + VALUES (%s, %s) + """ + cursor.execute(query, (video_id, filename)) + db.commit() + inserted_id = cursor.lastrowid + cursor.close() + print(f"Inserted clip for video_id {video_id} with filename '{filename}' (ID: {inserted_id})") + return inserted_id + def db_set_transcription_finish(db, video_id): """ Updates the specified column (e.g. data_downloaded) for the video. @@ -496,7 +520,7 @@ def main(): # Pass language and vod_id so that the transcript is saved and reused if available. segments_data = transcribe_audio_fast(audio_filename, language=channel_language, vod_id=vod_id) - handle_matches_fast(video, segments_data) + handle_matches_fast(db, video, segments_data) db_set_transcription_finish(db, video_id) db_set_video_processed(db, video_id)