insert also clip

This commit is contained in:
t0is 2025-03-26 14:23:36 +01:00
parent 1b54547697
commit cbdfa9f76a

36
main.py
View File

@ -273,7 +273,7 @@ def seconds_to_timestamp(seconds):
secs = int(seconds % 60)
return f"{hours:02}:{minutes:02}:{secs:02}"
def download_vod_segment(vod, match_start, duration=60):
def download_vod_segment(db, vod, match_start, duration=60):
"""
Downloads a segment of a VOD using yt-dlp.
@ -312,11 +312,12 @@ def download_vod_segment(vod, match_start, duration=60):
subprocess.run(command, check=True)
print(f"Downloaded segment from {start_ts} to {end_ts} into {clip_filename}")
insert_clip(db, vod['id'], clip_filename)
# ---------------------------
# Main Processing Pipeline
# ---------------------------
def handle_matches_fast(vod, segments_data):
def handle_matches_fast(db, vod, segments_data):
matches_fast = []
for segment in segments_data:
segment_text = segment["text"].lower()
@ -332,7 +333,7 @@ def handle_matches_fast(vod, segments_data):
text = match["text"]
print(f" - At {start:.2f}s: {text}")
# create_clip_from_vod(video_filename, start, vod)
download_vod_segment(vod, start)
download_vod_segment(db, vod, start)
else:
print("faster_whisper -- No mentions of keywords.")
@ -399,8 +400,6 @@ def insert_transcription(db, video_id, filename):
db: A MariaDB connection object.
video_id (int): The foreign key referencing the videos table.
filename (str): The transcription file name.
transcription_start (datetime, optional): The transcription start time. Defaults to now if None.
transcription_finish (datetime, optional): The transcription finish time. Defaults to None.
Returns:
int: The ID of the inserted transcription record.
@ -418,6 +417,31 @@ def insert_transcription(db, video_id, filename):
print(f"Inserted transcription for video_id {video_id} with filename '{filename}' (ID: {inserted_id})")
return inserted_id
def insert_clip(db, video_id, filename):
    """
    Inserts a new clip record into the clips table.

    Parameters:
        db: A MariaDB connection object.
        video_id (int): The foreign key referencing the videos table.
        filename (str): The clip file name.

    Returns:
        int: The ID of the inserted clip record.

    Any exception raised by the cursor or the commit propagates to the
    caller; the cursor is closed in either case.
    """
    query = """
        INSERT INTO clips (video_id, filename)
        VALUES (%s, %s)
    """
    cursor = db.cursor()
    try:
        cursor.execute(query, (video_id, filename))
        db.commit()
        # Read the generated key while the cursor is still open.
        inserted_id = cursor.lastrowid
    finally:
        # Release the cursor even when the insert or the commit fails.
        cursor.close()
    print(f"Inserted clip for video_id {video_id} with filename '{filename}' (ID: {inserted_id})")
    return inserted_id
def db_set_transcription_finish(db, video_id):
"""
Updates the specified column (e.g. data_downloaded) for the video.
@ -496,7 +520,7 @@ def main():
# Pass language and vod_id so that the transcript is saved and reused if available.
segments_data = transcribe_audio_fast(audio_filename, language=channel_language, vod_id=vod_id)
handle_matches_fast(video, segments_data)
handle_matches_fast(db, video, segments_data)
db_set_transcription_finish(db, video_id)
db_set_video_processed(db, video_id)