Also insert a clip record
This commit is contained in:
parent
1b54547697
commit
cbdfa9f76a
36
main.py
36
main.py
@ -273,7 +273,7 @@ def seconds_to_timestamp(seconds):
|
|||||||
secs = int(seconds % 60)
|
secs = int(seconds % 60)
|
||||||
return f"{hours:02}:{minutes:02}:{secs:02}"
|
return f"{hours:02}:{minutes:02}:{secs:02}"
|
||||||
|
|
||||||
def download_vod_segment(vod, match_start, duration=60):
|
def download_vod_segment(db, vod, match_start, duration=60):
|
||||||
"""
|
"""
|
||||||
Downloads a segment of a VOD using yt-dlp.
|
Downloads a segment of a VOD using yt-dlp.
|
||||||
|
|
||||||
@ -312,11 +312,12 @@ def download_vod_segment(vod, match_start, duration=60):
|
|||||||
|
|
||||||
subprocess.run(command, check=True)
|
subprocess.run(command, check=True)
|
||||||
print(f"Downloaded segment from {start_ts} to {end_ts} into {clip_filename}")
|
print(f"Downloaded segment from {start_ts} to {end_ts} into {clip_filename}")
|
||||||
|
insert_clip(db, vod['id'], clip_filename)
|
||||||
|
|
||||||
# ---------------------------
|
# ---------------------------
|
||||||
# Main Processing Pipeline
|
# Main Processing Pipeline
|
||||||
# ---------------------------
|
# ---------------------------
|
||||||
def handle_matches_fast(vod, segments_data):
|
def handle_matches_fast(db, vod, segments_data):
|
||||||
matches_fast = []
|
matches_fast = []
|
||||||
for segment in segments_data:
|
for segment in segments_data:
|
||||||
segment_text = segment["text"].lower()
|
segment_text = segment["text"].lower()
|
||||||
@ -332,7 +333,7 @@ def handle_matches_fast(vod, segments_data):
|
|||||||
text = match["text"]
|
text = match["text"]
|
||||||
print(f" - At {start:.2f}s: {text}")
|
print(f" - At {start:.2f}s: {text}")
|
||||||
# create_clip_from_vod(video_filename, start, vod)
|
# create_clip_from_vod(video_filename, start, vod)
|
||||||
download_vod_segment(vod, start)
|
download_vod_segment(db, vod, start)
|
||||||
else:
|
else:
|
||||||
print("faster_whisper -- No mentions of keywords.")
|
print("faster_whisper -- No mentions of keywords.")
|
||||||
|
|
||||||
@ -399,8 +400,6 @@ def insert_transcription(db, video_id, filename):
|
|||||||
db: A MariaDB connection object.
|
db: A MariaDB connection object.
|
||||||
video_id (int): The foreign key referencing the videos table.
|
video_id (int): The foreign key referencing the videos table.
|
||||||
filename (str): The transcription file name.
|
filename (str): The transcription file name.
|
||||||
transcription_start (datetime, optional): The transcription start time. Defaults to now if None.
|
|
||||||
transcription_finish (datetime, optional): The transcription finish time. Defaults to None.
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
int: The ID of the inserted transcription record.
|
int: The ID of the inserted transcription record.
|
||||||
@ -418,6 +417,31 @@ def insert_transcription(db, video_id, filename):
|
|||||||
print(f"Inserted transcription for video_id {video_id} with filename '{filename}' (ID: {inserted_id})")
|
print(f"Inserted transcription for video_id {video_id} with filename '{filename}' (ID: {inserted_id})")
|
||||||
return inserted_id
|
return inserted_id
|
||||||
|
|
||||||
|
def insert_clip(db, video_id, filename):
    """
    Inserts a new clip record into the clips table.

    Parameters:
        db: A MariaDB connection object.
        video_id (int): The foreign key referencing the videos table.
        filename (str): The clip file name.

    Returns:
        int: The ID of the inserted clip record (the cursor's lastrowid).

    Raises:
        Whatever the database driver raises from execute()/commit(); the
        cursor is closed before the exception propagates.
    """
    query = """
        INSERT INTO clips (video_id, filename)
        VALUES (%s, %s)
    """
    cursor = db.cursor()
    try:
        # Values are passed as parameters so the driver handles escaping.
        cursor.execute(query, (video_id, filename))
        db.commit()
        inserted_id = cursor.lastrowid
    finally:
        # Release the cursor even when the insert or the commit fails.
        cursor.close()
    print(f"Inserted clip for video_id {video_id} with filename '{filename}' (ID: {inserted_id})")
    return inserted_id
|
||||||
|
|
||||||
def db_set_transcription_finish(db, video_id):
|
def db_set_transcription_finish(db, video_id):
|
||||||
"""
|
"""
|
||||||
Updates the specified column (e.g. data_downloaded) for the video.
|
Updates the specified column (e.g. data_downloaded) for the video.
|
||||||
@ -496,7 +520,7 @@ def main():
|
|||||||
# Pass language and vod_id so that the transcript is saved and reused if available.
|
# Pass language and vod_id so that the transcript is saved and reused if available.
|
||||||
segments_data = transcribe_audio_fast(audio_filename, language=channel_language, vod_id=vod_id)
|
segments_data = transcribe_audio_fast(audio_filename, language=channel_language, vod_id=vod_id)
|
||||||
|
|
||||||
handle_matches_fast(video, segments_data)
|
handle_matches_fast(db, video, segments_data)
|
||||||
|
|
||||||
db_set_transcription_finish(db, video_id)
|
db_set_transcription_finish(db, video_id)
|
||||||
db_set_video_processed(db, video_id)
|
db_set_video_processed(db, video_id)
|
||||||
|
Loading…
Reference in New Issue
Block a user