test faster_whisper

This commit is contained in:
t0is 2025-02-23 12:08:38 +01:00
parent 2eb03ff103
commit 2f4eb523f6

25
main.py
View File

@@ -314,17 +314,20 @@ def main():
download_vod(vod_url, video_filename) download_vod(vod_url, video_filename)
extract_audio(video_filename, audio_filename) extract_audio(video_filename, audio_filename)
# Check if transcript already exists; if yes, load it, otherwise transcribe and save. # # Check if transcript already exists; if yes, load it, otherwise transcribe and save.
if os.path.exists(transcript_filename): # if os.path.exists(transcript_filename):
print(f"{transcript_filename} already exists. Skipping transcription.") # print(f"{transcript_filename} already exists. Skipping transcription.")
with open(transcript_filename, "r", encoding="utf-8") as f: # with open(transcript_filename, "r", encoding="utf-8") as f:
result = json.load(f) # result = json.load(f)
else: # else:
print("Transcribing audio. This may take some time...") # print("Transcribing audio. This may take some time...")
result = transcribe_audio_fast(audio_filename, MODEL_NAME) # result = transcribe_audio(audio_filename, MODEL_NAME)
with open(transcript_filename, "w", encoding="utf-8") as f: # with open(transcript_filename, "w", encoding="utf-8") as f:
json.dump(result, f, ensure_ascii=False, indent=4) # json.dump(result, f, ensure_ascii=False, indent=4)
print(f"Transcript saved to {transcript_filename}") # print(f"Transcript saved to {transcript_filename}")
print("Transcribing audio. This may take some time...")
result = transcribe_audio_fast(audio_filename, MODEL_NAME)
scrape_chat_log(vod_id, chat_log_filename) scrape_chat_log(vod_id, chat_log_filename)