From af9e579400b252736f7983f0c59952655432251b Mon Sep 17 00:00:00 2001 From: t0is Date: Fri, 21 Mar 2025 16:09:22 +0100 Subject: [PATCH] cuda image --- docker/transcriptor/Dockerfile | 2 +- main.py | 18 ++++-------------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/docker/transcriptor/Dockerfile b/docker/transcriptor/Dockerfile index af6b953..2f5294f 100644 --- a/docker/transcriptor/Dockerfile +++ b/docker/transcriptor/Dockerfile @@ -13,7 +13,7 @@ RUN add-apt-repository ppa:deadsnakes/ppa -y # Install Python 3.9, python3.9-distutils, pip, and other dependencies RUN apt-get update && \ - apt-get install -y python3.9 python3.9-distutils python3-pip ffmpeg jq curl unzip libmariadb-dev gcc && \ + apt-get install -y python3.9 python3.9-dev python3.9-distutils python3-pip ffmpeg jq curl unzip libmariadb-dev gcc && \ rm -rf /var/lib/apt/lists/* # Set python3.9 as the default python3 and upgrade pip diff --git a/main.py b/main.py index 810b05e..0c70e9f 100644 --- a/main.py +++ b/main.py @@ -130,19 +130,6 @@ def download_vod(vod_url, output_filename): subprocess.run(command, check=True) print(f"Downloaded VOD to {output_filename}") -def extract_audio(video_file, audio_file): - if os.path.exists(audio_file): - print(f"{audio_file} already exists. Skipping audio extraction.") - return - command = ["ffmpeg", "-i", video_file, "-vn", "-acodec", "mp3", audio_file, "-y"] - subprocess.run(command, check=True) - print(f"Extracted audio to {audio_file}") - -def transcribe_audio(audio_file, model_name): - model = whisper.load_model(model_name, download_root="/app/models") - result = model.transcribe(audio_file, language=CHANNEL_LANGUAGE) - return result - def transcribe_audio_fast(audio_file, language, vod_id): transcript_path = os.path.join(base_dirs["transcripts"], f"transcript_{vod_id}.json") @@ -438,7 +425,7 @@ def db_set_transcription_finish(db, video_id): """ cursor = db.cursor() transcription_finish = datetime.now() - query = f"UPDATE transcriptions SET transcription_finish = %s WHERE id = %s" + query = f"UPDATE transcriptions SET transcription_finish = %s WHERE video_id = %s" cursor.execute(query, (transcription_finish, video_id)) db.commit() cursor.close() @@ -511,6 +498,9 @@ def main(): handle_matches_fast(video, segments_data) + db_set_transcription_finish(db, video_id) + db_set_video_processed(db, video_id) + except Exception as e: print(f"Error processing video ID {video['id']}: {e}") continue