cuda image

2025-03-21 16:09:22 +01:00 · 2025-03-21 16:09:22 +01:00 · af9e579400
commit af9e579400
parent 00a4ab1683
2 changed files with 5 additions and 15 deletions
--- a/docker/transcriptor/Dockerfile
+++ b/docker/transcriptor/Dockerfile
@@ -13,7 +13,7 @@ RUN add-apt-repository ppa:deadsnakes/ppa -y

 # Install Python 3.9, python3.9-distutils, pip, and other dependencies
 RUN apt-get update && \
-    apt-get install -y python3.9 python3.9-distutils python3-pip ffmpeg jq curl unzip libmariadb-dev gcc && \
+    apt-get install -y python3.9 python3.9-dev python3.9-distutils python3-pip ffmpeg jq curl unzip libmariadb-dev gcc && \
    rm -rf /var/lib/apt/lists/*

 # Set python3.9 as the default python3 and upgrade pip
--- a/main.py
+++ b/main.py
@@ -130,19 +130,6 @@ def download_vod(vod_url, output_filename):
    subprocess.run(command, check=True)
    print(f"Downloaded VOD to {output_filename}")

-def extract_audio(video_file, audio_file):
-    if os.path.exists(audio_file):
-        print(f"{audio_file} already exists. Skipping audio extraction.")
-        return
-    command = ["ffmpeg", "-i", video_file, "-vn", "-acodec", "mp3", audio_file, "-y"]
-    subprocess.run(command, check=True)
-    print(f"Extracted audio to {audio_file}")
-
-def transcribe_audio(audio_file, model_name):
-    model = whisper.load_model(model_name, download_root="/app/models")
-    result = model.transcribe(audio_file, language=CHANNEL_LANGUAGE)
-    return result
-
 def transcribe_audio_fast(audio_file, language, vod_id):

    transcript_path = os.path.join(base_dirs["transcripts"], f"transcript_{vod_id}.json")
@@ -438,7 +425,7 @@ def db_set_transcription_finish(db, video_id):
    """
    cursor = db.cursor()
    transcription_finish = datetime.now()
-    query = f"UPDATE transcriptions SET transcription_finish = %s WHERE id = %s"
+    query = f"UPDATE transcriptions SET transcription_finish = %s WHERE video_id = %s"
    cursor.execute(query, (transcription_finish, video_id))
    db.commit()
    cursor.close()
@@ -511,6 +498,9 @@ def main():

            handle_matches_fast(video, segments_data)

+            db_set_transcription_finish(db, video_id)
+            db_set_video_processed(db, video_id)
+
        except Exception as e:
            print(f"Error processing video ID {video['id']}: {e}")
            continue