test faster_whisper
This commit is contained in:
parent
53512e52f9
commit
4f175772ad
@ -3,15 +3,15 @@
|
||||
{ "name": "herdyn", "language": "cs" },
|
||||
{ "name": "czechcloud", "language": "cs" },
|
||||
{ "name": "duklock", "language": "cs" },
|
||||
{ "name": "sterakdary", "language": "cs" },
|
||||
{ "name": "therasablueberry", "language": "cs" },
|
||||
{ "name": "tensterakdary", "language": "cs" },
|
||||
{ "name": "theresablueberry", "language": "cs" },
|
||||
{ "name": "marwex", "language": "cs" },
|
||||
{ "name": "patrikturi", "language": "cs" },
|
||||
{ "name": "artix", "language": "cs" },
|
||||
{ "name": "spajkk", "language": "cs" },
|
||||
{ "name": "liveoliverr", "language": "cs" },
|
||||
{ "name": "fluffcz", "language": "cs" },
|
||||
{ "name": "astatoro", "language": "cs" },
|
||||
{ "name": "astatoro", "language": "sk" },
|
||||
{ "name": "nestta", "language": "cs" },
|
||||
{ "name": "cantzer", "language": "cs" },
|
||||
{ "name": "kapesnik69", "language": "cs" },
|
||||
|
@ -1,317 +1,11 @@
|
||||
services:
|
||||
scanner_agraelus:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=agraelus
|
||||
- CHANNEL_LANGUAGE=cs
|
||||
- TIMEDELTA_DAYS=4
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
volumes:
|
||||
- /shared/transcriptor/clips:/app/clips
|
||||
- /shared/transcriptor/vods:/app/vods
|
||||
- /shared/transcriptor/audio:/app/audio
|
||||
- /shared/transcriptor/chat:/app/chat
|
||||
- /shared/transcriptor/models:/app/models
|
||||
- /shared/transcriptor/transcripts:/app/transcripts
|
||||
scanner_artix:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=artix
|
||||
- CHANNEL_LANGUAGE=cs
|
||||
- TIMEDELTA_DAYS=4
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
volumes:
|
||||
- /shared/transcriptor/clips:/app/clips
|
||||
- /shared/transcriptor/vods:/app/vods
|
||||
- /shared/transcriptor/audio:/app/audio
|
||||
- /shared/transcriptor/chat:/app/chat
|
||||
- /shared/transcriptor/models:/app/models
|
||||
- /shared/transcriptor/transcripts:/app/transcripts
|
||||
scanner_astatoro:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=astatoro
|
||||
- CHANNEL_LANGUAGE=cs
|
||||
- TIMEDELTA_DAYS=4
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
volumes:
|
||||
- /shared/transcriptor/clips:/app/clips
|
||||
- /shared/transcriptor/vods:/app/vods
|
||||
- /shared/transcriptor/audio:/app/audio
|
||||
- /shared/transcriptor/chat:/app/chat
|
||||
- /shared/transcriptor/models:/app/models
|
||||
- /shared/transcriptor/transcripts:/app/transcripts
|
||||
scanner_cantzer:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=cantzer
|
||||
- CHANNEL_LANGUAGE=cs
|
||||
- TIMEDELTA_DAYS=4
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
volumes:
|
||||
- /shared/transcriptor/clips:/app/clips
|
||||
- /shared/transcriptor/vods:/app/vods
|
||||
- /shared/transcriptor/audio:/app/audio
|
||||
- /shared/transcriptor/chat:/app/chat
|
||||
- /shared/transcriptor/models:/app/models
|
||||
- /shared/transcriptor/transcripts:/app/transcripts
|
||||
scanner_czechcloud:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=czechcloud
|
||||
- CHANNEL_LANGUAGE=cs
|
||||
- TIMEDELTA_DAYS=4
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
volumes:
|
||||
- /shared/transcriptor/clips:/app/clips
|
||||
- /shared/transcriptor/vods:/app/vods
|
||||
- /shared/transcriptor/audio:/app/audio
|
||||
- /shared/transcriptor/chat:/app/chat
|
||||
- /shared/transcriptor/models:/app/models
|
||||
- /shared/transcriptor/transcripts:/app/transcripts
|
||||
scanner_duklock:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=duklock
|
||||
- CHANNEL_LANGUAGE=cs
|
||||
- TIMEDELTA_DAYS=4
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
volumes:
|
||||
- /shared/transcriptor/clips:/app/clips
|
||||
- /shared/transcriptor/vods:/app/vods
|
||||
- /shared/transcriptor/audio:/app/audio
|
||||
- /shared/transcriptor/chat:/app/chat
|
||||
- /shared/transcriptor/models:/app/models
|
||||
- /shared/transcriptor/transcripts:/app/transcripts
|
||||
scanner_esfandtv:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=esfandtv
|
||||
- CHANNEL_LANGUAGE=en
|
||||
- TIMEDELTA_DAYS=4
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
volumes:
|
||||
- /shared/transcriptor/clips:/app/clips
|
||||
- /shared/transcriptor/vods:/app/vods
|
||||
- /shared/transcriptor/audio:/app/audio
|
||||
- /shared/transcriptor/chat:/app/chat
|
||||
- /shared/transcriptor/models:/app/models
|
||||
- /shared/transcriptor/transcripts:/app/transcripts
|
||||
scanner_fluffcz:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=fluffcz
|
||||
- CHANNEL_LANGUAGE=cs
|
||||
- TIMEDELTA_DAYS=4
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
volumes:
|
||||
- /shared/transcriptor/clips:/app/clips
|
||||
- /shared/transcriptor/vods:/app/vods
|
||||
- /shared/transcriptor/audio:/app/audio
|
||||
- /shared/transcriptor/chat:/app/chat
|
||||
- /shared/transcriptor/models:/app/models
|
||||
- /shared/transcriptor/transcripts:/app/transcripts
|
||||
scanner_herdyn:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=herdyn
|
||||
- CHANNEL_LANGUAGE=cs
|
||||
- TIMEDELTA_DAYS=4
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
volumes:
|
||||
- /shared/transcriptor/clips:/app/clips
|
||||
- /shared/transcriptor/vods:/app/vods
|
||||
- /shared/transcriptor/audio:/app/audio
|
||||
- /shared/transcriptor/chat:/app/chat
|
||||
- /shared/transcriptor/models:/app/models
|
||||
- /shared/transcriptor/transcripts:/app/transcripts
|
||||
scanner_kapesnik69:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=kapesnik69
|
||||
- CHANNEL_LANGUAGE=cs
|
||||
- TIMEDELTA_DAYS=4
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
volumes:
|
||||
- /shared/transcriptor/clips:/app/clips
|
||||
- /shared/transcriptor/vods:/app/vods
|
||||
- /shared/transcriptor/audio:/app/audio
|
||||
- /shared/transcriptor/chat:/app/chat
|
||||
- /shared/transcriptor/models:/app/models
|
||||
- /shared/transcriptor/transcripts:/app/transcripts
|
||||
scanner_kuruhs:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=kuruhs
|
||||
- CHANNEL_LANGUAGE=en
|
||||
- TIMEDELTA_DAYS=4
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
volumes:
|
||||
- /shared/transcriptor/clips:/app/clips
|
||||
- /shared/transcriptor/vods:/app/vods
|
||||
- /shared/transcriptor/audio:/app/audio
|
||||
- /shared/transcriptor/chat:/app/chat
|
||||
- /shared/transcriptor/models:/app/models
|
||||
- /shared/transcriptor/transcripts:/app/transcripts
|
||||
scanner_liveoliverr:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=liveoliverr
|
||||
- CHANNEL_LANGUAGE=cs
|
||||
- TIMEDELTA_DAYS=4
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
volumes:
|
||||
- /shared/transcriptor/clips:/app/clips
|
||||
- /shared/transcriptor/vods:/app/vods
|
||||
- /shared/transcriptor/audio:/app/audio
|
||||
- /shared/transcriptor/chat:/app/chat
|
||||
- /shared/transcriptor/models:/app/models
|
||||
- /shared/transcriptor/transcripts:/app/transcripts
|
||||
scanner_marty_vole:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=marty_vole
|
||||
- CHANNEL_LANGUAGE=cs
|
||||
- TIMEDELTA_DAYS=4
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
volumes:
|
||||
- /shared/transcriptor/clips:/app/clips
|
||||
- /shared/transcriptor/vods:/app/vods
|
||||
- /shared/transcriptor/audio:/app/audio
|
||||
- /shared/transcriptor/chat:/app/chat
|
||||
- /shared/transcriptor/models:/app/models
|
||||
- /shared/transcriptor/transcripts:/app/transcripts
|
||||
scanner_marwex:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=marwex
|
||||
- CHANNEL_LANGUAGE=cs
|
||||
- TIMEDELTA_DAYS=4
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
volumes:
|
||||
- /shared/transcriptor/clips:/app/clips
|
||||
- /shared/transcriptor/vods:/app/vods
|
||||
- /shared/transcriptor/audio:/app/audio
|
||||
- /shared/transcriptor/chat:/app/chat
|
||||
- /shared/transcriptor/models:/app/models
|
||||
- /shared/transcriptor/transcripts:/app/transcripts
|
||||
scanner_nestta:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=nestta
|
||||
- CHANNEL_LANGUAGE=cs
|
||||
- TIMEDELTA_DAYS=4
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
volumes:
|
||||
- /shared/transcriptor/clips:/app/clips
|
||||
- /shared/transcriptor/vods:/app/vods
|
||||
- /shared/transcriptor/audio:/app/audio
|
||||
- /shared/transcriptor/chat:/app/chat
|
||||
- /shared/transcriptor/models:/app/models
|
||||
- /shared/transcriptor/transcripts:/app/transcripts
|
||||
scanner_patrikturi:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=patrikturi
|
||||
- CHANNEL_LANGUAGE=cs
|
||||
- TIMEDELTA_DAYS=4
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
volumes:
|
||||
- /shared/transcriptor/clips:/app/clips
|
||||
- /shared/transcriptor/vods:/app/vods
|
||||
- /shared/transcriptor/audio:/app/audio
|
||||
- /shared/transcriptor/chat:/app/chat
|
||||
- /shared/transcriptor/models:/app/models
|
||||
- /shared/transcriptor/transcripts:/app/transcripts
|
||||
scanner_spajkk:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=spajkk
|
||||
- CHANNEL_LANGUAGE=cs
|
||||
- TIMEDELTA_DAYS=4
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
volumes:
|
||||
- /shared/transcriptor/clips:/app/clips
|
||||
- /shared/transcriptor/vods:/app/vods
|
||||
- /shared/transcriptor/audio:/app/audio
|
||||
- /shared/transcriptor/chat:/app/chat
|
||||
- /shared/transcriptor/models:/app/models
|
||||
- /shared/transcriptor/transcripts:/app/transcripts
|
||||
scanner_sterakdary:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=sterakdary
|
||||
- CHANNEL_LANGUAGE=cs
|
||||
- TIMEDELTA_DAYS=4
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
volumes:
|
||||
- /shared/transcriptor/clips:/app/clips
|
||||
- /shared/transcriptor/vods:/app/vods
|
||||
- /shared/transcriptor/audio:/app/audio
|
||||
- /shared/transcriptor/chat:/app/chat
|
||||
- /shared/transcriptor/models:/app/models
|
||||
- /shared/transcriptor/transcripts:/app/transcripts
|
||||
scanner_therasablueberry:
|
||||
build:
|
||||
context: .
|
||||
environment:
|
||||
- CHANNEL_NAME=therasablueberry
|
||||
- CHANNEL_LANGUAGE=cs
|
||||
- TIMEDELTA_DAYS=4
|
||||
- CHANNEL_LANGUAGE=sk
|
||||
- TIMEDELTA_DAYS=6
|
||||
- TIMEDELTA_DAYS_EXACT=true
|
||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||
|
@ -19,7 +19,7 @@ for channel in channels:
|
||||
"environment": [
|
||||
f"CHANNEL_NAME={channel['name']}",
|
||||
f"CHANNEL_LANGUAGE={channel['language']}",
|
||||
"TIMEDELTA_DAYS=4",
|
||||
"TIMEDELTA_DAYS=6",
|
||||
"TIMEDELTA_DAYS_EXACT=true",
|
||||
"TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov",
|
||||
"TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es"
|
||||
|
56
main.py
56
main.py
@ -2,6 +2,7 @@ import os
|
||||
import subprocess
|
||||
import requests
|
||||
import whisper
|
||||
from faster_whisper import WhisperModel
|
||||
from datetime import datetime, time, timedelta
|
||||
from zoneinfo import ZoneInfo
|
||||
import json
|
||||
@ -142,6 +143,12 @@ def transcribe_audio(audio_file, model_name):
|
||||
result = model.transcribe(audio_file, language=CHANNEL_LANGUAGE)
|
||||
return result
|
||||
|
||||
def transcribe_audio_fast(audio_file, model_name):
    """Transcribe ``audio_file`` with faster-whisper and return its segments.

    Args:
        audio_file: Audio file handed to ``WhisperModel.transcribe``.
        model_name: Accepted so the signature mirrors ``transcribe_audio``;
            it is not used here because the model is pinned to
            "large-v3-turbo".

    Returns:
        list: The segment objects produced by faster-whisper, fully
        materialised so callers can iterate them more than once.
    """
    model_fast = WhisperModel(
        "large-v3-turbo",
        device="auto",
        compute_type="int8",
        download_root="/app/models",
    )
    # NOTE(review): no ``language=`` is passed, so the language is
    # auto-detected rather than taken from CHANNEL_LANGUAGE as
    # ``transcribe_audio`` does -- confirm this is intended.
    segments, info = model_fast.transcribe(audio_file)
    print(
        f"faster_whisper -- Detected language '{info.language}' "
        f"with probability {info.language_probability:f}"
    )
    # faster-whisper hands back a lazy, single-use generator: the actual
    # transcription only runs while it is being iterated. Collect it here so
    # the work happens inside this function and the result is reusable.
    return list(segments)
|
||||
|
||||
def search_transcription(result, keywords):
|
||||
matches = []
|
||||
if "segments" in result:
|
||||
@ -245,6 +252,40 @@ def create_clip_from_comment_timestamp(video_file, comment_timestamp, vod_id):
|
||||
# ---------------------------
|
||||
# Main Processing Pipeline
|
||||
# ---------------------------
|
||||
def handle_matches_fast(vod_id, video_filename, result):
    """Report keyword hits in a transcript and cut a clip for each one.

    Args:
        vod_id: Identifier of the VOD, used in log output and passed on to
            ``create_clip_from_vod``.
        video_filename: Video file the clips are cut from.
        result: Either an iterable of segment objects exposing ``.text`` and
            ``.start`` (fresh faster-whisper output), or a transcript that
            was re-loaded from the JSON cache: a dict holding a
            ``"segments"`` list of dicts with ``"text"`` and ``"start"``
            keys.
    """
    # A cached transcript arrives as a plain dict; pull its segment list out
    # so both input shapes share the same matching loop below.
    segments = result.get("segments", []) if isinstance(result, dict) else result

    def _field(segment, name):
        # Cached segments are dicts, live faster-whisper segments are objects.
        if isinstance(segment, dict):
            return segment[name]
        return getattr(segment, name)

    # Lower-case the keywords once instead of once per segment.
    lowered_keywords = [keyword.lower() for keyword in SEARCH_KEYWORDS]

    matches_fast = []
    for segment in segments:
        segment_text = _field(segment, "text").lower()
        # One hit per segment is enough, whichever keyword triggered it.
        if any(keyword in segment_text for keyword in lowered_keywords):
            matches_fast.append(segment)

    if not matches_fast:
        print("faster_whisper -- No mentions of keywords.")
        return

    print(f"faster_whisper -- Found {len(matches_fast)} mention(s) of {SEARCH_KEYWORDS} in VOD {vod_id}:")
    for match in matches_fast:
        start = _field(match, "start")
        text = _field(match, "text")
        print(f" - At {start:.2f}s: {text}")
        create_clip_from_vod(video_filename, start, vod_id)
|
||||
|
||||
|
||||
def handle_matches(vod_id, video_filename, result):
    """Print every keyword hit found in ``result`` and clip the VOD there.

    Args:
        vod_id: Identifier of the VOD, used in log output and passed on to
            ``create_clip_from_vod``.
        video_filename: Video file the clips are cut from.
        result: Transcript handed to ``search_transcription``.
    """
    hits = search_transcription(result, SEARCH_KEYWORDS)

    # Nothing matched: say so and stop before the clipping loop.
    if not hits:
        print(f"No mentions of {SEARCH_KEYWORDS} found in VOD {vod_id}.")
        return

    print(f"Found {len(hits)} mention(s) of {SEARCH_KEYWORDS} in VOD {vod_id}:")
    for hit in hits:
        offset, spoken = hit["start"], hit["text"]
        print(f" - At {offset:.2f}s: {spoken}")
        create_clip_from_vod(video_filename, offset, vod_id)
|
||||
|
||||
|
||||
|
||||
def main():
|
||||
print("Obtaining access token...")
|
||||
token = get_access_token()
|
||||
@ -280,7 +321,7 @@ def main():
|
||||
result = json.load(f)
|
||||
else:
|
||||
print("Transcribing audio. This may take some time...")
|
||||
result = transcribe_audio(audio_filename, MODEL_NAME)
|
||||
result = transcribe_audio_fast(audio_filename, MODEL_NAME)
|
||||
with open(transcript_filename, "w", encoding="utf-8") as f:
|
||||
json.dump(result, f, ensure_ascii=False, indent=4)
|
||||
print(f"Transcript saved to {transcript_filename}")
|
||||
@ -288,16 +329,8 @@ def main():
|
||||
scrape_chat_log(vod_id, chat_log_filename)
|
||||
|
||||
# Search transcript for keywords
|
||||
matches = search_transcription(result, SEARCH_KEYWORDS)
|
||||
if matches:
|
||||
print(f"Found {len(matches)} mention(s) of {SEARCH_KEYWORDS} in VOD {vod_id}:")
|
||||
for match in matches:
|
||||
start = match["start"]
|
||||
text = match["text"]
|
||||
print(f" - At {start:.2f}s: {text}")
|
||||
create_clip_from_vod(video_filename, start, vod_id)
|
||||
else:
|
||||
print(f"No mentions of {SEARCH_KEYWORDS} found in VOD {vod_id}.")
|
||||
# handle_matches(vod_id, video_filename, result)
|
||||
handle_matches_fast(vod_id, video_filename, result)
|
||||
|
||||
# Load chat log from file
|
||||
try:
|
||||
@ -307,6 +340,7 @@ def main():
|
||||
print(f"Error loading chat log: {e}")
|
||||
chat_log = []
|
||||
|
||||
|
||||
# Search chat log using an array of keywords (using the same keywords as for transcript)
|
||||
comment_matches = find_comments_by_keywords(chat_log, SEARCH_KEYWORDS)
|
||||
if comment_matches:
|
||||
|
Loading…
Reference in New Issue
Block a user