test faster_whisper
This commit is contained in:
parent
53512e52f9
commit
4f175772ad
@ -3,15 +3,15 @@
|
|||||||
{ "name": "herdyn", "language": "cs" },
|
{ "name": "herdyn", "language": "cs" },
|
||||||
{ "name": "czechcloud", "language": "cs" },
|
{ "name": "czechcloud", "language": "cs" },
|
||||||
{ "name": "duklock", "language": "cs" },
|
{ "name": "duklock", "language": "cs" },
|
||||||
{ "name": "sterakdary", "language": "cs" },
|
{ "name": "tensterakdary", "language": "cs" },
|
||||||
{ "name": "therasablueberry", "language": "cs" },
|
{ "name": "theresablueberry", "language": "cs" },
|
||||||
{ "name": "marwex", "language": "cs" },
|
{ "name": "marwex", "language": "cs" },
|
||||||
{ "name": "patrikturi", "language": "cs" },
|
{ "name": "patrikturi", "language": "cs" },
|
||||||
{ "name": "artix", "language": "cs" },
|
{ "name": "artix", "language": "cs" },
|
||||||
{ "name": "spajkk", "language": "cs" },
|
{ "name": "spajkk", "language": "cs" },
|
||||||
{ "name": "liveoliverr", "language": "cs" },
|
{ "name": "liveoliverr", "language": "cs" },
|
||||||
{ "name": "fluffcz", "language": "cs" },
|
{ "name": "fluffcz", "language": "cs" },
|
||||||
{ "name": "astatoro", "language": "cs" },
|
{ "name": "astatoro", "language": "sk" },
|
||||||
{ "name": "nestta", "language": "cs" },
|
{ "name": "nestta", "language": "cs" },
|
||||||
{ "name": "cantzer", "language": "cs" },
|
{ "name": "cantzer", "language": "cs" },
|
||||||
{ "name": "kapesnik69", "language": "cs" },
|
{ "name": "kapesnik69", "language": "cs" },
|
||||||
|
@ -1,317 +1,11 @@
|
|||||||
services:
|
services:
|
||||||
scanner_agraelus:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
environment:
|
|
||||||
- CHANNEL_NAME=agraelus
|
|
||||||
- CHANNEL_LANGUAGE=cs
|
|
||||||
- TIMEDELTA_DAYS=4
|
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
|
||||||
volumes:
|
|
||||||
- /shared/transcriptor/clips:/app/clips
|
|
||||||
- /shared/transcriptor/vods:/app/vods
|
|
||||||
- /shared/transcriptor/audio:/app/audio
|
|
||||||
- /shared/transcriptor/chat:/app/chat
|
|
||||||
- /shared/transcriptor/models:/app/models
|
|
||||||
- /shared/transcriptor/transcripts:/app/transcripts
|
|
||||||
scanner_artix:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
environment:
|
|
||||||
- CHANNEL_NAME=artix
|
|
||||||
- CHANNEL_LANGUAGE=cs
|
|
||||||
- TIMEDELTA_DAYS=4
|
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
|
||||||
volumes:
|
|
||||||
- /shared/transcriptor/clips:/app/clips
|
|
||||||
- /shared/transcriptor/vods:/app/vods
|
|
||||||
- /shared/transcriptor/audio:/app/audio
|
|
||||||
- /shared/transcriptor/chat:/app/chat
|
|
||||||
- /shared/transcriptor/models:/app/models
|
|
||||||
- /shared/transcriptor/transcripts:/app/transcripts
|
|
||||||
scanner_astatoro:
|
scanner_astatoro:
|
||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
environment:
|
environment:
|
||||||
- CHANNEL_NAME=astatoro
|
- CHANNEL_NAME=astatoro
|
||||||
- CHANNEL_LANGUAGE=cs
|
- CHANNEL_LANGUAGE=sk
|
||||||
- TIMEDELTA_DAYS=4
|
- TIMEDELTA_DAYS=6
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
|
||||||
volumes:
|
|
||||||
- /shared/transcriptor/clips:/app/clips
|
|
||||||
- /shared/transcriptor/vods:/app/vods
|
|
||||||
- /shared/transcriptor/audio:/app/audio
|
|
||||||
- /shared/transcriptor/chat:/app/chat
|
|
||||||
- /shared/transcriptor/models:/app/models
|
|
||||||
- /shared/transcriptor/transcripts:/app/transcripts
|
|
||||||
scanner_cantzer:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
environment:
|
|
||||||
- CHANNEL_NAME=cantzer
|
|
||||||
- CHANNEL_LANGUAGE=cs
|
|
||||||
- TIMEDELTA_DAYS=4
|
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
|
||||||
volumes:
|
|
||||||
- /shared/transcriptor/clips:/app/clips
|
|
||||||
- /shared/transcriptor/vods:/app/vods
|
|
||||||
- /shared/transcriptor/audio:/app/audio
|
|
||||||
- /shared/transcriptor/chat:/app/chat
|
|
||||||
- /shared/transcriptor/models:/app/models
|
|
||||||
- /shared/transcriptor/transcripts:/app/transcripts
|
|
||||||
scanner_czechcloud:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
environment:
|
|
||||||
- CHANNEL_NAME=czechcloud
|
|
||||||
- CHANNEL_LANGUAGE=cs
|
|
||||||
- TIMEDELTA_DAYS=4
|
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
|
||||||
volumes:
|
|
||||||
- /shared/transcriptor/clips:/app/clips
|
|
||||||
- /shared/transcriptor/vods:/app/vods
|
|
||||||
- /shared/transcriptor/audio:/app/audio
|
|
||||||
- /shared/transcriptor/chat:/app/chat
|
|
||||||
- /shared/transcriptor/models:/app/models
|
|
||||||
- /shared/transcriptor/transcripts:/app/transcripts
|
|
||||||
scanner_duklock:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
environment:
|
|
||||||
- CHANNEL_NAME=duklock
|
|
||||||
- CHANNEL_LANGUAGE=cs
|
|
||||||
- TIMEDELTA_DAYS=4
|
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
|
||||||
volumes:
|
|
||||||
- /shared/transcriptor/clips:/app/clips
|
|
||||||
- /shared/transcriptor/vods:/app/vods
|
|
||||||
- /shared/transcriptor/audio:/app/audio
|
|
||||||
- /shared/transcriptor/chat:/app/chat
|
|
||||||
- /shared/transcriptor/models:/app/models
|
|
||||||
- /shared/transcriptor/transcripts:/app/transcripts
|
|
||||||
scanner_esfandtv:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
environment:
|
|
||||||
- CHANNEL_NAME=esfandtv
|
|
||||||
- CHANNEL_LANGUAGE=en
|
|
||||||
- TIMEDELTA_DAYS=4
|
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
|
||||||
volumes:
|
|
||||||
- /shared/transcriptor/clips:/app/clips
|
|
||||||
- /shared/transcriptor/vods:/app/vods
|
|
||||||
- /shared/transcriptor/audio:/app/audio
|
|
||||||
- /shared/transcriptor/chat:/app/chat
|
|
||||||
- /shared/transcriptor/models:/app/models
|
|
||||||
- /shared/transcriptor/transcripts:/app/transcripts
|
|
||||||
scanner_fluffcz:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
environment:
|
|
||||||
- CHANNEL_NAME=fluffcz
|
|
||||||
- CHANNEL_LANGUAGE=cs
|
|
||||||
- TIMEDELTA_DAYS=4
|
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
|
||||||
volumes:
|
|
||||||
- /shared/transcriptor/clips:/app/clips
|
|
||||||
- /shared/transcriptor/vods:/app/vods
|
|
||||||
- /shared/transcriptor/audio:/app/audio
|
|
||||||
- /shared/transcriptor/chat:/app/chat
|
|
||||||
- /shared/transcriptor/models:/app/models
|
|
||||||
- /shared/transcriptor/transcripts:/app/transcripts
|
|
||||||
scanner_herdyn:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
environment:
|
|
||||||
- CHANNEL_NAME=herdyn
|
|
||||||
- CHANNEL_LANGUAGE=cs
|
|
||||||
- TIMEDELTA_DAYS=4
|
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
|
||||||
volumes:
|
|
||||||
- /shared/transcriptor/clips:/app/clips
|
|
||||||
- /shared/transcriptor/vods:/app/vods
|
|
||||||
- /shared/transcriptor/audio:/app/audio
|
|
||||||
- /shared/transcriptor/chat:/app/chat
|
|
||||||
- /shared/transcriptor/models:/app/models
|
|
||||||
- /shared/transcriptor/transcripts:/app/transcripts
|
|
||||||
scanner_kapesnik69:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
environment:
|
|
||||||
- CHANNEL_NAME=kapesnik69
|
|
||||||
- CHANNEL_LANGUAGE=cs
|
|
||||||
- TIMEDELTA_DAYS=4
|
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
|
||||||
volumes:
|
|
||||||
- /shared/transcriptor/clips:/app/clips
|
|
||||||
- /shared/transcriptor/vods:/app/vods
|
|
||||||
- /shared/transcriptor/audio:/app/audio
|
|
||||||
- /shared/transcriptor/chat:/app/chat
|
|
||||||
- /shared/transcriptor/models:/app/models
|
|
||||||
- /shared/transcriptor/transcripts:/app/transcripts
|
|
||||||
scanner_kuruhs:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
environment:
|
|
||||||
- CHANNEL_NAME=kuruhs
|
|
||||||
- CHANNEL_LANGUAGE=en
|
|
||||||
- TIMEDELTA_DAYS=4
|
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
|
||||||
volumes:
|
|
||||||
- /shared/transcriptor/clips:/app/clips
|
|
||||||
- /shared/transcriptor/vods:/app/vods
|
|
||||||
- /shared/transcriptor/audio:/app/audio
|
|
||||||
- /shared/transcriptor/chat:/app/chat
|
|
||||||
- /shared/transcriptor/models:/app/models
|
|
||||||
- /shared/transcriptor/transcripts:/app/transcripts
|
|
||||||
scanner_liveoliverr:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
environment:
|
|
||||||
- CHANNEL_NAME=liveoliverr
|
|
||||||
- CHANNEL_LANGUAGE=cs
|
|
||||||
- TIMEDELTA_DAYS=4
|
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
|
||||||
volumes:
|
|
||||||
- /shared/transcriptor/clips:/app/clips
|
|
||||||
- /shared/transcriptor/vods:/app/vods
|
|
||||||
- /shared/transcriptor/audio:/app/audio
|
|
||||||
- /shared/transcriptor/chat:/app/chat
|
|
||||||
- /shared/transcriptor/models:/app/models
|
|
||||||
- /shared/transcriptor/transcripts:/app/transcripts
|
|
||||||
scanner_marty_vole:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
environment:
|
|
||||||
- CHANNEL_NAME=marty_vole
|
|
||||||
- CHANNEL_LANGUAGE=cs
|
|
||||||
- TIMEDELTA_DAYS=4
|
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
|
||||||
volumes:
|
|
||||||
- /shared/transcriptor/clips:/app/clips
|
|
||||||
- /shared/transcriptor/vods:/app/vods
|
|
||||||
- /shared/transcriptor/audio:/app/audio
|
|
||||||
- /shared/transcriptor/chat:/app/chat
|
|
||||||
- /shared/transcriptor/models:/app/models
|
|
||||||
- /shared/transcriptor/transcripts:/app/transcripts
|
|
||||||
scanner_marwex:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
environment:
|
|
||||||
- CHANNEL_NAME=marwex
|
|
||||||
- CHANNEL_LANGUAGE=cs
|
|
||||||
- TIMEDELTA_DAYS=4
|
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
|
||||||
volumes:
|
|
||||||
- /shared/transcriptor/clips:/app/clips
|
|
||||||
- /shared/transcriptor/vods:/app/vods
|
|
||||||
- /shared/transcriptor/audio:/app/audio
|
|
||||||
- /shared/transcriptor/chat:/app/chat
|
|
||||||
- /shared/transcriptor/models:/app/models
|
|
||||||
- /shared/transcriptor/transcripts:/app/transcripts
|
|
||||||
scanner_nestta:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
environment:
|
|
||||||
- CHANNEL_NAME=nestta
|
|
||||||
- CHANNEL_LANGUAGE=cs
|
|
||||||
- TIMEDELTA_DAYS=4
|
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
|
||||||
volumes:
|
|
||||||
- /shared/transcriptor/clips:/app/clips
|
|
||||||
- /shared/transcriptor/vods:/app/vods
|
|
||||||
- /shared/transcriptor/audio:/app/audio
|
|
||||||
- /shared/transcriptor/chat:/app/chat
|
|
||||||
- /shared/transcriptor/models:/app/models
|
|
||||||
- /shared/transcriptor/transcripts:/app/transcripts
|
|
||||||
scanner_patrikturi:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
environment:
|
|
||||||
- CHANNEL_NAME=patrikturi
|
|
||||||
- CHANNEL_LANGUAGE=cs
|
|
||||||
- TIMEDELTA_DAYS=4
|
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
|
||||||
volumes:
|
|
||||||
- /shared/transcriptor/clips:/app/clips
|
|
||||||
- /shared/transcriptor/vods:/app/vods
|
|
||||||
- /shared/transcriptor/audio:/app/audio
|
|
||||||
- /shared/transcriptor/chat:/app/chat
|
|
||||||
- /shared/transcriptor/models:/app/models
|
|
||||||
- /shared/transcriptor/transcripts:/app/transcripts
|
|
||||||
scanner_spajkk:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
environment:
|
|
||||||
- CHANNEL_NAME=spajkk
|
|
||||||
- CHANNEL_LANGUAGE=cs
|
|
||||||
- TIMEDELTA_DAYS=4
|
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
|
||||||
volumes:
|
|
||||||
- /shared/transcriptor/clips:/app/clips
|
|
||||||
- /shared/transcriptor/vods:/app/vods
|
|
||||||
- /shared/transcriptor/audio:/app/audio
|
|
||||||
- /shared/transcriptor/chat:/app/chat
|
|
||||||
- /shared/transcriptor/models:/app/models
|
|
||||||
- /shared/transcriptor/transcripts:/app/transcripts
|
|
||||||
scanner_sterakdary:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
environment:
|
|
||||||
- CHANNEL_NAME=sterakdary
|
|
||||||
- CHANNEL_LANGUAGE=cs
|
|
||||||
- TIMEDELTA_DAYS=4
|
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
|
||||||
volumes:
|
|
||||||
- /shared/transcriptor/clips:/app/clips
|
|
||||||
- /shared/transcriptor/vods:/app/vods
|
|
||||||
- /shared/transcriptor/audio:/app/audio
|
|
||||||
- /shared/transcriptor/chat:/app/chat
|
|
||||||
- /shared/transcriptor/models:/app/models
|
|
||||||
- /shared/transcriptor/transcripts:/app/transcripts
|
|
||||||
scanner_therasablueberry:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
environment:
|
|
||||||
- CHANNEL_NAME=therasablueberry
|
|
||||||
- CHANNEL_LANGUAGE=cs
|
|
||||||
- TIMEDELTA_DAYS=4
|
|
||||||
- TIMEDELTA_DAYS_EXACT=true
|
- TIMEDELTA_DAYS_EXACT=true
|
||||||
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
- TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov
|
||||||
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
- TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es
|
||||||
|
@ -19,7 +19,7 @@ for channel in channels:
|
|||||||
"environment": [
|
"environment": [
|
||||||
f"CHANNEL_NAME={channel['name']}",
|
f"CHANNEL_NAME={channel['name']}",
|
||||||
f"CHANNEL_LANGUAGE={channel['language']}",
|
f"CHANNEL_LANGUAGE={channel['language']}",
|
||||||
"TIMEDELTA_DAYS=4",
|
"TIMEDELTA_DAYS=6",
|
||||||
"TIMEDELTA_DAYS_EXACT=true",
|
"TIMEDELTA_DAYS_EXACT=true",
|
||||||
"TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov",
|
"TWITCH_CLIENT_ID=a0fuj6tm5ct79clvim9816orphqkov",
|
||||||
"TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es"
|
"TWITCH_CLIENT_SECRET=h7whj3yspxgj1909sgcafx6iz1p1es"
|
||||||
|
56
main.py
56
main.py
@ -2,6 +2,7 @@ import os
|
|||||||
import subprocess
|
import subprocess
|
||||||
import requests
|
import requests
|
||||||
import whisper
|
import whisper
|
||||||
|
from faster_whisper import WhisperModel
|
||||||
from datetime import datetime, time, timedelta
|
from datetime import datetime, time, timedelta
|
||||||
from zoneinfo import ZoneInfo
|
from zoneinfo import ZoneInfo
|
||||||
import json
|
import json
|
||||||
@ -142,6 +143,12 @@ def transcribe_audio(audio_file, model_name):
|
|||||||
result = model.transcribe(audio_file, language=CHANNEL_LANGUAGE)
|
result = model.transcribe(audio_file, language=CHANNEL_LANGUAGE)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def transcribe_audio_fast(audio_file, model_name):
    """Transcribe *audio_file* with faster-whisper and return its segments.

    Args:
        audio_file: Path of the audio file to transcribe.
        model_name: Accepted for signature parity with ``transcribe_audio``.
            It is currently ignored: the faster-whisper model is fixed to
            "large-v3-turbo".

    Returns:
        A list of faster-whisper segment objects (each exposes ``start``,
        ``end`` and ``text`` attributes), in chronological order.

    NOTE(review): the caller passes this result to ``json.dump``; segment
    objects may not serialize the way the dict-based whisper result does --
    confirm before relying on the transcript cache.
    """
    model_fast = WhisperModel(
        "large-v3-turbo",
        device="auto",
        compute_type="int8",
        download_root="/app/models",
    )
    # Pass the configured channel language, as transcribe_audio does; without
    # it faster-whisper guesses the language from the start of the audio only.
    segments, info = model_fast.transcribe(audio_file, language=CHANNEL_LANGUAGE)
    print("faster_whisper -- Detected language '%s' with probability %f" % (info.language, info.language_probability))
    # faster-whisper yields segments lazily: the transcription only runs while
    # the generator is consumed, and it can be consumed just once. Materialize
    # it here so the work (and any error) happens inside this function and the
    # caller gets a reusable sequence.
    return list(segments)
|
||||||
|
|
||||||
def search_transcription(result, keywords):
|
def search_transcription(result, keywords):
|
||||||
matches = []
|
matches = []
|
||||||
if "segments" in result:
|
if "segments" in result:
|
||||||
@ -245,6 +252,40 @@ def create_clip_from_comment_timestamp(video_file, comment_timestamp, vod_id):
|
|||||||
# ---------------------------
|
# ---------------------------
|
||||||
# Main Processing Pipeline
|
# Main Processing Pipeline
|
||||||
# ---------------------------
|
# ---------------------------
|
||||||
|
def _segment_field(segment, name):
    """Read *name* from a segment given as a dict or as an attribute object."""
    if isinstance(segment, dict):
        return segment[name]
    return getattr(segment, name)


def handle_matches_fast(vod_id, video_filename, result):
    """Report keyword mentions in a transcript and cut a clip for each one.

    Args:
        vod_id: Identifier of the VOD, used in the log output and passed on
            to ``create_clip_from_vod``.
        video_filename: Path of the downloaded VOD the clips are cut from.
        result: The transcript. Either an iterable of segments (objects with
            ``start``/``text`` attributes, or dicts with those keys), or a
            dict holding such an iterable under ``"segments"`` -- the shape a
            transcript has after being reloaded with ``json.load``.

    Returns:
        None. Matches are printed and ``create_clip_from_vod`` is called once
        per matching segment.
    """
    # A dict transcript must be unwrapped first: iterating it directly would
    # yield its string keys instead of its segments.
    if isinstance(result, dict):
        segments = result.get("segments", [])
    else:
        segments = result

    # Lower-case the keywords once instead of once per segment.
    lowered_keywords = [keyword.lower() for keyword in SEARCH_KEYWORDS]

    matches_fast = []
    for segment in segments or []:
        segment_text = _segment_field(segment, "text").lower()
        if any(keyword in segment_text for keyword in lowered_keywords):
            matches_fast.append(segment)

    if matches_fast:
        print(f"faster_whisper -- Found {len(matches_fast)} mention(s) of {SEARCH_KEYWORDS} in VOD {vod_id}:")
        for match in matches_fast:
            start = _segment_field(match, "start")
            text = _segment_field(match, "text")
            print(f" - At {start:.2f}s: {text}")
            create_clip_from_vod(video_filename, start, vod_id)
    else:
        print("faster_whisper -- No mentions of keywords.")
|
||||||
|
|
||||||
|
|
||||||
|
def handle_matches(vod_id, video_filename, result):
    """Report keyword mentions found in a whisper transcript and clip each one.

    Args:
        vod_id: Identifier of the VOD, used in the log output and passed on
            to ``create_clip_from_vod``.
        video_filename: Path of the downloaded VOD the clips are cut from.
        result: Transcription result handed to ``search_transcription``.

    Returns:
        None. Matches are printed and ``create_clip_from_vod`` is called once
        per match.
    """
    hits = search_transcription(result, SEARCH_KEYWORDS)

    # Nothing found: log it and stop early.
    if not hits:
        print(f"No mentions of {SEARCH_KEYWORDS} found in VOD {vod_id}.")
        return

    print(f"Found {len(hits)} mention(s) of {SEARCH_KEYWORDS} in VOD {vod_id}:")
    for hit in hits:
        offset = hit["start"]
        snippet = hit["text"]
        print(f" - At {offset:.2f}s: {snippet}")
        create_clip_from_vod(video_filename, offset, vod_id)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
print("Obtaining access token...")
|
print("Obtaining access token...")
|
||||||
token = get_access_token()
|
token = get_access_token()
|
||||||
@ -280,7 +321,7 @@ def main():
|
|||||||
result = json.load(f)
|
result = json.load(f)
|
||||||
else:
|
else:
|
||||||
print("Transcribing audio. This may take some time...")
|
print("Transcribing audio. This may take some time...")
|
||||||
result = transcribe_audio(audio_filename, MODEL_NAME)
|
result = transcribe_audio_fast(audio_filename, MODEL_NAME)
|
||||||
with open(transcript_filename, "w", encoding="utf-8") as f:
|
with open(transcript_filename, "w", encoding="utf-8") as f:
|
||||||
json.dump(result, f, ensure_ascii=False, indent=4)
|
json.dump(result, f, ensure_ascii=False, indent=4)
|
||||||
print(f"Transcript saved to {transcript_filename}")
|
print(f"Transcript saved to {transcript_filename}")
|
||||||
@ -288,16 +329,8 @@ def main():
|
|||||||
scrape_chat_log(vod_id, chat_log_filename)
|
scrape_chat_log(vod_id, chat_log_filename)
|
||||||
|
|
||||||
# Search transcript for keywords
|
# Search transcript for keywords
|
||||||
matches = search_transcription(result, SEARCH_KEYWORDS)
|
# handle_matches(vod_id, video_filename, result)
|
||||||
if matches:
|
handle_matches_fast(vod_id, video_filename, result)
|
||||||
print(f"Found {len(matches)} mention(s) of {SEARCH_KEYWORDS} in VOD {vod_id}:")
|
|
||||||
for match in matches:
|
|
||||||
start = match["start"]
|
|
||||||
text = match["text"]
|
|
||||||
print(f" - At {start:.2f}s: {text}")
|
|
||||||
create_clip_from_vod(video_filename, start, vod_id)
|
|
||||||
else:
|
|
||||||
print(f"No mentions of {SEARCH_KEYWORDS} found in VOD {vod_id}.")
|
|
||||||
|
|
||||||
# Load chat log from file
|
# Load chat log from file
|
||||||
try:
|
try:
|
||||||
@ -307,6 +340,7 @@ def main():
|
|||||||
print(f"Error loading chat log: {e}")
|
print(f"Error loading chat log: {e}")
|
||||||
chat_log = []
|
chat_log = []
|
||||||
|
|
||||||
|
|
||||||
# Search chat log using an array of keywords (using the same keywords as for transcript)
|
# Search chat log using an array of keywords (using the same keywords as for transcript)
|
||||||
comment_matches = find_comments_by_keywords(chat_log, SEARCH_KEYWORDS)
|
comment_matches = find_comments_by_keywords(chat_log, SEARCH_KEYWORDS)
|
||||||
if comment_matches:
|
if comment_matches:
|
||||||
|
Loading…
Reference in New Issue
Block a user