Spaces:
Running
Running
If the user does not choose a language for Whisper,
the language detected by Whisper will be automatically set for the NLLB model, to avoid abnormal errors when determining the source language in NLLB.
- app.py +16 -12
- src/vad.py +2 -2
- webui.bat +1 -1
app.py
CHANGED
|
@@ -20,9 +20,7 @@ from src.diarization.diarizationContainer import DiarizationContainer
|
|
| 20 |
from src.hooks.progressListener import ProgressListener
|
| 21 |
from src.hooks.subTaskProgressListener import SubTaskProgressListener
|
| 22 |
from src.hooks.whisperProgressHook import create_progress_listener_handle
|
| 23 |
-
from src.languages import _TO_LANGUAGE_CODE
|
| 24 |
-
from src.languages import get_language_names
|
| 25 |
-
from src.languages import get_language_from_name
|
| 26 |
from src.modelCache import ModelCache
|
| 27 |
from src.prompts.jsonPromptStrategy import JsonPromptStrategy
|
| 28 |
from src.prompts.prependPromptStrategy import PrependPromptStrategy
|
|
@@ -269,6 +267,10 @@ class WhisperTranscriber:
|
|
| 269 |
|
| 270 |
# Transcribe
|
| 271 |
result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vadOptions, scaled_progress_listener, **decodeOptions)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
short_name, suffix = source.get_short_name_suffix(max_length=self.app_config.input_max_file_name_length)
|
| 273 |
filePrefix = slugify(source_prefix + short_name, allow_unicode=True)
|
| 274 |
|
|
@@ -700,8 +702,8 @@ def create_ui(app_config: ApplicationConfig):
|
|
| 700 |
|
| 701 |
common_output = lambda : [
|
| 702 |
gr.File(label="Download"),
|
| 703 |
-
gr.Text(label="Transcription"),
|
| 704 |
-
gr.Text(label="Segments"),
|
| 705 |
]
|
| 706 |
|
| 707 |
is_queue_mode = app_config.queue_concurrency_count is not None and app_config.queue_concurrency_count > 0
|
|
@@ -863,13 +865,15 @@ if __name__ == '__main__':
|
|
| 863 |
|
| 864 |
updated_config = default_app_config.update(**args)
|
| 865 |
|
| 866 |
-
#updated_config.whisper_implementation = "faster-whisper"
|
| 867 |
-
#updated_config.input_audio_max_duration = -1
|
| 868 |
-
#updated_config.default_model_name = "large-v2"
|
| 869 |
-
#updated_config.output_dir = "output"
|
| 870 |
-
#updated_config.vad_max_merge_size = 90
|
| 871 |
-
#updated_config.merge_subtitle_with_sources =
|
| 872 |
-
#updated_config.autolaunch = True
|
|
|
|
|
|
|
| 873 |
|
| 874 |
if (threads := args.pop("threads")) > 0:
|
| 875 |
torch.set_num_threads(threads)
|
|
|
|
| 20 |
from src.hooks.progressListener import ProgressListener
|
| 21 |
from src.hooks.subTaskProgressListener import SubTaskProgressListener
|
| 22 |
from src.hooks.whisperProgressHook import create_progress_listener_handle
|
| 23 |
+
from src.languages import _TO_LANGUAGE_CODE, get_language_names, get_language_from_name, get_language_from_code
|
|
|
|
|
|
|
| 24 |
from src.modelCache import ModelCache
|
| 25 |
from src.prompts.jsonPromptStrategy import JsonPromptStrategy
|
| 26 |
from src.prompts.prependPromptStrategy import PrependPromptStrategy
|
|
|
|
| 267 |
|
| 268 |
# Transcribe
|
| 269 |
result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vadOptions, scaled_progress_listener, **decodeOptions)
|
| 270 |
+
if whisper_lang is None and result["language"] is not None and len(result["language"]) > 0:
|
| 271 |
+
whisper_lang = get_language_from_code(result["language"])
|
| 272 |
+
nllb_model.whisper_lang = whisper_lang
|
| 273 |
+
|
| 274 |
short_name, suffix = source.get_short_name_suffix(max_length=self.app_config.input_max_file_name_length)
|
| 275 |
filePrefix = slugify(source_prefix + short_name, allow_unicode=True)
|
| 276 |
|
|
|
|
| 702 |
|
| 703 |
common_output = lambda : [
|
| 704 |
gr.File(label="Download"),
|
| 705 |
+
gr.Text(label="Transcription", autoscroll=False),
|
| 706 |
+
gr.Text(label="Segments", autoscroll=False),
|
| 707 |
]
|
| 708 |
|
| 709 |
is_queue_mode = app_config.queue_concurrency_count is not None and app_config.queue_concurrency_count > 0
|
|
|
|
| 865 |
|
| 866 |
updated_config = default_app_config.update(**args)
|
| 867 |
|
| 868 |
+
# updated_config.whisper_implementation = "faster-whisper"
|
| 869 |
+
# updated_config.input_audio_max_duration = -1
|
| 870 |
+
# updated_config.default_model_name = "large-v2"
|
| 871 |
+
# updated_config.output_dir = "output"
|
| 872 |
+
# updated_config.vad_max_merge_size = 90
|
| 873 |
+
# updated_config.merge_subtitle_with_sources = False
|
| 874 |
+
# updated_config.autolaunch = True
|
| 875 |
+
# updated_config.auto_parallel = False
|
| 876 |
+
# updated_config.save_downloaded_files = True
|
| 877 |
|
| 878 |
if (threads := args.pop("threads")) > 0:
|
| 879 |
torch.set_num_threads(threads)
|
src/vad.py
CHANGED
|
@@ -205,7 +205,7 @@ class AbstractTranscription(ABC):
|
|
| 205 |
# Detected language
|
| 206 |
detected_language = languageCounter.most_common(1)[0][0] if len(languageCounter) > 0 else None
|
| 207 |
|
| 208 |
-
print("Running whisper from ", format_timestamp(segment_start), " to ", format_timestamp(segment_end), ", duration: ",
|
| 209 |
segment_duration, "expanded: ", segment_expand_amount, ", prompt: ", segment_prompt, ", detected language: ", detected_language)
|
| 210 |
|
| 211 |
perf_start_time = time.perf_counter()
|
|
@@ -217,7 +217,7 @@ class AbstractTranscription(ABC):
|
|
| 217 |
segment_result = whisperCallable.invoke(segment_audio, segment_index, segment_prompt, detected_language, progress_listener=scaled_progress_listener)
|
| 218 |
|
| 219 |
perf_end_time = time.perf_counter()
|
| 220 |
-
print("Whisper took {} seconds".format(perf_end_time - perf_start_time))
|
| 221 |
|
| 222 |
adjusted_segments = self.adjust_timestamp(segment_result["segments"], adjust_seconds=segment_start, max_source_time=segment_duration)
|
| 223 |
|
|
|
|
| 205 |
# Detected language
|
| 206 |
detected_language = languageCounter.most_common(1)[0][0] if len(languageCounter) > 0 else None
|
| 207 |
|
| 208 |
+
print(f"Running whisper {idx}: from ", format_timestamp(segment_start), " to ", format_timestamp(segment_end), ", duration: ",
|
| 209 |
segment_duration, "expanded: ", segment_expand_amount, ", prompt: ", segment_prompt, ", detected language: ", detected_language)
|
| 210 |
|
| 211 |
perf_start_time = time.perf_counter()
|
|
|
|
| 217 |
segment_result = whisperCallable.invoke(segment_audio, segment_index, segment_prompt, detected_language, progress_listener=scaled_progress_listener)
|
| 218 |
|
| 219 |
perf_end_time = time.perf_counter()
|
| 220 |
+
print("\tWhisper took {} seconds".format(perf_end_time - perf_start_time))
|
| 221 |
|
| 222 |
adjusted_segments = self.adjust_timestamp(segment_result["segments"], adjust_seconds=segment_start, max_source_time=segment_duration)
|
| 223 |
|
webui.bat
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
@echo off
|
| 2 |
|
| 3 |
:: The source of the webui.bat file is stable-diffusion-webui
|
| 4 |
-
set COMMANDLINE_ARGS=--whisper_implementation faster-whisper --input_audio_max_duration -1 --default_model_name large-v2 --auto_parallel True --output_dir output --vad_max_merge_size 90 --
|
| 5 |
|
| 6 |
if not defined PYTHON (set PYTHON=python)
|
| 7 |
if not defined VENV_DIR (set "VENV_DIR=%~dp0%venv")
|
|
|
|
| 1 |
@echo off
|
| 2 |
|
| 3 |
:: The source of the webui.bat file is stable-diffusion-webui
|
| 4 |
+
set COMMANDLINE_ARGS=--whisper_implementation faster-whisper --input_audio_max_duration -1 --default_model_name large-v2 --auto_parallel True --output_dir output --vad_max_merge_size 90 --save_downloaded_files --autolaunch
|
| 5 |
|
| 6 |
if not defined PYTHON (set PYTHON=python)
|
| 7 |
if not defined VENV_DIR (set "VENV_DIR=%~dp0%venv")
|