Upload 6 files
Browse files- audio_tools.py +12 -1
- config.yaml +2 -2
- llm_router.py +17 -11
audio_tools.py
CHANGED
|
@@ -119,7 +119,18 @@ def transcribe_audio_remote(audio_path: str | Path, cfg: Dict[str, Any]) -> Dict
|
|
| 119 |
"timestamps": True,
|
| 120 |
"diarization": False, # diarization stays local
|
| 121 |
}
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
if isinstance(result, str):
|
| 125 |
return {"text": result, "segments": []}
|
|
|
|
| 119 |
"timestamps": True,
|
| 120 |
"diarization": False, # diarization stays local
|
| 121 |
}
|
| 122 |
+
try:
|
| 123 |
+
result = router.asr_transcribe(str(audio_path), model=model_name, **params)
|
| 124 |
+
except Exception as e:
|
| 125 |
+
try:
|
| 126 |
+
import httpx
|
| 127 |
+
if isinstance(e, httpx.ReadTimeout):
|
| 128 |
+
log.warning(f"ASR timeout for {audio_path}: {e}")
|
| 129 |
+
return {"text": "", "segments": []}
|
| 130 |
+
except Exception:
|
| 131 |
+
pass
|
| 132 |
+
log.warning(f"ASR error for {audio_path}: {e}")
|
| 133 |
+
return {"text": "", "segments": []}
|
| 134 |
|
| 135 |
if isinstance(result, str):
|
| 136 |
return {"text": result, "segments": []}
|
config.yaml
CHANGED
|
@@ -55,8 +55,8 @@ audio_processing:
|
|
| 55 |
|
| 56 |
diarization:
|
| 57 |
enabled: true
|
| 58 |
-
min_segment_duration:
|
| 59 |
-
max_segment_duration:
|
| 60 |
|
| 61 |
enable_voice_embeddings: true # SpeechBrain ECAPA
|
| 62 |
speaker_embedding:
|
|
|
|
| 55 |
|
| 56 |
diarization:
|
| 57 |
enabled: true
|
| 58 |
+
min_segment_duration: 0.5 # en segundos (clips cortos)
|
| 59 |
+
max_segment_duration: 10.0
|
| 60 |
|
| 61 |
enable_voice_embeddings: true # SpeechBrain ECAPA
|
| 62 |
speaker_embedding:
|
llm_router.py
CHANGED
|
@@ -23,18 +23,20 @@ class LLMRouter:
|
|
| 23 |
token_enabled = cfg.get("security", {}).get("use_hf_token", False)
|
| 24 |
hf_token = os.getenv(cfg.get("security", {}).get("hf_token_env", "HF_TOKEN")) if token_enabled else None
|
| 25 |
|
| 26 |
-
def
|
| 27 |
info = eps.get(endpoint_key, {})
|
| 28 |
base_url = info.get("base_url") or f"https://{base_user}-{info.get('space')}.hf.space"
|
| 29 |
use_gradio = (info.get("client", "gradio") == "gradio")
|
| 30 |
timeout = int(cfg.get("remote_spaces", {}).get("http", {}).get("timeout_seconds", 180))
|
| 31 |
-
|
|
|
|
|
|
|
| 32 |
|
| 33 |
-
self.
|
| 34 |
-
"salamandra-instruct":
|
| 35 |
-
"salamandra-vision":
|
| 36 |
-
"salamandra-tools":
|
| 37 |
-
"whisper-catalan":
|
| 38 |
}
|
| 39 |
|
| 40 |
self.service_names = {
|
|
@@ -56,7 +58,8 @@ class LLMRouter:
|
|
| 56 |
if model in self.rem:
|
| 57 |
self._log_connect(model, "connect")
|
| 58 |
t0 = time.time()
|
| 59 |
-
|
|
|
|
| 60 |
self._log_connect(model, "done", time.time() - t0)
|
| 61 |
return out
|
| 62 |
raise RuntimeError(f"Modelo local no implementado para: {model}")
|
|
@@ -66,7 +69,8 @@ class LLMRouter:
|
|
| 66 |
if model in self.rem:
|
| 67 |
self._log_connect(model, "connect")
|
| 68 |
t0 = time.time()
|
| 69 |
-
|
|
|
|
| 70 |
self._log_connect(model, "done", time.time() - t0)
|
| 71 |
return out
|
| 72 |
raise RuntimeError(f"Modelo local no implementado para: {model}")
|
|
@@ -76,7 +80,8 @@ class LLMRouter:
|
|
| 76 |
if model in self.rem:
|
| 77 |
self._log_connect(model, "connect")
|
| 78 |
t0 = time.time()
|
| 79 |
-
|
|
|
|
| 80 |
self._log_connect(model, "done", time.time() - t0)
|
| 81 |
return out
|
| 82 |
raise RuntimeError(f"Modelo local no implementado para: {model}")
|
|
@@ -86,7 +91,8 @@ class LLMRouter:
|
|
| 86 |
if model in self.rem:
|
| 87 |
self._log_connect(model, "connect")
|
| 88 |
t0 = time.time()
|
| 89 |
-
|
|
|
|
| 90 |
self._log_connect(model, "done", time.time() - t0)
|
| 91 |
return out
|
| 92 |
raise RuntimeError(f"Modelo local no implementado para: {model}")
|
|
|
|
| 23 |
token_enabled = cfg.get("security", {}).get("use_hf_token", False)
|
| 24 |
hf_token = os.getenv(cfg.get("security", {}).get("hf_token_env", "HF_TOKEN")) if token_enabled else None
|
| 25 |
|
| 26 |
+
def mk_factory(endpoint_key: str, cls):
|
| 27 |
info = eps.get(endpoint_key, {})
|
| 28 |
base_url = info.get("base_url") or f"https://{base_user}-{info.get('space')}.hf.space"
|
| 29 |
use_gradio = (info.get("client", "gradio") == "gradio")
|
| 30 |
timeout = int(cfg.get("remote_spaces", {}).get("http", {}).get("timeout_seconds", 180))
|
| 31 |
+
def _factory():
|
| 32 |
+
return cls(base_url=base_url, use_gradio=use_gradio, hf_token=hf_token, timeout=timeout)
|
| 33 |
+
return _factory
|
| 34 |
|
| 35 |
+
self.client_factories = {
|
| 36 |
+
"salamandra-instruct": mk_factory("salamandra-instruct", InstructClient),
|
| 37 |
+
"salamandra-vision": mk_factory("salamandra-vision", VisionClient),
|
| 38 |
+
"salamandra-tools": mk_factory("salamandra-tools", ToolsClient),
|
| 39 |
+
"whisper-catalan": mk_factory("whisper-catalan", ASRClient),
|
| 40 |
}
|
| 41 |
|
| 42 |
self.service_names = {
|
|
|
|
| 58 |
if model in self.rem:
|
| 59 |
self._log_connect(model, "connect")
|
| 60 |
t0 = time.time()
|
| 61 |
+
client = self.client_factories[model]()
|
| 62 |
+
out = client.generate(prompt, system=system, **kwargs) # type: ignore
|
| 63 |
self._log_connect(model, "done", time.time() - t0)
|
| 64 |
return out
|
| 65 |
raise RuntimeError(f"Modelo local no implementado para: {model}")
|
|
|
|
| 69 |
if model in self.rem:
|
| 70 |
self._log_connect(model, "connect")
|
| 71 |
t0 = time.time()
|
| 72 |
+
client = self.client_factories[model]()
|
| 73 |
+
out = client.describe(image_paths, context=context, **kwargs) # type: ignore
|
| 74 |
self._log_connect(model, "done", time.time() - t0)
|
| 75 |
return out
|
| 76 |
raise RuntimeError(f"Modelo local no implementado para: {model}")
|
|
|
|
| 80 |
if model in self.rem:
|
| 81 |
self._log_connect(model, "connect")
|
| 82 |
t0 = time.time()
|
| 83 |
+
client = self.client_factories[model]()
|
| 84 |
+
out = client.chat(messages, tools=tools, **kwargs) # type: ignore
|
| 85 |
self._log_connect(model, "done", time.time() - t0)
|
| 86 |
return out
|
| 87 |
raise RuntimeError(f"Modelo local no implementado para: {model}")
|
|
|
|
| 91 |
if model in self.rem:
|
| 92 |
self._log_connect(model, "connect")
|
| 93 |
t0 = time.time()
|
| 94 |
+
client = self.client_factories[model]()
|
| 95 |
+
out = client.transcribe(audio_path, **kwargs) # type: ignore
|
| 96 |
self._log_connect(model, "done", time.time() - t0)
|
| 97 |
return out
|
| 98 |
raise RuntimeError(f"Modelo local no implementado para: {model}")
|