Upload 18 files
- .gitattributes +1 -0
- app.py +3 -0
- app/__pycache__/gradio_app.cpython-312.pyc +0 -0
- app/gradio_app.py +88 -6
- runtime/audio/tts_4056705ada224a0092325b697c975501.wav +3 -0
.gitattributes
CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 runtime/audio/tts_3bac9b920ffa4a6a93a9eed5ca215bea.wav filter=lfs diff=lfs merge=lfs -text
 runtime/audio/tts_fc786b49aad940e4992413247701abf3.wav filter=lfs diff=lfs merge=lfs -text
+runtime/audio/tts_4056705ada224a0092325b697c975501.wav filter=lfs diff=lfs merge=lfs -text
app.py
CHANGED
@@ -1,4 +1,7 @@
 from app.gradio_app import build_demo
+# in app.py or when building the demo
+from models.tts_router import cleanup_old_audio
+cleanup_old_audio(keep_latest=None)  # removes all existing tts_*.wav on boot
 
 def main():
     demo = build_demo()
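`cleanup_old_audio` is imported from `models/tts_router.py`, whose source is not part of this diff. From the way it is called here (`keep_latest=None` wipes every generated `tts_*.wav` at startup) and in `app/gradio_app.py` below (`keep_latest=<new file>` keeps only the most recent synthesis), a minimal sketch of the assumed helpers could look like the following; the glob pattern, the `AUDIO_DIR` value, and the error handling are assumptions, not code from the commit:

import glob
import os

# Assumed layout: generated audio lives under runtime/audio (matches the LFS-tracked paths).
AUDIO_DIR = os.path.join("runtime", "audio")

def ensure_runtime_audio_dir() -> str:
    """Create runtime/audio if it is missing and return the path."""
    os.makedirs(AUDIO_DIR, exist_ok=True)
    return AUDIO_DIR

def cleanup_old_audio(keep_latest: str | None = None) -> None:
    """Delete generated tts_*.wav files, optionally sparing the path in keep_latest."""
    for path in glob.glob(os.path.join(AUDIO_DIR, "tts_*.wav")):
        if keep_latest and os.path.abspath(path) == os.path.abspath(keep_latest):
            continue
        try:
            os.remove(path)
        except OSError:
            pass  # best-effort cleanup; a missing or locked file should not crash startup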
app/__pycache__/gradio_app.cpython-312.pyc
CHANGED
Binary files a/app/__pycache__/gradio_app.cpython-312.pyc and b/app/__pycache__/gradio_app.cpython-312.pyc differ
app/gradio_app.py
CHANGED
@@ -24,7 +24,8 @@ except Exception:
     llm_respond_chat = None
 
 from models.tts_router import tts_synthesize, ensure_runtime_audio_dir
-
+from models.tts_router import tts_synthesize, cleanup_old_audio, AUDIO_DIR
+import shutil, uuid, os
 
 # =============================================================================
 # Helpers (pure, modular)
@@ -46,6 +47,15 @@ def _safe_llm_reply(history: List[Dict[str, str]], user_text: str) -> str:
     return "Hello! How can I assist you today? Would you like to place an order or inquire about the menu?"
 
 
+def _persist_copy(src_path: str) -> str | None:
+    """Copy mic recording into runtime/audio with a stable filename, returns the new path."""
+    if not (src_path and os.path.exists(src_path)):
+        return None
+    os.makedirs(AUDIO_DIR, exist_ok=True)
+    dst = os.path.join(AUDIO_DIR, f"user_{uuid.uuid4().hex}.wav")
+    shutil.copyfile(src_path, dst)
+    return dst
+
 def _asr_transcribe(aud_path: str) -> str:
     """
     Transcribe audio to text. If ASR is unavailable, return a safe message.
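A quick illustration of the new helper's contract (the temporary input path below is made up; only the `user_<hex>.wav` naming and the `None` fallback come from the code above):

# Hypothetical usage: stabilize Gradio's temporary mic file before handing it to ASR.
stable = _persist_copy("/tmp/gradio/session123/recording.wav")  # illustrative path
if stable is not None:
    print(stable)  # e.g. runtime/audio/user_<32 hex chars>.wav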
@@ -247,13 +257,85 @@ def build_demo():
         # Only clears the recorder input; leaves assistant audio + transcripts intact
         return gr.update(value=None, interactive=True)
 
-    def on_voice_change(aud_path: str | None,
+    def on_voice_change(aud_path: str | None, voice_hist: list[dict] | None):
+        """
+        - Copy mic recording into runtime/audio
+        - ASR -> transcript
+        - LLM -> bot text
+        - TTS -> assistant wav in runtime/audio
+        - Cleanup:
+            * delete the user clip immediately after ASR
+            * delete all older TTS, keep only latest one
+        - Append transcript pairs to voice chat state
+        """
+        import time
+        empty_diag = {
+            "intent": None,
+            "slots": {},
+            "tool_selected": None,
+            "tool_result": None,
+            "latency_ms": 0,
+        }
         if not aud_path:
-
-
+            return (
+                voice_hist or [],
+                None,  # assistant_audio
+                empty_diag,
+                None,  # clear recorder (handled elsewhere if you chain a clear)
+                voice_hist or []
+            )
 
-
-
+        t0 = time.time()
+        # 1) Stabilize mic path into runtime/audio
+        stable_user = _persist_copy(aud_path)
+
+        # 2) Transcribe
+        try:
+            asr = get_asr()
+            asr_out = asr.transcribe(stable_user)
+            transcript = (asr_out.get("text") or "").strip() or "(no speech detected)"
+        finally:
+            # Remove the user clip ASAP to keep the folder small
+            if stable_user and os.path.exists(stable_user):
+                try:
+                    os.remove(stable_user)
+                except Exception as e:
+                    print("[CLEANUP] Could not delete user clip:", e)
+
+        # 3) Get bot reply (LLM response)
+        try:
+            from models.llm_chat import respond_chat_voice
+        except Exception:
+            # Fallback: reuse text chat function if you don't have a voice-specific one
+            from models.llm_chat import respond_chat as respond_chat_voice
+
+        bot_text, new_policy, policy_diag = respond_chat_voice(voice_hist or [], transcript, {})
+
+        # 4) TTS the bot reply into runtime/audio
+        new_tts = tts_synthesize(bot_text)  # this writes into runtime/audio
+        # Keep only the latest TTS (delete older tts_*.wav)
+        cleanup_old_audio(keep_latest=new_tts)
+
+        # 5) Append to voice chat state (text transcripts)
+        new_hist = (voice_hist or []) + [
+            {"role": "user", "content": transcript},
+            {"role": "assistant", "content": bot_text},
+        ]
+
+        diag = {
+            "intent": policy_diag.get("policy") if isinstance(policy_diag, dict) else None,
+            "slots": {},
+            "tool_selected": None,
+            "tool_result": {
+                "transcript": transcript,
+                "llm_response": bot_text,
+                "policy": policy_diag,
+            },
+            "latency_ms": int((time.time() - t0) * 1000),
+        }
+
+        # Return: (voice_chat, assistant_audio_path, diag, recorder_clear, voice_state)
+        return new_hist, new_tts, diag, gr.update(value=None), new_hist
 
     def on_text_send(txt: str, hist: List[Dict[str, str]]):
         new_hist, diag, clear_text = handle_text_turn(txt, hist or [])
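The hunk above only defines the handler; the event wiring is outside the changed lines. Assuming Gradio component names such as `mic`, `voice_chat`, `assistant_audio`, `diag_json`, and `voice_state` (all hypothetical, none appear in this diff), the five return values of `on_voice_change` would be bound roughly like this:

import gradio as gr

# Hypothetical wiring sketch; only the 5-tuple return order of on_voice_change
# (chat history, assistant audio path, diag dict, recorder reset, state) is taken
# from the diff above. Assumes it runs where on_voice_change is defined, i.e.
# inside build_demo() in this repo.
with gr.Blocks() as demo:
    voice_chat = gr.Chatbot(type="messages", label="Voice chat")
    assistant_audio = gr.Audio(label="Assistant reply", interactive=False)
    diag_json = gr.JSON(label="Diagnostics")
    mic = gr.Audio(sources=["microphone"], type="filepath", label="Speak")
    voice_state = gr.State([])

    mic.change(
        on_voice_change,
        inputs=[mic, voice_state],
        outputs=[voice_chat, assistant_audio, diag_json, mic, voice_state],
    )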
runtime/audio/tts_4056705ada224a0092325b697c975501.wav
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:429b36576536c38844ca7d9eec6264400595f2a0ec7024cb0044bf0bccefb1f7
+size 337448