Eyob-Sol committed on
Commit 3328f65 · verified · 1 Parent(s): 9ba120a

Upload 18 files

.gitattributes CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 runtime/audio/tts_3bac9b920ffa4a6a93a9eed5ca215bea.wav filter=lfs diff=lfs merge=lfs -text
 runtime/audio/tts_fc786b49aad940e4992413247701abf3.wav filter=lfs diff=lfs merge=lfs -text
+runtime/audio/tts_4056705ada224a0092325b697c975501.wav filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,4 +1,7 @@
 from app.gradio_app import build_demo
+# in app.py or when building the demo
+from models.tts_router import cleanup_old_audio
+cleanup_old_audio(keep_latest=None) # removes all existing tts_*.wav on boot

 def main():
     demo = build_demo()
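
Note: `cleanup_old_audio` is imported from `models/tts_router.py`, which is not among the files shown in this diff. Given the two call sites in this commit (`keep_latest=None` at boot, `keep_latest=new_tts` after synthesis in `app/gradio_app.py`), a minimal sketch of what it might look like (an assumption, not the repo's actual implementation):

# models/tts_router.py -- sketch only; the real implementation is not part of this diff
import glob
import os

AUDIO_DIR = os.path.join("runtime", "audio")

def cleanup_old_audio(keep_latest: str | None = None) -> None:
    """Delete tts_*.wav files under AUDIO_DIR, optionally sparing one path."""
    keep = os.path.abspath(keep_latest) if keep_latest else None
    for path in glob.glob(os.path.join(AUDIO_DIR, "tts_*.wav")):
        if keep and os.path.abspath(path) == keep:
            continue  # keep the most recent synthesis result
        try:
            os.remove(path)
        except OSError as exc:
            print("[CLEANUP] Could not delete", path, ":", exc)

With `keep_latest=None` every `tts_*.wav` is removed; with a path, everything except that file is removed.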
app/__pycache__/gradio_app.cpython-312.pyc CHANGED
Binary files a/app/__pycache__/gradio_app.cpython-312.pyc and b/app/__pycache__/gradio_app.cpython-312.pyc differ
 
app/gradio_app.py CHANGED
@@ -24,7 +24,8 @@ except Exception:
     llm_respond_chat = None

 from models.tts_router import tts_synthesize, ensure_runtime_audio_dir
-
+from models.tts_router import tts_synthesize, cleanup_old_audio, AUDIO_DIR
+import shutil, uuid, os

 # =============================================================================
 # Helpers (pure, modular)
@@ -46,6 +47,15 @@ def _safe_llm_reply(history: List[Dict[str, str]], user_text: str) -> str:
     return "Hello! How can I assist you today? Would you like to place an order or inquire about the menu?"


+def _persist_copy(src_path: str) -> str | None:
+    """Copy mic recording into runtime/audio with a stable filename, returns the new path."""
+    if not (src_path and os.path.exists(src_path)):
+        return None
+    os.makedirs(AUDIO_DIR, exist_ok=True)
+    dst = os.path.join(AUDIO_DIR, f"user_{uuid.uuid4().hex}.wav")
+    shutil.copyfile(src_path, dst)
+    return dst
+
 def _asr_transcribe(aud_path: str) -> str:
     """
     Transcribe audio to text. If ASR is unavailable, return a safe message.
@@ -247,13 +257,85 @@ def build_demo():
         # Only clears the recorder input; leaves assistant audio + transcripts intact
         return gr.update(value=None, interactive=True)

-    def on_voice_change(aud_path: str | None, vhist: List[Dict[str, str]]):
+    def on_voice_change(aud_path: str | None, voice_hist: list[dict] | None):
+        """
+        - Copy mic recording into runtime/audio
+        - ASR -> transcript
+        - LLM -> bot text
+        - TTS -> assistant wav in runtime/audio
+        - Cleanup:
+            * delete the user clip immediately after ASR
+            * delete all older TTS, keep only latest one
+        - Append transcript pairs to voice chat state
+        """
+        import time
+        empty_diag = {
+            "intent": None,
+            "slots": {},
+            "tool_selected": None,
+            "tool_result": None,
+            "latency_ms": 0,
+        }
         if not aud_path:
-            # no audio; keep everything as-is
-            return vhist or [], None, {"intent": None, "slots": {}, "tool_selected": None, "tool_result": None, "latency_ms": 0}
+            return (
+                voice_hist or [],
+                None, # assistant_audio
+                empty_diag,
+                None, # clear recorder (handled elsewhere if you chain a clear)
+                voice_hist or []
+            )

-        new_vhist, tts_path, diag = handle_voice_turn(aud_path, vhist or [])
-        return new_vhist, tts_path, diag
+        t0 = time.time()
+        # 1) Stabilize mic path into runtime/audio
+        stable_user = _persist_copy(aud_path)
+
+        # 2) Transcribe
+        try:
+            asr = get_asr()
+            asr_out = asr.transcribe(stable_user)
+            transcript = (asr_out.get("text") or "").strip() or "(no speech detected)"
+        finally:
+            # Remove the user clip ASAP to keep the folder small
+            if stable_user and os.path.exists(stable_user):
+                try:
+                    os.remove(stable_user)
+                except Exception as e:
+                    print("[CLEANUP] Could not delete user clip:", e)
+
+        # 3) Get bot reply (LLM response)
+        try:
+            from models.llm_chat import respond_chat_voice
+        except Exception:
+            # Fallback: reuse text chat function if you don’t have a voice-specific one
+            from models.llm_chat import respond_chat as respond_chat_voice
+
+        bot_text, new_policy, policy_diag = respond_chat_voice(voice_hist or [], transcript, {})
+
+        # 4) TTS the bot reply into runtime/audio
+        new_tts = tts_synthesize(bot_text) # this writes into runtime/audio
+        # Keep only the latest TTS (delete older tts_*.wav)
+        cleanup_old_audio(keep_latest=new_tts)
+
+        # 5) Append to voice chat state (text transcripts)
+        new_hist = (voice_hist or []) + [
+            {"role": "user", "content": transcript},
+            {"role": "assistant", "content": bot_text},
+        ]
+
+        diag = {
+            "intent": policy_diag.get("policy") if isinstance(policy_diag, dict) else None,
+            "slots": {},
+            "tool_selected": None,
+            "tool_result": {
+                "transcript": transcript,
+                "llm_response": bot_text,
+                "policy": policy_diag,
+            },
+            "latency_ms": int((time.time() - t0) * 1000),
+        }
+
+        # Return: (voice_chat, assistant_audio_path, diag, recorder_clear, voice_state)
+        return new_hist, new_tts, diag, gr.update(value=None), new_hist

     def on_text_send(txt: str, hist: List[Dict[str, str]]):
         new_hist, diag, clear_text = handle_text_turn(txt, hist or [])
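
The new `on_voice_change` returns a 5-tuple `(voice_chat, assistant_audio_path, diag, recorder_clear, voice_state)`, but the Gradio event wiring inside `build_demo()` is not part of this diff. A minimal sketch of how it could be hooked up, assuming hypothetical component names (`recorder`, `voice_chat`, `assistant_audio`, `diag_json`, `voice_state` are not taken from the repo):

# Sketch only -- component names are assumptions; on_voice_change comes from the diff above.
import gradio as gr

with gr.Blocks() as demo:
    voice_chat = gr.Chatbot(type="messages", label="Voice chat")
    assistant_audio = gr.Audio(label="Assistant reply", autoplay=True)
    diag_json = gr.JSON(label="Diagnostics")
    recorder = gr.Audio(sources=["microphone"], type="filepath", label="Speak")
    voice_state = gr.State([])

    # One output per element of the handler's return tuple:
    # (voice_chat, assistant_audio_path, diag, recorder_clear, voice_state)
    recorder.change(
        on_voice_change,
        inputs=[recorder, voice_state],
        outputs=[voice_chat, assistant_audio, diag_json, recorder, voice_state],
    )

In this arrangement, mapping the recorder component to the fourth output lets the handler clear it in the same event via `gr.update(value=None)`.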
runtime/audio/tts_4056705ada224a0092325b697c975501.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:429b36576536c38844ca7d9eec6264400595f2a0ec7024cb0044bf0bccefb1f7
+size 337448
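
Because the file is tracked by the `.gitattributes` rule added above, what this ADDED entry stores in git history is the three-line Git LFS pointer (version, oid, size), not the ~337 KB WAV itself; the audio is kept by LFS and fetched on checkout. A small, hypothetical helper (not part of this repo) for reading such a pointer:

# Hypothetical helper: parse the key/value lines of a Git LFS pointer file.
def read_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path, "r", encoding="utf-8") as fh:
        for line in fh:
            key, _, value = line.strip().partition(" ")
            if key and value:
                fields[key] = value
    return fields

# For the pointer above this yields:
# {"version": "https://git-lfs.github.com/spec/v1", "oid": "sha256:429b...", "size": "337448"}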