Spaces:

Eyob-Sol
/

futurecafe-voice-core

Sleeping

App Files Files Community

Eyob-Sol committed on Sep 18

Commit

51d45ce

verified ·

1 Parent(s): f418bc8

Update models/tts_router.py

Browse files

Files changed (1) hide show

models/tts_router.py +60 -71

models/tts_router.py CHANGED Viewed

@@ -1,106 +1,102 @@
 # models/tts_router.py
 from __future__ import annotations
-import os
-import re
-import uuid
-import wave
-import glob
-import subprocess
 from shutil import which
 from typing import Optional
-RUNTIME_AUDIO_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "runtime", "audio"))
-AUDIO_DIR = os.path.join(os.path.dirname(__file__), "..", "runtime", "audio")
 os.makedirs(AUDIO_DIR, exist_ok=True)
 def cleanup_old_audio(keep_latest: Optional[str] = None):
-    """Delete all audio files in runtime/audio except the one to keep."""
     for f in glob.glob(os.path.join(AUDIO_DIR, "*")):
-        # keep both .wav/.aiff just in case engine produced AIFF
         if keep_latest and os.path.abspath(f) == os.path.abspath(keep_latest):
             continue
         if f.endswith((".wav", ".aiff")):
             try:
                 os.remove(f)
             except Exception as e:
-                print(f"[CLEANUP] Could not delete {f}: {e}")
 def ensure_runtime_audio_dir() -> str:
-    os.makedirs(RUNTIME_AUDIO_DIR, exist_ok=True)
-    return RUNTIME_AUDIO_DIR
 def _have(cmd: str) -> bool:
-    return which(cmd) is not None
 def _is_valid_wav(path: str, min_duration_s: float = 0.25) -> bool:
     try:
         with wave.open(path, "rb") as w:
             frames = w.getnframes()
-            rate = w.getframerate()
             dur = (frames / float(rate)) if rate else 0.0
-            if frames <= 0 or rate <= 0 or dur < min_duration_s:
-                return False
-    except Exception:
         return False
-    return True
 def _tts_with_piper(text: str) -> Optional[str]:
-    """
-    Use local Piper if available.
-    Env:
-      - PIPER_MODEL: path to models/piper/<voice>.onnx
-      - PIPER_BIN (optional): override binary name/path (default 'piper')
-    """
     model = os.getenv("PIPER_MODEL")
     if not model or not os.path.exists(model):
         return None
-    piper_bin = os.getenv("PIPER_BIN", "piper")
-    if not _have(piper_bin) and not os.path.isabs(piper_bin):
-        # If the user passed an absolute path, we try it even if not in PATH
         return None
     out_dir = ensure_runtime_audio_dir()
     out_path = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")
-    safe_text = re.sub(r"[\x00-\x1F]+", " ", text).strip()
     try:
-        p = subprocess.Popen(
-            [piper_bin, "--model", model, "--output_file", out_path],
-            stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
-        )
-        p.communicate(input=safe_text.encode("utf-8"), timeout=45)
         if p.returncode == 0 and os.path.exists(out_path) and _is_valid_wav(out_path):
             return os.path.abspath(out_path)
     except Exception as e:
-        print("[TTS] Piper error:", e)
     return None
 def _tts_with_say(text: str) -> Optional[str]:
-    """
-    macOS `say` fallback. Produces WAV via afconvert or ffmpeg if present;
-    else returns AIFF path.
-    Env:
-      - SAY_VOICE (optional): e.g., "Samantha" / "Alex"
-    """
     if os.name != "posix" or not _have("say"):
         return None
     out_dir = ensure_runtime_audio_dir()
     aiff = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.aiff")
-    wav = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")
-    voice = os.getenv("SAY_VOICE")
     safe_text = re.sub(r"[\x00-\x1F`<>]+", " ", text).strip() or "Hello."
     try:
-        cmd = ["say", "-o", aiff]
-        if voice:
-            cmd.extend(["-v", voice])
-        cmd.append(safe_text)
         subprocess.run(cmd, check=True)
     except Exception as e:
-        print("[TTS] say failed:", e)
         return None
     converted = False
@@ -109,54 +105,47 @@ def _tts_with_say(text: str) -> Optional[str]:
             subprocess.run(
                 ["afconvert", "-f", "WAVE", "-d", "LEI16", "-c", "1", "-s", "1", aiff, wav],
                 check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
-            )
-            converted = True
-        except Exception:
-            converted = False
     if not converted and which("ffmpeg"):
         try:
             subprocess.run(
                 ["ffmpeg", "-y", "-i", aiff, "-ar", "22050", "-ac", "1", wav],
                 check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
-            )
-            converted = True
-        except Exception:
-            converted = False
     if converted and os.path.exists(wav) and _is_valid_wav(wav):
-        try:
-            os.remove(aiff)
-        except Exception:
-            pass
         return os.path.abspath(wav)
     if os.path.exists(aiff):
-        # AIFF is fine as a fallback (Gradio can usually play it)
         return os.path.abspath(aiff)
     return None
 def tts_synthesize(text: str) -> Optional[str]:
-    """
-    High-level TTS router:
-      1) Piper (if configured)
-      2) macOS 'say'
-      3) None
-    Always writes to runtime/audio and prunes older files.
-    """
     if not (text and text.strip()):
         return None
     ensure_runtime_audio_dir()
     out = _tts_with_piper(text)
     if out:
         cleanup_old_audio(keep_latest=out)
         return out
     out = _tts_with_say(text)
     if out:
         cleanup_old_audio(keep_latest=out)
         return out
     return None

 # models/tts_router.py
 from __future__ import annotations
+import os, re, uuid, wave, glob, subprocess, sys
 from shutil import which
 from typing import Optional
# Output directory for synthesized audio: "runtime/audio" one level above the
# directory that contains this module, stored as an absolute path.
AUDIO_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "..", "runtime", "audio")
)
# Created at import time so the directory exists before anything writes to it.
os.makedirs(AUDIO_DIR, exist_ok=True)
+def _dbg(*args):
+    if os.getenv("DEBUG", "false").lower() in ("1","true","yes","on"):
+        print("[TTS]", *args, file=sys.stderr, flush=True)
 def cleanup_old_audio(keep_latest: Optional[str] = None):
     """Delete the .wav and .aiff files in AUDIO_DIR, sparing `keep_latest`.

     Args:
         keep_latest: Path of the one file to leave in place. Compared by
             absolute path; when falsy, every matching file is deleted.

     A file that cannot be removed is reported through _dbg and skipped.
     """
     keep_abs = os.path.abspath(keep_latest) if keep_latest else None
     for candidate in glob.glob(os.path.join(AUDIO_DIR, "*")):
         if not candidate.endswith((".wav", ".aiff")):
             continue
         if keep_abs is not None and os.path.abspath(candidate) == keep_abs:
             continue
         try:
             os.remove(candidate)
         except Exception as err:
             _dbg(f"cleanup skip {candidate}: {err}")
 def ensure_runtime_audio_dir() -> str:
     """Create AUDIO_DIR if it does not exist yet and return its path."""
     target = AUDIO_DIR
     os.makedirs(target, exist_ok=True)
     return target
 def _have(cmd: str) -> bool:
+    # allow absolute path (even if not “in PATH”)
+    return os.path.isabs(cmd) and os.path.exists(cmd) or which(cmd) is not None
 def _is_valid_wav(path: str, min_duration_s: float = 0.25) -> bool:
     try:
         with wave.open(path, "rb") as w:
             frames = w.getnframes()
+            rate   = w.getframerate()
             dur = (frames / float(rate)) if rate else 0.0
+            return frames > 0 and rate > 0 and dur >= min_duration_s
+    except Exception as e:
+        _dbg("wav check failed:", e)
         return False
 def _tts_with_piper(text: str) -> Optional[str]:
     """Synthesize `text` into a WAV file by running the Piper command-line tool.

     Environment variables read:
       - PIPER_MODEL: path to the voice model; must be an existing path.
       - PIPER_BIN: Piper executable name or path (default "piper").
       - PIPER_LENGTH_SCALE: optional, forwarded as --length_scale.
       - PIPER_NOISE_SCALE: optional, forwarded as --noise_scale.

     Args:
         text: Text to speak. Control characters are replaced with spaces.

     Returns:
         Absolute path of the generated WAV file, or None when the model or
         binary is unavailable, Piper fails or exceeds the 60 second timeout,
         or the output is not a valid WAV.
     """
     model = os.getenv("PIPER_MODEL")
     piper_bin = os.getenv("PIPER_BIN", "piper")
     if not model or not os.path.exists(model):
         _dbg("piper: missing/invalid PIPER_MODEL:", model)
         return None
     if not _have(piper_bin):
         _dbg("piper: binary not found:", piper_bin)
         return None

     out_dir = ensure_runtime_audio_dir()
     out_path = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")
     # Replace control characters with spaces; fall back to a fixed phrase if
     # nothing is left so Piper never receives empty input.
     safe_text = re.sub(r"[\x00-\x1F]+", " ", text).strip() or "Hello."

     cmd = [piper_bin, "--model", model, "--output_file", out_path]
     # Optional tuning flags, each taken from its own environment variable.
     for env_name, flag in (
         ("PIPER_LENGTH_SCALE", "--length_scale"),
         ("PIPER_NOISE_SCALE", "--noise_scale"),
     ):
         value = os.getenv(env_name)
         if value:
             cmd += [flag, value]
     _dbg("piper cmd:", " ".join(cmd))

     try:
         # subprocess.run() kills and reaps the child when the timeout expires;
         # Popen.communicate(timeout=...) on its own would leave it running.
         result = subprocess.run(
             cmd,
             input=safe_text.encode("utf-8"),
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             timeout=60,
         )
         _dbg("piper rc:", result.returncode)
         if result.stderr:
             _dbg("piper stderr:", result.stderr.decode("utf-8", errors="ignore")[:400])
         if result.returncode == 0 and os.path.exists(out_path) and _is_valid_wav(out_path):
             _dbg("piper ok ->", out_path)
             return os.path.abspath(out_path)
         _dbg("piper wrote invalid or no wav at:", out_path)
     except Exception as e:
         _dbg("piper error:", e)

     # Failure path: drop any partial or invalid output so it does not linger
     # in the audio directory.
     if os.path.exists(out_path):
         try:
             os.remove(out_path)
         except OSError as e:
             _dbg("piper: could not remove", out_path, e)
     return None
 def _tts_with_say(text: str) -> Optional[str]:
     if os.name != "posix" or not _have("say"):
+        _dbg("say: not available")
         return None
     out_dir = ensure_runtime_audio_dir()
     aiff = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.aiff")
+    wav  = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")
+    voice = os.getenv("SAY_VOICE")  # e.g., "Samantha"
     safe_text = re.sub(r"[\x00-\x1F`<>]+", " ", text).strip() or "Hello."
+    cmd = ["say", "-o", aiff]
+    if voice: cmd += ["-v", voice]
+    cmd.append(safe_text)
+    _dbg("say cmd:", " ".join(cmd))
     try:
         subprocess.run(cmd, check=True)
     except Exception as e:
+        _dbg("say failed:", e)
         return None
     converted = False
             subprocess.run(
                 ["afconvert", "-f", "WAVE", "-d", "LEI16", "-c", "1", "-s", "1", aiff, wav],
                 check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+            ); converted = True
+        except Exception as e:
+            _dbg("afconvert failed:", e)
     if not converted and which("ffmpeg"):
         try:
             subprocess.run(
                 ["ffmpeg", "-y", "-i", aiff, "-ar", "22050", "-ac", "1", wav],
                 check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+            ); converted = True
+        except Exception as e:
+            _dbg("ffmpeg failed:", e)
     if converted and os.path.exists(wav) and _is_valid_wav(wav):
+        try: os.remove(aiff)
+        except: pass
+        _dbg("say ok ->", wav)
         return os.path.abspath(wav)
     if os.path.exists(aiff):
+        _dbg("say fallback AIFF ->", aiff)
         return os.path.abspath(aiff)
     return None
 def tts_synthesize(text: str) -> Optional[str]:
     """Turn `text` into an audio file using the first backend that succeeds.

     Backends are tried in order: Piper, then the macOS `say` fallback. After
     a success, every other audio file in the runtime audio directory is
     removed via cleanup_old_audio.

     Args:
         text: Text to speak; empty or whitespace-only text is skipped.

     Returns:
         Path of the audio file produced, or None when the text is empty or
         no backend produced audio.
     """
     if not text or not text.strip():
         _dbg("skip empty text")
         return None
     ensure_runtime_audio_dir()
     # Priority order: Piper first (Linux/HF), then the macOS fallback.
     for backend in (_tts_with_piper, _tts_with_say):
         audio_path = backend(text)
         if audio_path:
             cleanup_old_audio(keep_latest=audio_path)
             return audio_path
     _dbg("no TTS backend produced audio")
     return None