Spaces:
Sleeping
Sleeping
| # models/tts_router.py | |
| from __future__ import annotations | |
| import os | |
| import re | |
| import uuid | |
| import wave | |
| import shutil | |
| import subprocess | |
| from shutil import which | |
| from typing import Optional | |
# Absolute path of the folder that receives synthesized audio files:
# <parent of this module's directory>/runtime/audio
RUNTIME_AUDIO_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "..", "runtime", "audio")
)


def ensure_runtime_audio_dir() -> str:
    """
    Make sure RUNTIME_AUDIO_DIR exists on disk and return its path.

    Missing parent directories are created as well; an already existing
    directory is left untouched.
    """
    target = RUNTIME_AUDIO_DIR
    os.makedirs(target, exist_ok=True)
    return target
def _have(cmd: str) -> bool:
    """Report whether `cmd` resolves to an executable (looked up via shutil.which)."""
    resolved = shutil.which(cmd)
    return resolved is not None
def _is_valid_wav(path: str) -> bool:
    """
    Return True when `path` opens as a WAV file that reports a positive
    frame count and a positive frame rate.

    Any error while opening or reading the file (missing file, bad header,
    unsupported format, ...) is treated as "not valid" and yields False.
    """
    try:
        with wave.open(path, "rb") as reader:
            has_audio = reader.getnframes() > 0 and reader.getframerate() > 0
    except Exception:
        return False
    return has_audio
def _tts_with_piper(text: str) -> Optional[str]:
    """
    Use local Piper if available.

    Requires:
    - env PIPER_MODEL to point to models/piper/<voice>.onnx
    - `piper` binary in PATH (brew install piper or from releases)

    Returns the path of the generated WAV inside the runtime audio
    directory, or None when Piper is not configured, the text is empty
    after sanitizing, Piper fails or times out (30 s), or the output is not
    a valid WAV. No partial output file is left behind on failure.
    """
    model = os.getenv("PIPER_MODEL")
    if not model or not os.path.exists(model):
        return None
    if not _have("piper"):
        return None

    # Avoid stray control chars that can confuse some engines
    safe_text = re.sub(r"[\x00-\x1F]+", " ", text).strip()
    if not safe_text:
        # Nothing speakable left; let the caller fall through to the next engine.
        return None

    out_dir = ensure_runtime_audio_dir()
    out_path = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")

    try:
        # subprocess.run (unlike Popen.communicate) kills and reaps the child
        # when the timeout expires, so a hung Piper does not linger.
        result = subprocess.run(
            ["piper", "--model", model, "--output_file", out_path],
            input=safe_text.encode("utf-8"),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=30,
        )
        if result.returncode == 0 and os.path.exists(out_path) and _is_valid_wav(out_path):
            return out_path
    except Exception as e:
        print("[TTS] Piper error:", e)

    # Failure path: drop any partial/invalid file so runtime/audio does not
    # accumulate junk. Best effort only - the file may never have been created.
    try:
        os.remove(out_path)
    except OSError:
        pass
    return None
def _remove_quietly(path: str) -> None:
    """Delete `path` if possible; a missing or undeletable file is ignored."""
    try:
        os.remove(path)
    except OSError:
        # Best-effort cleanup only: failing to delete a temp file must not
        # turn a TTS result into an error.
        pass


def _tts_with_say(text: str) -> Optional[str]:
    """
    macOS `say` fallback. Produces WAV via afconvert or ffmpeg if present;
    else writes AIFF and returns it if WAV conversion fails.

    Returns the path of the audio file inside the runtime audio directory,
    or None when the platform is not POSIX, `say` is not installed, or
    `say` itself fails. Text that is empty after sanitizing is replaced by
    "Hello.".
    """
    if os.name != "posix":
        return None
    if not _have("say"):
        return None

    out_dir = ensure_runtime_audio_dir()
    aiff = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.aiff")
    wav = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")
    safe_text = re.sub(r"[\x00-\x1F`<>]+", " ", text).strip() or "Hello."

    try:
        # Basic AIFF. The "--" ends option parsing so that text starting
        # with "-" is spoken instead of being interpreted as a `say` flag.
        subprocess.run(["say", "-o", aiff, "--", safe_text], check=True)
    except Exception as e:
        print("[TTS] say failed:", e)
        _remove_quietly(aiff)  # do not leave a partial AIFF behind
        return None

    # Converters in order of preference; each one writes `wav` from `aiff`.
    converters = (
        ("afconvert", ["afconvert", "-f", "WAVE", "-d", "LEI16", "-c", "1", "-s", "1", aiff, wav]),
        ("ffmpeg", ["ffmpeg", "-y", "-i", aiff, "-ar", "22050", "-ac", "1", wav]),
    )
    for tool, argv in converters:
        if not _have(tool):
            continue
        try:
            subprocess.run(
                argv, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
            )
        except Exception as e:
            print(f"[TTS] {tool} conversion failed:", e)
            continue
        if os.path.exists(wav) and _is_valid_wav(wav):
            _remove_quietly(aiff)
            return wav
        # The tool exited cleanly but the WAV is unusable: try the next one.

    # No converter produced a usable WAV; remove whatever was written.
    _remove_quietly(wav)

    # Fallback: return AIFF if WAV conversion failed but aiff exists
    if os.path.exists(aiff):
        return aiff
    return None
def tts_synthesize(text: str) -> Optional[str]:
    """
    Entry point for speech synthesis. Engines are tried in this order and
    the first one that yields a file wins:
    1) Piper (if configured)
    2) macOS 'say'
    If neither produces output - or `text` is empty/whitespace - the result
    is None. Output files live in runtime/audio.
    """
    if not text or not text.strip():
        return None

    ensure_runtime_audio_dir()

    for backend in (_tts_with_piper, _tts_with_say):
        produced = backend(text)
        if produced:
            return produced
    return None