# models/tts_router.py
from __future__ import annotations

import os
import re
import uuid
import wave
import shutil
import subprocess
from shutil import which
from typing import Optional

RUNTIME_AUDIO_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "..", "runtime", "audio")
)

# Upper bound (seconds) for a single Piper synthesis run.
_PIPER_TIMEOUT_SECONDS = 30


def ensure_runtime_audio_dir() -> str:
    """Create RUNTIME_AUDIO_DIR if it is missing and return its path."""
    os.makedirs(RUNTIME_AUDIO_DIR, exist_ok=True)
    return RUNTIME_AUDIO_DIR


def _have(cmd: str) -> bool:
    """Return True if *cmd* resolves to an executable on PATH."""
    return which(cmd) is not None


def _remove_quietly(path: str) -> None:
    """Delete *path* if possible; filesystem errors are ignored (best-effort cleanup)."""
    try:
        os.remove(path)
    except OSError:
        pass


def _is_valid_wav(path: str) -> bool:
    """Return True if *path* opens as a WAV file with a positive frame count and rate."""
    try:
        with wave.open(path, "rb") as w:
            frames = w.getnframes()
            rate = w.getframerate()
    except Exception:
        # Any failure to open/parse the file means it is not a usable WAV.
        return False
    return frames > 0 and rate > 0


def _tts_with_piper(text: str) -> Optional[str]:
    """
    Use local Piper if available.

    Requires:
      - env PIPER_MODEL to point to an existing Piper voice model file
        (an .onnx file, e.g. under models/piper/)
      - `piper` binary in PATH (brew install piper or from releases)

    Returns the path of the generated WAV inside runtime/audio, or None when
    Piper is not configured, the text is empty after sanitizing, or synthesis
    fails or times out.
    """
    model = os.getenv("PIPER_MODEL")
    if not model or not os.path.exists(model):
        return None
    if not _have("piper"):
        return None

    # Avoid stray control chars that can confuse some engines
    safe_text = re.sub(r"[\x00-\x1F]+", " ", text).strip()
    if not safe_text:
        # Nothing speakable is left; do not spawn Piper for empty input.
        return None

    out_dir = ensure_runtime_audio_dir()
    out_path = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")

    try:
        # subprocess.run kills and reaps the child when the timeout expires,
        # so a hung Piper process is not left running in the background.
        result = subprocess.run(
            ["piper", "--model", model, "--output_file", out_path],
            input=safe_text.encode("utf-8"),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=_PIPER_TIMEOUT_SECONDS,
        )
    except Exception as e:
        # Best-effort backend: report and let the router try the next engine.
        print("[TTS] Piper error:", e)
        _remove_quietly(out_path)
        return None

    if result.returncode == 0 and os.path.exists(out_path) and _is_valid_wav(out_path):
        return out_path

    # Drop partial/invalid output so failed runs do not accumulate files.
    _remove_quietly(out_path)
    return None


def _tts_with_say(text: str) -> Optional[str]:
    """
    macOS `say` fallback.

    Renders AIFF with `say`, then converts it to WAV via afconvert or ffmpeg
    if present. Returns the WAV path on success; if no converter produces a
    valid WAV, returns the AIFF path instead. Returns None when `say` is
    unavailable or fails.
    """
    if os.name != "posix":
        return None
    if not _have("say"):
        return None

    out_dir = ensure_runtime_audio_dir()
    aiff = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.aiff")
    wav = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")
    safe_text = re.sub(r"[\x00-\x1F`<>]+", " ", text).strip() or "Hello."

    try:
        # Basic AIFF
        subprocess.run(["say", "-o", aiff, safe_text], check=True)
    except Exception as e:
        print("[TTS] say failed:", e)
        _remove_quietly(aiff)
        return None

    # Converters in order of preference. afconvert runs first; ffmpeg is
    # invoked with -y so it can overwrite a bad file left by afconvert.
    converters = (
        ("afconvert",
         ["afconvert", "-f", "WAVE", "-d", "LEI16", "-c", "1", "-s", "1", aiff, wav]),
        ("ffmpeg",
         ["ffmpeg", "-y", "-i", aiff, "-ar", "22050", "-ac", "1", wav]),
    )
    for tool, argv in converters:
        if not _have(tool):
            continue
        try:
            subprocess.run(
                argv,
                check=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except Exception:
            # Conversion is best-effort; fall through to the next converter.
            continue
        # Accept the result only if it is a usable WAV; otherwise keep trying.
        if os.path.exists(wav) and _is_valid_wav(wav):
            _remove_quietly(aiff)
            return wav

    # No converter produced a valid WAV: discard any partial output.
    _remove_quietly(wav)

    # Fallback: return AIFF if WAV conversion failed but aiff exists
    if os.path.exists(aiff):
        return aiff
    return None


def tts_synthesize(text: str) -> Optional[str]:
    """
    High-level TTS router:
      1) Piper (if configured)
      2) macOS 'say'
      3) None

    Always writes to runtime/audio. Returns the path of the generated audio
    file, or None if *text* is empty/whitespace or no backend succeeded.
    """
    if not (text and text.strip()):
        return None

    ensure_runtime_audio_dir()

    # Try each backend in priority order; the first one that yields a file wins.
    for backend in (_tts_with_piper, _tts_with_say):
        out = backend(text)
        if out:
            return out
    return None