Eyob-Sol's picture
Upload 38 files
74bb5fe verified
raw
history blame
4.16 kB
# models/tts_router.py
from __future__ import annotations
import os
import re
import uuid
import wave
import shutil
import subprocess
from shutil import which
from typing import Optional
# Absolute path of the directory that receives generated audio files:
# the "runtime/audio" folder located one level above this module's directory.
RUNTIME_AUDIO_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "runtime", "audio"))
def ensure_runtime_audio_dir() -> str:
    """Create the runtime audio directory if it is missing and return its path."""
    audio_dir = RUNTIME_AUDIO_DIR
    # exist_ok=True makes repeated calls harmless.
    os.makedirs(audio_dir, exist_ok=True)
    return audio_dir
def _have(cmd: str) -> bool:
    """Return True when *cmd* resolves to an executable via `shutil.which`."""
    resolved = which(cmd)
    return resolved is not None
def _is_valid_wav(path: str) -> bool:
    """
    Check that *path* is a readable WAV file containing audio.

    Returns True only when the file opens with the `wave` module and reports
    a positive frame count and a positive sample rate. Any error while
    opening or reading the header is treated as "not valid".
    """
    try:
        with wave.open(path, "rb") as reader:
            frame_count = reader.getnframes()
            sample_rate = reader.getframerate()
    except Exception:
        # Missing file, truncated header, non-WAV data, etc.
        return False
    return frame_count > 0 and sample_rate > 0
def _tts_with_piper(text: str) -> Optional[str]:
    """
    Synthesize *text* to a WAV file using a local Piper install.

    Requires:
    - env PIPER_MODEL to point to an existing voice model file
      (e.g. models/piper/<voice>.onnx)
    - `piper` binary in PATH (brew install piper or from releases)

    Returns the path of the generated WAV inside the runtime audio directory,
    or None when Piper is not configured, the sanitized text is empty, the
    process fails or times out, or the output is not a valid WAV file.
    """
    model = os.getenv("PIPER_MODEL")
    if not model or not os.path.exists(model):
        return None
    if not _have("piper"):
        return None

    # Avoid stray control chars that can confuse some engines
    safe_text = re.sub(r"[\x00-\x1F]+", " ", text).strip()
    if not safe_text:
        # Nothing speakable is left after sanitizing; don't spawn Piper for it.
        return None

    out_dir = ensure_runtime_audio_dir()
    out_path = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")

    try:
        # subprocess.run (unlike a bare Popen.communicate) kills and reaps the
        # child when the timeout expires, so a hung Piper cannot leak.
        result = subprocess.run(
            ["piper", "--model", model, "--output_file", out_path],
            input=safe_text.encode("utf-8"),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=30,
        )
        if result.returncode == 0 and os.path.exists(out_path) and _is_valid_wav(out_path):
            return out_path
    except Exception as e:
        print("[TTS] Piper error:", e)

    # Failure path: drop any partial/invalid output so the audio directory
    # does not accumulate unusable files.
    try:
        os.remove(out_path)
    except OSError:
        pass
    return None
def _remove_if_present(path: str) -> None:
    """Best-effort delete of *path*; a missing or undeletable file is ignored."""
    try:
        os.remove(path)
    except OSError:
        pass


def _tts_with_say(text: str) -> Optional[str]:
    """
    macOS `say` fallback.

    Speaks *text* into an AIFF file, then tries to convert it to WAV with
    `afconvert` and, if that is unavailable or yields no valid WAV, with
    `ffmpeg`.

    Returns:
    - the WAV path when a converter produced a valid WAV (the AIFF is removed)
    - the AIFF path when no converter succeeded but the AIFF exists
    - None when not on a POSIX system, `say` is missing, or `say` fails
    """
    if os.name != "posix" or not _have("say"):
        return None

    out_dir = ensure_runtime_audio_dir()
    aiff = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.aiff")
    wav = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")
    # Replace control chars, backticks and angle brackets with spaces; if
    # nothing is left, speak a short placeholder instead of an empty string.
    safe_text = re.sub(r"[\x00-\x1F`<>]+", " ", text).strip() or "Hello."

    try:
        # Basic AIFF
        subprocess.run(["say", "-o", aiff, safe_text], check=True)
    except Exception as e:
        print("[TTS] say failed:", e)
        # Don't leave a partially written AIFF behind.
        _remove_if_present(aiff)
        return None

    # Converters in order of preference: afconvert first, then ffmpeg.
    converters = (
        ("afconvert", ["afconvert", "-f", "WAVE", "-d", "LEI16", "-c", "1", "-s", "1", aiff, wav]),
        ("ffmpeg", ["ffmpeg", "-y", "-i", aiff, "-ar", "22050", "-ac", "1", wav]),
    )
    for tool, argv in converters:
        if not _have(tool):
            continue
        try:
            subprocess.run(
                argv, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
            )
        except Exception:
            continue
        # A zero exit status is not enough: verify the WAV before accepting
        # it, otherwise fall through to the next converter.
        if os.path.exists(wav) and _is_valid_wav(wav):
            _remove_if_present(aiff)
            return wav

    # No usable WAV: discard any partial/invalid one a converter left behind.
    _remove_if_present(wav)

    # Fallback: return AIFF if WAV conversion failed but aiff exists
    if os.path.exists(aiff):
        return aiff
    return None
def tts_synthesize(text: str) -> Optional[str]:
    """
    High-level TTS router.

    Tries each backend in priority order and returns the first audio path
    produced:
    1) Piper (if configured)
    2) macOS 'say'
    Returns None for empty/whitespace-only text or when no backend yields
    a file. Always writes to runtime/audio.
    """
    if not text or not text.strip():
        return None

    ensure_runtime_audio_dir()

    # Backends listed from most to least preferred.
    for backend in (_tts_with_piper, _tts_with_say):
        audio_path = backend(text)
        if audio_path:
            return audio_path
    return None