Spaces:
Sleeping
Sleeping
File size: 4,158 Bytes
74bb5fe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
# models/tts_router.py
from __future__ import annotations
import os
import re
import uuid
import wave
import shutil
import subprocess
from shutil import which
from typing import Optional
# Absolute path of the directory that holds generated audio files:
# the "runtime/audio" folder one level above this module's own directory.
RUNTIME_AUDIO_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "runtime", "audio"))
def ensure_runtime_audio_dir() -> str:
    """Create the runtime audio directory if it is missing and return its path."""
    audio_dir = RUNTIME_AUDIO_DIR
    # exist_ok makes repeated calls harmless
    os.makedirs(audio_dir, exist_ok=True)
    return audio_dir
def _have(cmd: str) -> bool:
    """Return True when *cmd* resolves to an executable via `shutil.which`."""
    resolved = which(cmd)
    return resolved is not None
def _is_valid_wav(path: str) -> bool:
    """
    Report whether *path* is a readable WAV file that contains audio.

    Returns False when the file cannot be opened or parsed by the `wave`
    module, or when its frame count or sample rate is not positive.
    """
    try:
        with wave.open(path, "rb") as reader:
            frame_count, sample_rate = reader.getnframes(), reader.getframerate()
    except Exception:
        # Missing file, truncated header, non-WAV content, ...
        return False
    return frame_count > 0 and sample_rate > 0
def _tts_with_piper(text: str) -> Optional[str]:
    """
    Synthesize *text* to a WAV file with a local Piper install.

    Requires:
    - env PIPER_MODEL to point to an existing model file
      (e.g. models/piper/<voice>.onnx)
    - `piper` binary in PATH (brew install piper or from releases)

    Returns the path of the generated WAV inside the runtime audio
    directory, or None when Piper is not configured, the text is empty
    after sanitizing, or synthesis fails or times out. Never raises for
    engine failures; they are printed and reported as None.
    """
    # Avoid stray control chars that can confuse some engines
    safe_text = re.sub(r"[\x00-\x1F]+", " ", text).strip()
    if not safe_text:
        # Nothing speakable left; don't spawn the engine for empty input
        return None

    model = os.getenv("PIPER_MODEL")
    if not model or not os.path.exists(model):
        return None
    if not _have("piper"):
        return None

    out_dir = ensure_runtime_audio_dir()
    out_path = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")

    try:
        # subprocess.run kills and reaps the child when the timeout expires;
        # Popen.communicate(timeout=...) alone would leave piper running.
        result = subprocess.run(
            ["piper", "--model", model, "--output_file", out_path],
            input=safe_text.encode("utf-8"),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=30,
        )
        # _is_valid_wav also returns False for a missing file
        if result.returncode == 0 and _is_valid_wav(out_path):
            return out_path
    except Exception as e:
        print("[TTS] Piper error:", e)

    # Failed run: don't leave a partial or invalid file in runtime/audio
    try:
        os.remove(out_path)
    except OSError:
        pass
    return None
def _discard_file(path: str) -> None:
    """Best-effort removal of *path*; a missing or locked file is ignored."""
    try:
        os.remove(path)
    except OSError:
        pass


def _tts_with_say(text: str) -> Optional[str]:
    """
    macOS `say` fallback.

    Renders *text* to AIFF with `say`, then converts it to WAV with
    afconvert (preferred) or ffmpeg, whichever is on PATH and succeeds.

    Returns the WAV path on success, the AIFF path when no valid WAV could
    be produced, or None when `say` is unavailable or fails.
    """
    if os.name != "posix":
        return None
    if not _have("say"):
        return None

    out_dir = ensure_runtime_audio_dir()
    aiff = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.aiff")
    wav = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")

    safe_text = re.sub(r"[\x00-\x1F`<>]+", " ", text).strip() or "Hello."

    try:
        # "--" ends option parsing so text starting with "-" is spoken
        # instead of being interpreted as a `say` option.
        subprocess.run(["say", "-o", aiff, "--", safe_text], check=True)
    except Exception as e:
        print("[TTS] say failed:", e)
        _discard_file(aiff)
        return None

    # Converters in order of preference; the first valid WAV wins.
    converters = (
        ("afconvert",
         ["afconvert", "-f", "WAVE", "-d", "LEI16", "-c", "1", "-s", "1", aiff, wav]),
        ("ffmpeg",
         ["ffmpeg", "-y", "-i", aiff, "-ar", "22050", "-ac", "1", wav]),
    )
    for tool, argv in converters:
        if not _have(tool):
            continue
        try:
            subprocess.run(
                argv, check=True,
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
            )
        except Exception as e:
            print(f"[TTS] {tool} conversion failed:", e)
            continue
        if _is_valid_wav(wav):
            # The intermediate AIFF is no longer needed
            _discard_file(aiff)
            return wav

    # No usable WAV: drop any partial output and fall back to the AIFF
    _discard_file(wav)
    if os.path.exists(aiff):
        return aiff
    return None
def tts_synthesize(text: str) -> Optional[str]:
    """
    Route a TTS request through the available engines, in order:
      1) Piper (if configured)
      2) macOS 'say'

    Returns the path of the first audio file produced, or None when the
    text is blank or no engine succeeds. Output lives in runtime/audio.
    """
    if not text or not text.strip():
        return None

    ensure_runtime_audio_dir()

    # The first engine that yields a path wins; `say` only runs when Piper
    # produced nothing.
    return _tts_with_piper(text) or _tts_with_say(text) or None