Eyob-Sol committed on
Commit
51d45ce
·
verified ·
1 Parent(s): f418bc8

Update models/tts_router.py

Browse files
Files changed (1) hide show
  1. models/tts_router.py +60 -71
models/tts_router.py CHANGED
@@ -1,106 +1,102 @@
1
  # models/tts_router.py
2
  from __future__ import annotations
3
-
4
- import os
5
- import re
6
- import uuid
7
- import wave
8
- import glob
9
- import subprocess
10
  from shutil import which
11
  from typing import Optional
12
 
13
# Single source of truth for the directory that holds generated audio.
# It is normalised to an absolute path so that path comparisons and
# subprocess arguments do not depend on the current working directory.
RUNTIME_AUDIO_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "..", "runtime", "audio")
)
# Alias kept because cleanup_old_audio() refers to the directory by this name.
AUDIO_DIR = RUNTIME_AUDIO_DIR
os.makedirs(AUDIO_DIR, exist_ok=True)
17
 
 
 
 
 
18
def cleanup_old_audio(keep_latest: Optional[str] = None):
    """Delete generated audio files in AUDIO_DIR, optionally sparing one.

    Only entries ending in ``.wav`` or ``.aiff`` are removed (an engine may
    have produced AIFF instead of WAV). Deletion is best-effort: a file that
    cannot be removed is reported on stdout and skipped.

    Args:
        keep_latest: Path of the single file to preserve, or ``None`` to
            remove every matching file.
    """
    # Resolve the protected path once rather than once per directory entry.
    keep = os.path.abspath(keep_latest) if keep_latest else None
    for f in glob.glob(os.path.join(AUDIO_DIR, "*")):
        if keep is not None and os.path.abspath(f) == keep:
            continue
        if not f.endswith((".wav", ".aiff")):
            continue
        try:
            os.remove(f)
        except OSError as e:
            print(f"[CLEANUP] Could not delete {f}: {e}")
29
 
30
def ensure_runtime_audio_dir() -> str:
    """Create RUNTIME_AUDIO_DIR if it does not exist and return its path."""
    os.makedirs(RUNTIME_AUDIO_DIR, exist_ok=True)
    return RUNTIME_AUDIO_DIR
33
 
34
def _have(cmd: str) -> bool:
    """Return True when ``shutil.which`` can resolve *cmd* to an executable."""
    return which(cmd) is not None
 
36
 
37
def _is_valid_wav(path: str, min_duration_s: float = 0.25) -> bool:
    """Report whether *path* is a readable WAV file of sufficient length.

    Args:
        path: File to inspect.
        min_duration_s: Minimum acceptable duration in seconds.

    Returns:
        True when the file opens as WAV, has a positive frame count and
        frame rate, and lasts at least ``min_duration_s``. Any error while
        opening or parsing the file yields False.
    """
    # Only the file access can raise; keep the try body limited to it.
    try:
        with wave.open(path, "rb") as w:
            frames = w.getnframes()
            rate = w.getframerate()
    except Exception:
        return False
    if frames <= 0 or rate <= 0:
        return False
    return frames / float(rate) >= min_duration_s
48
 
49
def _tts_with_piper(text: str) -> Optional[str]:
    """Synthesize *text* with a local Piper binary, if one is configured.

    Env:
        PIPER_MODEL: path to the voice model (``models/piper/<voice>.onnx``).
        PIPER_BIN (optional): binary name or path; defaults to ``piper``.

    Returns:
        Absolute path of the generated WAV, or ``None`` when Piper is not
        configured, cannot be found, fails, or writes an invalid file.
    """
    model = os.getenv("PIPER_MODEL")
    if not model or not os.path.exists(model):
        return None
    piper_bin = os.getenv("PIPER_BIN", "piper")
    if not _have(piper_bin) and not os.path.isabs(piper_bin):
        # An absolute path is still attempted even when it is not on PATH.
        return None

    out_dir = ensure_runtime_audio_dir()
    out_path = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")

    # Control characters are collapsed to spaces before the text is piped in.
    safe_text = re.sub(r"[\x00-\x1F]+", " ", text).strip()
    try:
        # subprocess.run kills and reaps the child when the timeout expires;
        # Popen + communicate(timeout=...) would leave it running.
        proc = subprocess.run(
            [piper_bin, "--model", model, "--output_file", out_path],
            input=safe_text.encode("utf-8"),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=45,
        )
        if proc.returncode == 0 and os.path.exists(out_path) and _is_valid_wav(out_path):
            return os.path.abspath(out_path)
    except Exception as e:
        print("[TTS] Piper error:", e)
    return None
79
 
80
  def _tts_with_say(text: str) -> Optional[str]:
81
- """
82
- macOS `say` fallback. Produces WAV via afconvert or ffmpeg if present;
83
- else returns AIFF path.
84
- Env:
85
- - SAY_VOICE (optional): e.g., "Samantha" / "Alex"
86
- """
87
  if os.name != "posix" or not _have("say"):
 
88
  return None
89
 
90
  out_dir = ensure_runtime_audio_dir()
91
  aiff = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.aiff")
92
- wav = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")
93
 
94
- voice = os.getenv("SAY_VOICE")
95
  safe_text = re.sub(r"[\x00-\x1F`<>]+", " ", text).strip() or "Hello."
 
 
 
 
96
  try:
97
- cmd = ["say", "-o", aiff]
98
- if voice:
99
- cmd.extend(["-v", voice])
100
- cmd.append(safe_text)
101
  subprocess.run(cmd, check=True)
102
  except Exception as e:
103
- print("[TTS] say failed:", e)
104
  return None
105
 
106
  converted = False
@@ -109,54 +105,47 @@ def _tts_with_say(text: str) -> Optional[str]:
109
  subprocess.run(
110
  ["afconvert", "-f", "WAVE", "-d", "LEI16", "-c", "1", "-s", "1", aiff, wav],
111
  check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
112
- )
113
- converted = True
114
- except Exception:
115
- converted = False
116
  if not converted and which("ffmpeg"):
117
  try:
118
  subprocess.run(
119
  ["ffmpeg", "-y", "-i", aiff, "-ar", "22050", "-ac", "1", wav],
120
  check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
121
- )
122
- converted = True
123
- except Exception:
124
- converted = False
125
 
126
  if converted and os.path.exists(wav) and _is_valid_wav(wav):
127
- try:
128
- os.remove(aiff)
129
- except Exception:
130
- pass
131
  return os.path.abspath(wav)
132
 
133
  if os.path.exists(aiff):
134
- # AIFF is fine as a fallback (Gradio can usually play it)
135
  return os.path.abspath(aiff)
136
-
137
  return None
138
 
139
def tts_synthesize(text: str) -> Optional[str]:
    """High-level TTS router.

    Backends are tried in order: Piper (if configured), then macOS ``say``.
    The first backend that produces a file wins; older audio files are then
    pruned so only the new one remains.

    Args:
        text: Text to speak. Empty or whitespace-only input is rejected.

    Returns:
        Path of the generated audio file, or ``None`` when the input is
        empty or no backend produced audio.
    """
    if not (text and text.strip()):
        return None

    ensure_runtime_audio_dir()

    # Priority order matters: Piper first, `say` only as a fallback.
    for backend in (_tts_with_piper, _tts_with_say):
        out = backend(text)
        if out:
            cleanup_old_audio(keep_latest=out)
            return out

    return None
 
1
  # models/tts_router.py
2
  from __future__ import annotations
3
+ import os, re, uuid, wave, glob, subprocess, sys
 
 
 
 
 
 
4
  from shutil import which
5
  from typing import Optional
6
 
7
# Where we write audio: an absolute path, created at import time so the
# synthesis helpers can assume the directory exists.
AUDIO_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "..", "runtime", "audio")
)
os.makedirs(AUDIO_DIR, exist_ok=True)
10
 
11
def _dbg(*args):
    """Print a ``[TTS]``-prefixed diagnostic to stderr when DEBUG is enabled.

    Debugging is on when the ``DEBUG`` environment variable is one of
    ``1``, ``true``, ``yes`` or ``on``; the comparison ignores case and
    surrounding whitespace. The variable is read on every call.

    Args:
        *args: Values forwarded to ``print`` after the prefix.
    """
    flag = os.getenv("DEBUG", "false").strip().lower()
    if flag in ("1", "true", "yes", "on"):
        print("[TTS]", *args, file=sys.stderr, flush=True)
14
+
15
def cleanup_old_audio(keep_latest: Optional[str] = None):
    """Delete generated audio files in AUDIO_DIR, optionally sparing one.

    Only entries ending in ``.wav`` or ``.aiff`` are removed. Deletion is
    best-effort: a file that cannot be removed is reported through the
    debug log and skipped.

    Args:
        keep_latest: Path of the single file to preserve, or ``None`` to
            remove every matching file.
    """
    # Resolve the protected path once rather than once per directory entry.
    keep = os.path.abspath(keep_latest) if keep_latest else None
    for f in glob.glob(os.path.join(AUDIO_DIR, "*")):
        if keep is not None and os.path.abspath(f) == keep:
            continue
        if not f.endswith((".wav", ".aiff")):
            continue
        try:
            os.remove(f)
        except OSError as e:
            _dbg(f"cleanup skip {f}: {e}")
24
 
25
def ensure_runtime_audio_dir() -> str:
    """Create AUDIO_DIR if it does not exist and return its path."""
    os.makedirs(AUDIO_DIR, exist_ok=True)
    return AUDIO_DIR
28
 
29
def _have(cmd: str) -> bool:
    """Return True when *cmd* names a runnable program.

    An absolute path is accepted when it points at an executable regular
    file, even if its directory is not on PATH. A bare existence check is
    not enough: it would also accept directories and non-executable files.
    Everything else is resolved through ``shutil.which``.

    Args:
        cmd: Command name or path to look up.
    """
    if os.path.isabs(cmd) and os.path.isfile(cmd) and os.access(cmd, os.X_OK):
        return True
    return which(cmd) is not None
32
 
33
def _is_valid_wav(path: str, min_duration_s: float = 0.25) -> bool:
    """Report whether *path* is a readable WAV file of sufficient length.

    Args:
        path: File to inspect.
        min_duration_s: Minimum acceptable duration in seconds.

    Returns:
        True when the file opens as WAV, has a positive frame count and
        frame rate, and lasts at least ``min_duration_s``. Any error while
        opening or parsing the file is logged through the debug log and
        yields False.
    """
    # Only the file access can raise; keep the try body limited to it.
    try:
        with wave.open(path, "rb") as w:
            frames = w.getnframes()
            rate = w.getframerate()
    except Exception as e:
        _dbg("wav check failed:", e)
        return False
    if frames <= 0 or rate <= 0:
        return False
    return frames / float(rate) >= min_duration_s
 
43
 
44
def _tts_with_piper(text: str) -> Optional[str]:
    """Synthesize *text* with a local Piper binary, if one is configured.

    Env:
        PIPER_MODEL: path to the voice model file (required).
        PIPER_BIN (optional): binary name or path; defaults to ``piper``.
        PIPER_LENGTH_SCALE (optional): forwarded as ``--length_scale``.
        PIPER_NOISE_SCALE (optional): forwarded as ``--noise_scale``.

    Returns:
        Absolute path of the generated WAV, or ``None`` when Piper is not
        configured, cannot be found, fails, or writes an invalid file.
    """
    model = os.getenv("PIPER_MODEL")
    piper_bin = os.getenv("PIPER_BIN", "piper")
    if not model or not os.path.exists(model):
        _dbg("piper: missing/invalid PIPER_MODEL:", model)
        return None
    if not _have(piper_bin):
        _dbg("piper: binary not found:", piper_bin)
        return None

    out_dir = ensure_runtime_audio_dir()
    out_path = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")
    # Control characters are collapsed to spaces; empty input gets a stand-in.
    safe_text = re.sub(r"[\x00-\x1F]+", " ", text).strip() or "Hello."

    cmd = [piper_bin, "--model", model, "--output_file", out_path]
    # Optional tuning: each variable is read once and forwarded when set.
    for env_name, flag in (
        ("PIPER_LENGTH_SCALE", "--length_scale"),
        ("PIPER_NOISE_SCALE", "--noise_scale"),
    ):
        value = os.getenv(env_name)
        if value:
            cmd += [flag, value]

    _dbg("piper cmd:", " ".join(cmd))
    try:
        # subprocess.run kills and reaps the child when the timeout expires;
        # Popen + communicate(timeout=...) would leave it running.
        proc = subprocess.run(
            cmd,
            input=safe_text.encode("utf-8"),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=60,
        )
        _dbg("piper rc:", proc.returncode)
        if proc.stderr:
            _dbg("piper stderr:", proc.stderr.decode("utf-8", errors="ignore")[:400])
        if proc.returncode == 0 and os.path.exists(out_path) and _is_valid_wav(out_path):
            _dbg("piper ok ->", out_path)
            return os.path.abspath(out_path)
        _dbg("piper wrote invalid or no wav at:", out_path)
    except Exception as e:
        _dbg("piper error:", e)
    return None
80
 
81
  def _tts_with_say(text: str) -> Optional[str]:
 
 
 
 
 
 
82
  if os.name != "posix" or not _have("say"):
83
+ _dbg("say: not available")
84
  return None
85
 
86
  out_dir = ensure_runtime_audio_dir()
87
  aiff = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.aiff")
88
+ wav = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")
89
 
90
+ voice = os.getenv("SAY_VOICE") # e.g., "Samantha"
91
  safe_text = re.sub(r"[\x00-\x1F`<>]+", " ", text).strip() or "Hello."
92
+ cmd = ["say", "-o", aiff]
93
+ if voice: cmd += ["-v", voice]
94
+ cmd.append(safe_text)
95
+ _dbg("say cmd:", " ".join(cmd))
96
  try:
 
 
 
 
97
  subprocess.run(cmd, check=True)
98
  except Exception as e:
99
+ _dbg("say failed:", e)
100
  return None
101
 
102
  converted = False
 
105
  subprocess.run(
106
  ["afconvert", "-f", "WAVE", "-d", "LEI16", "-c", "1", "-s", "1", aiff, wav],
107
  check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
108
+ ); converted = True
109
+ except Exception as e:
110
+ _dbg("afconvert failed:", e)
 
111
  if not converted and which("ffmpeg"):
112
  try:
113
  subprocess.run(
114
  ["ffmpeg", "-y", "-i", aiff, "-ar", "22050", "-ac", "1", wav],
115
  check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
116
+ ); converted = True
117
+ except Exception as e:
118
+ _dbg("ffmpeg failed:", e)
 
119
 
120
  if converted and os.path.exists(wav) and _is_valid_wav(wav):
121
+ try: os.remove(aiff)
122
+ except: pass
123
+ _dbg("say ok ->", wav)
 
124
  return os.path.abspath(wav)
125
 
126
  if os.path.exists(aiff):
127
+ _dbg("say fallback AIFF ->", aiff)
128
  return os.path.abspath(aiff)
 
129
  return None
130
 
131
def tts_synthesize(text: str) -> Optional[str]:
    """Route *text* to the first TTS backend that produces audio.

    Backends are tried in order: Piper first (Linux/HF), then the macOS
    ``say`` fallback. After a success, older audio files are pruned so only
    the new one remains.

    Args:
        text: Text to speak. Empty or whitespace-only input is rejected.

    Returns:
        Path of the generated audio file, or ``None`` when the input is
        empty or no backend produced audio.
    """
    if not (text and text.strip()):
        _dbg("skip empty text")
        return None

    ensure_runtime_audio_dir()

    # Priority order matters: Piper first, `say` only as a fallback.
    for backend in (_tts_with_piper, _tts_with_say):
        out = backend(text)
        if out:
            cleanup_old_audio(keep_latest=out)
            return out

    _dbg("no TTS backend produced audio")
    return None