Spaces:

Eyob-Sol
/

futurecafe-voice-core

Sleeping

App Files Files Community

futurecafe-voice-core / models /tts_router.py

Eyob-Sol

Upload 38 files

74bb5fe verified 3 months ago

raw

history blame

4.16 kB

	# models/tts_router.py
	from __future__ import annotations

	import os
	import re
	import uuid
	import wave
	import shutil
	import subprocess
	from shutil import which
	from typing import Optional

	# Absolute path of the directory that receives generated audio files:
	# <directory of this module>/../runtime/audio.
	RUNTIME_AUDIO_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "runtime", "audio"))


	def ensure_runtime_audio_dir() -> str:
	    """Create the runtime audio directory (and any parents) if missing; return its path."""
	    audio_dir = RUNTIME_AUDIO_DIR
	    os.makedirs(audio_dir, exist_ok=True)
	    return audio_dir


	def _have(cmd: str) -> bool:
	    """Report whether *cmd* resolves to an executable via the PATH lookup of `shutil.which`."""
	    located = which(cmd)
	    if located is None:
	        return False
	    return True


	def _is_valid_wav(path: str) -> bool:
	    """
	    Return True when *path* opens as a WAV file with at least one frame
	    and a positive sample rate; any failure to open or parse counts as invalid.
	    """
	    try:
	        with wave.open(path, "rb") as reader:
	            frame_count = reader.getnframes()
	            sample_rate = reader.getframerate()
	    except Exception:
	        # Missing file, non-WAV content, truncated header, ... -> not usable.
	        return False
	    return frame_count > 0 and sample_rate > 0


	def _tts_with_piper(text: str) -> Optional[str]:
	    """
	    Synthesize *text* with a local Piper install.

	    Requires:
	      - env PIPER_MODEL to point to an existing model file
	        (e.g. models/piper/<voice>.onnx)
	      - `piper` binary in PATH (brew install piper or from releases)

	    Returns:
	        Path of the generated WAV inside the runtime audio directory, or
	        None when Piper is not configured/installed, there is nothing to
	        speak, the process fails or times out (30 s), or the output is not
	        a valid WAV. Synthesis errors are printed, not raised.
	    """
	    model = os.getenv("PIPER_MODEL")
	    if not model or not os.path.exists(model):
	        return None
	    if not _have("piper"):
	        return None

	    # Avoid stray control chars that can confuse some engines
	    safe_text = re.sub(r"[\x00-\x1F]+", " ", text).strip()
	    if not safe_text:
	        # Nothing speakable is left; skip spawning a process for empty input.
	        return None

	    out_dir = ensure_runtime_audio_dir()
	    out_path = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")

	    try:
	        # subprocess.run kills and reaps the child when the timeout expires;
	        # a bare Popen.communicate(timeout=...) would leave it running.
	        result = subprocess.run(
	            ["piper", "--model", model, "--output_file", out_path],
	            input=safe_text.encode("utf-8"),
	            stdout=subprocess.DEVNULL,
	            stderr=subprocess.DEVNULL,
	            timeout=30,
	        )
	        if result.returncode == 0 and os.path.exists(out_path) and _is_valid_wav(out_path):
	            return out_path
	    except Exception as e:
	        print("[TTS] Piper error:", e)

	    # Failure path: do not leave a partial or invalid file in runtime/audio.
	    try:
	        os.remove(out_path)
	    except OSError:
	        pass
	    return None


	def _tts_with_say(text: str) -> Optional[str]:
	    """
	    macOS `say` fallback.

	    Renders *text* to an AIFF file with `say`, then tries to convert it to
	    WAV with `afconvert` (preferred) or `ffmpeg`, whichever is in PATH.

	    Returns:
	        Path of the WAV when a converter produced a valid file (the AIFF is
	        then deleted); otherwise the AIFF path if that file exists; None
	        when the OS is not POSIX, `say` is not in PATH, or `say` fails.
	    """
	    if os.name != "posix":
	        return None
	    if not _have("say"):
	        return None

	    out_dir = ensure_runtime_audio_dir()
	    aiff = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.aiff")
	    wav = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")

	    # Blank out control chars, backticks and angle brackets; fall back to a
	    # fixed phrase when nothing is left.
	    safe_text = re.sub(r"[\x00-\x1F`<>]+", " ", text).strip() or "Hello."
	    try:
	        # "--" ends option parsing, so text that begins with "-" (e.g. a
	        # bullet such as "- one latte") is spoken instead of being parsed
	        # as command-line flags.
	        subprocess.run(["say", "-o", aiff, "--", safe_text], check=True)
	    except Exception as e:
	        print("[TTS] say failed:", e)
	        return None

	    # Converters in order of preference: (tool looked up in PATH, argv).
	    converters = (
	        ("afconvert", ["afconvert", "-f", "WAVE", "-d", "LEI16", "-c", "1", "-s", "1", aiff, wav]),
	        ("ffmpeg", ["ffmpeg", "-y", "-i", aiff, "-ar", "22050", "-ac", "1", wav]),
	    )
	    for tool, argv in converters:
	        if not _have(tool):
	            continue
	        try:
	            subprocess.run(
	                argv, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
	            )
	        except Exception:
	            # Best effort: try the next converter.
	            continue
	        # Validate before accepting, so a converter that exits 0 but writes
	        # an unusable file does not prevent the next one from being tried.
	        if os.path.exists(wav) and _is_valid_wav(wav):
	            try:
	                os.remove(aiff)
	            except Exception:
	                pass
	            return wav

	    # No usable WAV: drop any partial/invalid conversion output.
	    try:
	        os.remove(wav)
	    except OSError:
	        pass

	    # Fallback: return AIFF if WAV conversion failed but aiff exists
	    if os.path.exists(aiff):
	        return aiff

	    return None


	def tts_synthesize(text: str) -> Optional[str]:
	    """
	    High-level TTS router.

	    Backends are tried in priority order and the first non-empty result wins:
	      1) Piper (if configured)
	      2) macOS 'say'
	    Returns None for empty/whitespace-only text or when no backend
	    produced a file. Output always goes to runtime/audio.
	    """
	    if not text or not text.strip():
	        return None

	    ensure_runtime_audio_dir()

	    for backend in (_tts_with_piper, _tts_with_say):
	        produced = backend(text)
	        if produced:
	            return produced

	    return None