Spaces:

Eyob-Sol
/

futurecafe-voice-core

Sleeping

File size: 4,158 Bytes

74bb5fe

# models/tts_router.py
from __future__ import annotations

import os
import re
import uuid
import wave
import shutil
import subprocess
from shutil import which
from typing import Optional

# Absolute path of the directory that receives synthesized audio files:
# "<parent of this module's directory>/runtime/audio".
RUNTIME_AUDIO_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, "runtime", "audio")
)


def ensure_runtime_audio_dir() -> str:
    """Create the runtime audio directory if needed and return its path.

    Missing parent directories are created as well; an already existing
    directory is not an error.

    Returns:
        The value of ``RUNTIME_AUDIO_DIR``.
    """
    audio_dir = RUNTIME_AUDIO_DIR
    os.makedirs(audio_dir, exist_ok=True)
    return audio_dir


def _have(cmd: str) -> bool:
    """Return True when ``shutil.which`` can resolve ``cmd`` to an executable."""
    resolved = which(cmd)
    return resolved is not None


def _is_valid_wav(path: str) -> bool:
    """Check that ``path`` is a readable WAV file containing audio.

    Args:
        path: Filesystem path of the file to inspect.

    Returns:
        True when the ``wave`` module can open the file and it reports a
        positive frame count and a positive sample rate. Any exception raised
        while opening or reading the file yields False.
    """
    try:
        with wave.open(path, "rb") as reader:
            frame_count, sample_rate = reader.getnframes(), reader.getframerate()
            return frame_count > 0 and sample_rate > 0
    except Exception:
        # Deliberately broad: any failure simply means "not a usable WAV".
        return False


def _tts_with_piper(text: str) -> Optional[str]:
    """
    Synthesize ``text`` to a WAV file with a local Piper installation.

    Requires:
      - env PIPER_MODEL to point to models/piper/<voice>.onnx
      - `piper` binary in PATH (brew install piper or from releases)

    Args:
        text: Text to speak. Runs of ASCII control characters are replaced
            by a single space before the text is piped to Piper.

    Returns:
        Path of the generated WAV inside the runtime audio directory, or
        None when Piper is not configured/installed, exits non-zero, times
        out (30 s), or does not produce a valid WAV.
    """
    model = os.getenv("PIPER_MODEL")
    if not model or not os.path.exists(model):
        return None
    if not _have("piper"):
        return None

    out_dir = ensure_runtime_audio_dir()
    out_path = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")

    # Avoid stray control chars that can confuse some engines
    safe_text = re.sub(r"[\x00-\x1F]+", " ", text).strip()
    try:
        # subprocess.run (unlike a bare Popen.communicate) kills and reaps the
        # child when the timeout expires, so a hung Piper cannot be leaked.
        result = subprocess.run(
            ["piper", "--model", model, "--output_file", out_path],
            input=safe_text.encode("utf-8"),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=30,
        )
        if result.returncode == 0 and os.path.exists(out_path) and _is_valid_wav(out_path):
            return out_path
    except Exception as e:
        print("[TTS] Piper error:", e)

    # Failure path: do not leave a partial/invalid file behind (best effort).
    try:
        os.remove(out_path)
    except OSError:
        pass
    return None


def _tts_with_say(text: str) -> Optional[str]:
    """
    macOS `say` fallback. Produces WAV via afconvert or ffmpeg if present;
    else returns the AIFF written by `say`.

    Args:
        text: Text to speak. Control characters, backticks and angle
            brackets are replaced by spaces; if nothing remains, "Hello."
            is spoken instead.

    Returns:
        Path of a validated WAV file when a converter succeeded, otherwise
        the path of the AIFF file if it exists, otherwise None. Also None
        when the platform is not POSIX, `say` is not on PATH, or `say` fails.
    """
    if os.name != "posix":
        return None
    if not _have("say"):
        return None

    out_dir = ensure_runtime_audio_dir()
    aiff = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.aiff")
    wav = os.path.join(out_dir, f"tts_{uuid.uuid4().hex}.wav")

    def _discard(path: str) -> None:
        # Best-effort removal of a temporary/partial file.
        try:
            os.remove(path)
        except OSError:
            pass

    safe_text = re.sub(r"[\x00-\x1F`<>]+", " ", text).strip() or "Hello."
    try:
        # Basic AIFF. "--" ends option parsing (getopt convention) so text
        # that begins with "-" is spoken instead of being read as a flag.
        subprocess.run(["say", "-o", aiff, "--", safe_text], check=True)
    except Exception as e:
        print("[TTS] say failed:", e)
        _discard(aiff)
        return None

    # Converters in order of preference: afconvert first, then ffmpeg.
    converters = (
        ("afconvert", ["afconvert", "-f", "WAVE", "-d", "LEI16", "-c", "1", "-s", "1", aiff, wav]),
        ("ffmpeg", ["ffmpeg", "-y", "-i", aiff, "-ar", "22050", "-ac", "1", wav]),
    )
    for tool, command in converters:
        if not _have(tool):
            continue
        try:
            subprocess.run(
                command,
                check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
            )
        except Exception as e:
            print(f"[TTS] {tool} conversion failed:", e)
            continue
        # Only accept the conversion if the output is a usable WAV; otherwise
        # fall through to the next converter.
        if os.path.exists(wav) and _is_valid_wav(wav):
            _discard(aiff)
            return wav

    # No valid WAV was produced: drop any partial output.
    _discard(wav)

    # Fallback: return AIFF if WAV conversion failed but aiff exists
    if os.path.exists(aiff):
        return aiff

    return None


def tts_synthesize(text: str) -> Optional[str]:
    """
    Route a synthesis request to the first backend that yields a file.

    Backends are tried in this order:
      1) Piper (if configured)
      2) macOS 'say'
    Output is always written under runtime/audio.

    Args:
        text: Text to speak.

    Returns:
        Path of the audio file produced by the first successful backend, or
        None when ``text`` is empty/whitespace-only or every backend fails.
    """
    if not text or not text.strip():
        return None

    ensure_runtime_audio_dir()

    # Order matters: Piper is preferred, `say` is the fallback.
    for backend in (_tts_with_piper, _tts_with_say):
        produced = backend(text)
        if produced:
            return produced

    return None