import gradio as gr
from TTS.api import TTS
import time
import os

# Identifier of the TTS model handed to the TTS constructor below.
YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"
# Reference recording passed as `speaker_wav` on every synthesis call.
# The path is relative, so it is resolved against the current working directory.
FIXED_SPEAKER_PATH = "speakers/voice2.wav"
# The model is instantiated once at import time (with gpu=False) and the same
# object is reused by every call to synthesize().
tts = TTS(YOURTTS_MODEL, gpu=False)

def synthesize(text):
    """Synthesize English speech for *text* with the fixed reference speaker.

    Args:
        text: The text to speak. ``None``, non-string, empty, or
            whitespace-only input is rejected with an error payload.

    Returns:
        A ``(audio_path, info)`` tuple. On success ``audio_path`` is the path
        of the written WAV file and ``info`` holds timing and model metadata.
        On any failure ``audio_path`` is ``None`` and ``info`` is
        ``{"error": <message>}``.
    """
    # Reject input that contains no words before doing any work: besides being
    # meaningless to synthesize, it would make the word-count based duration
    # estimate below zero and crash the real-time-factor division.
    if not isinstance(text, str) or not text.strip():
        return None, {"error": "❌ No text provided"}

    # NOTE(review): every call writes to the same file name, so overlapping
    # requests would overwrite each other's audio — confirm requests are
    # serialized by the server before relying on this.
    output_path = "output.wav"
    start_time = time.time()
    if not os.path.exists(FIXED_SPEAKER_PATH):
        return None, {"error": f"❌ Speaker file not found: {FIXED_SPEAKER_PATH}"}

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=FIXED_SPEAKER_PATH,
            file_path=output_path,
            language="en"
        )
    except Exception as e:
        # Request boundary: report the failure in the JSON output instead of
        # letting the exception propagate out of the handler.
        return None, {"error": str(e)}

    total_time = time.time() - start_time
    # Audio length is estimated from the word count at 2.5 words per second,
    # not measured from the generated file, so the real-time factor is an
    # approximation. The guard above guarantees at least one word here.
    est_duration = len(text.split()) / 2.5
    rtf = round(total_time / est_duration, 3)
    return output_path, {
        "language": "English",
        "processing_time_sec": round(total_time, 3),
        "real_time_factor": rtf,
        "model_used": YOURTTS_MODEL,
        "speaker_used": os.path.basename(FIXED_SPEAKER_PATH)
    }

# One Interface object provides both the web UI and the HTTP API for synthesize().
demo = gr.Interface(
    fn=synthesize,
    inputs=[gr.Textbox(label="Text")],
    outputs=[gr.Audio(type="filepath"), gr.JSON()],
    title="YourTTS Voice Cloning (English Only, Fixed Speaker)",
    allow_flagging="never",
    # The endpoint name must be supplied at construction time: Interface wires
    # up its submit event while it is being built, so assigning
    # `demo.api_name` afterwards does not rename the endpoint. The name is
    # given without a leading slash; it is exposed as "/predict".
    api_name="predict",
)

if __name__ == "__main__":
    # Start the server only when run as a script: listen on every network
    # interface at port 7860 and keep the API documentation page enabled.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_api=True,
    )