import gradio as gr
from TTS.api import TTS
import time
import os
from fastapi import FastAPI
from pydantic import BaseModel
import base64

# ------------------- TTS Setup -------------------
# Model identifier handed to TTS() below and echoed back in response metadata.
YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"
# Reference recording passed as `speaker_wav` on every synthesis call.
# Relative path, so it is resolved against the process's working directory.
FIXED_SPEAKER_PATH = "speakers/voice1.wav"

# Built once at import time and shared by every call to synthesize().
# gpu=False is passed explicitly — presumably to force CPU inference.
tts = TTS(YOURTTS_MODEL, gpu=False)

def synthesize(text: str):
    """Synthesize English speech for *text* with the fixed reference speaker.

    The audio is written to ``output.wav`` in the current working directory,
    overwriting whatever a previous call left there.

    Args:
        text: Text to speak. Must contain at least one non-whitespace
            character.

    Returns:
        A ``(output_path, metadata)`` tuple. On success ``output_path`` is the
        path of the generated WAV file and ``metadata`` holds timing and model
        details. On failure ``output_path`` is ``None`` and ``metadata`` is
        ``{"error": <message>}``.
    """
    # Reject blank input up front. Without this, whitespace-only text yields a
    # zero-word duration estimate and the RTF division below raises
    # ZeroDivisionError instead of returning the usual error tuple.
    if not text or not text.strip():
        return None, {"error": "❌ Text is empty"}

    output_path = "output.wav"
    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # wall-clock adjustments.
    start_time = time.perf_counter()

    if not os.path.exists(FIXED_SPEAKER_PATH):
        return None, {"error": f"❌ Speaker file not found: {FIXED_SPEAKER_PATH}"}

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=FIXED_SPEAKER_PATH,
            file_path=output_path,
            language="en",
        )
    except Exception as e:
        # Boundary handler: report any synthesis failure to the caller as an
        # error payload rather than crashing the UI or the API endpoint.
        return None, {"error": str(e)}

    total_time = time.perf_counter() - start_time

    # The real audio length is not measured; it is approximated from the word
    # count at 2.5 words per second. The guard above ensures at least one word.
    words_per_second = 2.5
    est_duration = len(text.split()) / words_per_second
    rtf = round(total_time / est_duration, 3)

    return output_path, {
        "language": "English",
        "processing_time_sec": round(total_time, 3),
        "real_time_factor": rtf,
        "model_used": YOURTTS_MODEL,
        "speaker_used": os.path.basename(FIXED_SPEAKER_PATH),
    }

# ------------------- Gradio UI -------------------
# Single-page UI: a text box and a button that feed synthesize(), plus an
# audio player and a JSON panel that show its two return values.
# Components appear in the order they are created inside the Blocks context.
with gr.Blocks() as demo:
    gr.Markdown("## 🗣️ YourTTS Voice Cloning (English Only, Fixed Speaker)")

    input_text = gr.Textbox(
        label="Text",
        placeholder="Type something to synthesize...",
        lines=3
    )

    generate_btn = gr.Button("🔊 Generate Speech")
    # type="filepath" because synthesize() returns the path of the WAV file
    # (or None on failure), not raw audio samples.
    output_audio = gr.Audio(label="Output Audio", type="filepath")
    # Shows the metadata dict on success, or the {"error": ...} dict on failure.
    metadata_json = gr.JSON(label="Meta Info (Time, Model, RTF, etc.)")

    # The outputs list order matches synthesize()'s (output_path, metadata) tuple.
    generate_btn.click(
        fn=synthesize,
        inputs=[input_text],
        outputs=[output_audio, metadata_json]
    )

# ------------------- FastAPI Setup -------------------
# FastAPI application exposing the JSON API; run by uvicorn in the __main__ block.
app = FastAPI(title="YourTTS FastAPI")

# Request body schema for POST /synthesize/.
class TTSRequest(BaseModel):
    # Text to be spoken; forwarded unchanged to synthesize().
    text: str

@app.post("/synthesize/")
async def tts_endpoint(req: TTSRequest):
    # Runs synthesize() on the request text and answers with either
    #   {"audio_base64": <base64 WAV>, "metadata": {...}}  on success, or
    #   {"error": <message>}                               on failure.
    wav_path, info = synthesize(req.text)

    if wav_path is not None:
        # Base64-encode the generated file so the audio can travel inside
        # the JSON response body.
        with open(wav_path, "rb") as wav_file:
            encoded = base64.b64encode(wav_file.read())
        return {
            "audio_base64": encoded.decode("utf-8"),
            "metadata": info,
        }

    # Synthesis failed: surface only the error message.
    return {"error": info.get("error")}

# ------------------- Launch -------------------
if __name__ == "__main__":
    import threading
    import uvicorn

    # Serve the Gradio UI on port 7860 from a background thread so the main
    # thread stays free for uvicorn.
    # daemon=True matters: demo.launch() blocks the thread that calls it, so a
    # non-daemon thread would keep the process alive after uvicorn shuts down
    # (e.g. on Ctrl+C), leaving it hung.
    threading.Thread(
        target=demo.launch,
        kwargs={"server_name": "0.0.0.0", "server_port": 7860},
        daemon=True,
    ).start()

    # Serve the FastAPI app on port 8000; blocks until the server is stopped.
    uvicorn.run(app, host="0.0.0.0", port=8000)