"""YourTTS voice-cloning demo exposed through a Gradio UI and a FastAPI endpoint.

The module loads the YourTTS model once at import time and offers two front
ends on top of the same ``synthesize`` function:

* a Gradio Blocks page (``demo``), launched on port 7860 when run as a script;
* a FastAPI app (``app``) with ``POST /synthesize/``, served on port 8000.
"""
import atexit
import base64
import contextlib
import os
import shutil
import tempfile
import threading
import time

import gradio as gr
from fastapi import FastAPI
from fastapi.concurrency import run_in_threadpool
from pydantic import BaseModel
from TTS.api import TTS

# ------------------- TTS Setup -------------------
YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"
FIXED_SPEAKER_PATH = "speakers/voice1.wav"

# Rough speaking rate used to estimate the audio duration for the RTF figure.
WORDS_PER_SECOND = 2.5

tts = TTS(YOURTTS_MODEL, gpu=False)

# Every request writes to its own file inside this directory, so concurrent
# requests (Gradio and FastAPI run in different threads) cannot overwrite each
# other's audio. The directory is removed when the process exits.
_OUTPUT_DIR = tempfile.mkdtemp(prefix="yourtts_")
atexit.register(shutil.rmtree, _OUTPUT_DIR, ignore_errors=True)

# The single shared model instance is driven from several threads; inference
# is serialized so calls never interleave on it.
_SYNTH_LOCK = threading.Lock()


def _remove_quietly(path):
    """Delete *path*, ignoring the case where it is already gone or locked."""
    with contextlib.suppress(OSError):
        os.remove(path)


def synthesize(text: str):
    """Synthesize *text* to a WAV file using the fixed reference speaker.

    Args:
        text: The English text to speak.

    Returns:
        A ``(output_path, info)`` tuple. On success ``output_path`` is the
        path of a newly written WAV file and ``info`` holds the timing and
        model metadata. On failure ``output_path`` is ``None`` and ``info``
        is ``{"error": <message>}``.
    """
    # Without at least one word the duration estimate below would be zero and
    # the real-time-factor division would raise ZeroDivisionError.
    if not text or not text.strip():
        return None, {"error": "❌ Text is empty"}

    if not os.path.exists(FIXED_SPEAKER_PATH):
        return None, {"error": f"❌ Speaker file not found: {FIXED_SPEAKER_PATH}"}

    # Reserve a unique output file for this call.
    fd, output_path = tempfile.mkstemp(suffix=".wav", dir=_OUTPUT_DIR)
    os.close(fd)

    try:
        with _SYNTH_LOCK:
            # Timed inside the lock so waiting for another request does not
            # inflate the reported processing time.
            start_time = time.perf_counter()
            tts.tts_to_file(
                text=text,
                speaker_wav=FIXED_SPEAKER_PATH,
                file_path=output_path,
                language="en",
            )
            total_time = time.perf_counter() - start_time
    except Exception as e:
        # Boundary between the model and the UI/API: report the failure to
        # the caller instead of crashing the request, and drop the empty file.
        _remove_quietly(output_path)
        return None, {"error": str(e)}

    est_duration = len(text.split()) / WORDS_PER_SECOND
    rtf = round(total_time / est_duration, 3)

    return output_path, {
        "language": "English",
        "processing_time_sec": round(total_time, 3),
        "real_time_factor": rtf,
        "model_used": YOURTTS_MODEL,
        "speaker_used": os.path.basename(FIXED_SPEAKER_PATH),
    }


# ------------------- Gradio UI -------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🗣️ YourTTS Voice Cloning (English Only, Fixed Speaker)")

    input_text = gr.Textbox(
        label="Text",
        placeholder="Type something to synthesize...",
        lines=3,
    )
    generate_btn = gr.Button("🔊 Generate Speech")
    output_audio = gr.Audio(label="Output Audio", type="filepath")
    metadata_json = gr.JSON(label="Meta Info (Time, Model, RTF, etc.)")

    generate_btn.click(
        fn=synthesize,
        inputs=[input_text],
        outputs=[output_audio, metadata_json],
    )

# ------------------- FastAPI Setup -------------------
app = FastAPI(title="YourTTS FastAPI")


class TTSRequest(BaseModel):
    """Request body for ``POST /synthesize/``."""

    # Text to synthesize.
    text: str


def _read_audio_as_base64(path):
    """Return the Base64 text of the file at *path*, then delete the file."""
    try:
        with open(path, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")
    finally:
        # The audio travels in the response body; the file is no longer needed.
        _remove_quietly(path)


@app.post("/synthesize/")
async def tts_endpoint(req: TTSRequest):
    """Synthesize ``req.text`` and return the audio as Base64 plus metadata.

    Returns ``{"audio_base64": ..., "metadata": ...}`` on success and
    ``{"error": ...}`` when synthesis fails.
    """
    # Synthesis and file reading are blocking; running them in the thread pool
    # keeps the event loop free to serve other requests meanwhile.
    output_path, metadata = await run_in_threadpool(synthesize, req.text)
    if output_path is None:
        return {"error": metadata.get("error")}

    audio_base64 = await run_in_threadpool(_read_audio_as_base64, output_path)
    return {
        "audio_base64": audio_base64,
        "metadata": metadata,
    }


# ------------------- Launch -------------------
if __name__ == "__main__":
    import uvicorn

    # Start the Gradio UI in a separate thread. It is a daemon thread because
    # a non-daemon thread would keep the process alive after uvicorn stops.
    gradio_thread = threading.Thread(
        target=demo.launch,
        kwargs={"server_name": "0.0.0.0", "server_port": 7860},
        name="gradio-ui",
        daemon=True,
    )
    gradio_thread.start()

    # Start FastAPI (blocks until the server is shut down).
    uvicorn.run(app, host="0.0.0.0", port=8000)