import os
import time

import gradio as gr
from TTS.api import TTS

YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"
FIXED_SPEAKER_PATH = "speakers/voice2.wav"

# Speaking rate (words per second) assumed when estimating how long the
# generated audio is; only used for the reported real-time factor.
_WORDS_PER_SECOND = 2.5

# Created once at import time so every request reuses the same model object.
tts = TTS(YOURTTS_MODEL, gpu=False)


def _estimated_rtf(text, elapsed_sec):
    """Return processing time divided by the estimated audio duration.

    The audio duration is estimated from the word count of *text*, not
    measured from the generated file. Returns ``None`` when *text* contains
    no words, because the ratio is undefined in that case.
    """
    est_duration = len(text.split()) / _WORDS_PER_SECOND
    if est_duration <= 0:
        return None
    return round(elapsed_sec / est_duration, 3)


def synthesize(text):
    """Synthesize *text* to ``output.wav`` using the fixed reference speaker.

    Args:
        text: The text to speak. Synthesis is requested with language "en".

    Returns:
        A ``(audio_path, info)`` tuple. On success ``audio_path`` is the path
        of the written WAV file and ``info`` holds timing and model metadata.
        On failure ``audio_path`` is ``None`` and ``info`` is
        ``{"error": <message>}``.
    """
    output_path = "output.wav"

    if not os.path.exists(FIXED_SPEAKER_PATH):
        return None, {"error": f"❌ Speaker file not found: {FIXED_SPEAKER_PATH}"}

    # Reject input with no words up front: there is nothing to synthesize and
    # the real-time-factor computation would otherwise divide by zero.
    if not text or not text.strip():
        return None, {"error": "❌ Text is empty"}

    # perf_counter is monotonic, so the measurement cannot be skewed by
    # system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=FIXED_SPEAKER_PATH,
            file_path=output_path,
            language="en",
        )
    except Exception as e:
        # Boundary with the UI: report the failure in the JSON output
        # instead of letting the exception propagate.
        return None, {"error": str(e)}
    total_time = time.perf_counter() - start_time

    return output_path, {
        "language": "English",
        "processing_time_sec": round(total_time, 3),
        "real_time_factor": _estimated_rtf(text, total_time),
        "model_used": YOURTTS_MODEL,
        "speaker_used": os.path.basename(FIXED_SPEAKER_PATH),
    }


# One Interface object provides both the web UI and the API endpoint.
demo = gr.Interface(
    fn=synthesize,
    inputs=[gr.Textbox(label="Text")],
    outputs=[gr.Audio(type="filepath"), gr.JSON()],
    title="YourTTS Voice Cloning (English Only, Fixed Speaker)",
    allow_flagging="never"
)

# NOTE(review): this sets the attribute after the Interface has been built;
# confirm the installed Gradio version honours it (api_name may need to be
# passed to the constructor instead). Left as-is to preserve behavior.
demo.api_name = "/predict"

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, show_api=True)