import os
import time

import gradio as gr
from TTS.api import TTS

# Fixed model (YourTTS, English synthesis)
YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"

# Fixed speaker reference file (pre-cloned voice)
FIXED_SPEAKER_PATH = "speakers/voice2.wav"

# Initialize the model once at startup (CPU inference)
tts = TTS(YOURTTS_MODEL, gpu=False)


def synthesize(text):
    output_path = "output.wav"
    start_time = time.time()

    if not text or not text.strip():
        return None, {"error": "❌ No text provided."}

    if not os.path.exists(FIXED_SPEAKER_PATH):
        return None, {"error": f"❌ Speaker file not found: {FIXED_SPEAKER_PATH}"}

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=FIXED_SPEAKER_PATH,
            file_path=output_path,
            language="en",
        )
    except Exception as e:
        return None, {"error": str(e)}

    total_time = time.time() - start_time
    # Rough duration estimate assuming ~2.5 spoken words per second,
    # used to approximate the real-time factor (RTF).
    est_duration = len(text.split()) / 2.5
    rtf = round(total_time / est_duration, 3)

    return output_path, {
        "language": "English",
        "processing_time_sec": round(total_time, 3),
        "real_time_factor": rtf,
        "model_used": YOURTTS_MODEL,
        "speaker_used": os.path.basename(FIXED_SPEAKER_PATH),
    }


# ------------------ Gradio UI ------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🗣️ YourTTS Voice Cloning (English Only, Fixed Speaker)")

    input_text = gr.Textbox(
        label="Text",
        placeholder="Type something to synthesize...",
        lines=3,
    )
    generate_btn = gr.Button("🔊 Generate Speech")
    output_audio = gr.Audio(label="Output Audio", type="filepath")
    metadata_json = gr.JSON(label="Meta Info (Time, Model, RTF, etc.)")

    generate_btn.click(
        fn=synthesize,
        inputs=[input_text],
        outputs=[output_audio, metadata_json],
    )

# ------------------ API Interface ------------------
# Alternative standalone Interface wrapping the same function. It is not launched
# below; the Blocks app already exposes its API when show_api=True.
api_demo = gr.Interface(
    fn=synthesize,
    inputs=[gr.Text(label="Text")],  # Only text input
    outputs=[
        gr.Audio(type="filepath", label="Generated Audio"),
        gr.JSON(label="Metadata"),
    ],
    title="YourTTS Voice Cloning (English Only, Fixed Speaker)",
)

# Launch the UI; show_api=True also exposes the synthesize endpoint as an API
demo.launch(server_name="0.0.0.0", server_port=7860, show_api=True)
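
# ------------------ Example client usage (sketch) ------------------
# A minimal sketch of calling the exposed endpoint with gradio_client, assuming the
# app is running locally on port 7860 and that the click handler is published under
# its default api_name ("/synthesize"). Run this from a separate script or process:
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860")
#   audio_path, metadata = client.predict(
#       "Hello from the YourTTS demo!",  # text to synthesize
#       api_name="/synthesize",
#   )
#   print(audio_path, metadata)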