# coquiAPI / app.py (Hugging Face Space)
# Author: anuj-exe — commit 2799091 ("Update app.py", verified), 2.2 kB
# NOTE(review): the lines above were web-page chrome from the HF file viewer
# (raw / history / blame links); converted to comments so the file parses.
import gradio as gr
from TTS.api import TTS
import time
import os
# Fixed model: Coqui YourTTS multilingual/multi-dataset checkpoint
# (used here for English only — see language="en" in synthesize()).
YOURTTS_MODEL: str = "tts_models/multilingual/multi-dataset/your_tts"
# Fixed speaker file (pre-cloned voice): reference WAV whose voice is cloned
# for every request; existence is re-checked per call in synthesize().
FIXED_SPEAKER_PATH: str = "speakers/voice2.wav"
# Initialize the model once at import time on CPU (gpu=False) so every
# request reuses the same loaded weights instead of reloading per call.
tts = TTS(YOURTTS_MODEL, gpu=False)
def synthesize(text):
    """Synthesize English speech from *text* with the fixed cloned speaker.

    Parameters
    ----------
    text : str
        Text to convert to speech.

    Returns
    -------
    tuple
        ``(output_path, metadata)`` on success, where ``metadata`` holds the
        processing time, estimated real-time factor, model and speaker names;
        ``(None, {"error": ...})`` on any failure.
    """
    output_path = "output.wav"

    # Guard empty/whitespace input: it would otherwise reach the RTF
    # division below with est_duration == 0 (ZeroDivisionError) after a
    # pointless TTS call.
    if not text or not text.strip():
        return None, {"error": "❌ Empty text input."}

    if not os.path.exists(FIXED_SPEAKER_PATH):
        # ❌ repaired from mojibake ("โŒ") in the original source.
        return None, {"error": f"❌ Speaker file not found: {FIXED_SPEAKER_PATH}"}

    # Start timing only after the preconditions, so we measure synthesis
    # itself rather than the cheap filesystem check.
    start_time = time.time()
    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=FIXED_SPEAKER_PATH,
            file_path=output_path,
            language="en",
        )
    except Exception as e:
        # Surface TTS failures to the UI as an error payload instead of
        # letting the exception crash the request handler.
        return None, {"error": str(e)}

    total_time = time.time() - start_time
    # Rough audio-duration estimate: assume ~2.5 spoken words per second.
    # Guaranteed non-zero here because empty text was rejected above.
    est_duration = len(text.split()) / 2.5
    rtf = round(total_time / est_duration, 3)
    return output_path, {
        "language": "English",
        "processing_time_sec": round(total_time, 3),
        "real_time_factor": rtf,
        "model_used": YOURTTS_MODEL,
        "speaker_used": os.path.basename(FIXED_SPEAKER_PATH),
    }
# ------------------ Gradio UI ------------------
# Blocks-based UI: a text box in, synthesized audio + timing metadata out.
with gr.Blocks() as demo:
    gr.Markdown("## ๐Ÿ—ฃ๏ธ YourTTS Voice Cloning (English Only, Fixed Speaker)")
    input_text = gr.Textbox(
        label="Text",
        placeholder="Type something to synthesize...",
        lines=3
    )
    generate_btn = gr.Button("๐Ÿ”Š Generate Speech")
    # synthesize() returns (filepath, dict), matching these two components.
    output_audio = gr.Audio(label="Output Audio", type="filepath")
    metadata_json = gr.JSON(label="Meta Info (Time, Model, RTF, etc.)")
    generate_btn.click(
        fn=synthesize,
        inputs=[input_text],
        outputs=[output_audio, metadata_json]
    )
# ------------------ API Interface ------------------
# NOTE(review): api_demo is constructed but never launched or mounted —
# only `demo` below is served, and its launch already exposes an API
# (show_api=True). Presumably leftover; confirm before removing.
api_demo = gr.Interface(
    fn=synthesize,
    inputs=[gr.Text(label="Text")],  # Only text input
    outputs=[
        gr.Audio(type="filepath", label="Generated Audio"),
        gr.JSON(label="Metadata")
    ],
    title="YourTTS Voice Cloning (English Only, Fixed Speaker)"
)
# Launch the app with both UI and API: bind to all interfaces on port 7860
# (the Hugging Face Spaces default) and expose the auto-generated API docs.
demo.launch(server_name="0.0.0.0", server_port=7860, show_api=True)