File size: 2,197 Bytes
fed065a a3b55d1 7f3065e 934ec6a fed065a 2799091 0d474f0 934ec6a 2799091 0d474f0 a3b55d1 2799091 fed065a 7f3065e 934ec6a fed065a c8486b0 0d474f0 934ec6a 0d474f0 fed065a c30d3ee fed065a c30d3ee a3b55d1 fed065a 0d474f0 7f3065e 0d474f0 934ec6a 7f3065e 2799091 fed065a 934ec6a c30d3ee 0d474f0 fed065a 0d474f0 2799091 fed065a 0d474f0 fed065a 934ec6a fed065a b1e1fce 2799091 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
import gradio as gr
from TTS.api import TTS
import time
import os
# Model identifier handed to TTS() below. The id names a multilingual
# YourTTS model; this app always requests English (language="en") at
# synthesis time, so the "English only" behaviour comes from the call site,
# not from the model itself.
YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"
# Reference speaker recording, passed as speaker_wav on every synthesis call.
# The path is relative, so it is resolved against the process's working
# directory.
FIXED_SPEAKER_PATH = "speakers/voice2.wav"
# Build the TTS object once at import time so every request reuses it instead
# of constructing it per call.
# NOTE(review): gpu=False presumably keeps inference on the CPU - confirm
# against the TTS API documentation.
tts = TTS(YOURTTS_MODEL, gpu=False)
def synthesize(text):
    """Synthesize *text* to a WAV file using the fixed speaker voice.

    Args:
        text: The text to speak. Empty, whitespace-only, or ``None`` input
            is rejected with an error result instead of being sent to the
            model.

    Returns:
        A ``(audio_path, metadata)`` tuple.

        On success ``audio_path`` is the path of the written WAV file and
        ``metadata`` holds the language, processing time in seconds, an
        estimated real-time factor, the model id, and the speaker file name.

        On failure ``audio_path`` is ``None`` and ``metadata`` is
        ``{"error": <message>}``. Failures are reported this way rather than
        raised, so the UI can show the message in the JSON panel.
    """
    # Reject empty input up front. Without this guard a text with zero words
    # makes the estimated duration 0 and the real-time-factor division below
    # raises ZeroDivisionError (and None would crash on .split()).
    if not text or not text.strip():
        return None, {"error": "❌ No text provided"}

    # NOTE: every call writes to the same file name, so overlapping requests
    # overwrite each other's output.
    output_path = "output.wav"
    start_time = time.time()

    if not os.path.exists(FIXED_SPEAKER_PATH):
        return None, {"error": f"❌ Speaker file not found: {FIXED_SPEAKER_PATH}"}

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=FIXED_SPEAKER_PATH,
            file_path=output_path,
            language="en",
        )
    except Exception as e:
        # UI boundary: surface the failure as metadata instead of crashing
        # the request handler.
        return None, {"error": str(e)}

    total_time = time.time() - start_time

    # The audio length is not measured; it is estimated from the word count
    # at 2.5 words per second, so the real-time factor is an approximation.
    # The guard above guarantees at least one word, hence a non-zero divisor.
    est_duration = len(text.split()) / 2.5
    rtf = round(total_time / est_duration, 3)

    return output_path, {
        "language": "English",
        "processing_time_sec": round(total_time, 3),
        "real_time_factor": rtf,
        "model_used": YOURTTS_MODEL,
        "speaker_used": os.path.basename(FIXED_SPEAKER_PATH),
    }
# ------------------ Gradio UI ------------------
# Components are created in display order: heading, text input, trigger
# button, then the two outputs that synthesize() fills in.
with gr.Blocks() as demo:
    gr.Markdown("## 🗣️ YourTTS Voice Cloning (English Only, Fixed Speaker)")

    input_text = gr.Textbox(
        label="Text", placeholder="Type something to synthesize...", lines=3
    )
    generate_btn = gr.Button("🔊 Generate Speech")

    # synthesize() returns (file path, metadata dict); these two components
    # receive those values in that order.
    output_audio = gr.Audio(label="Output Audio", type="filepath")
    metadata_json = gr.JSON(label="Meta Info (Time, Model, RTF, etc.)")

    # Clicking the button runs synthesize on the textbox contents.
    generate_btn.click(
        fn=synthesize, inputs=[input_text], outputs=[output_audio, metadata_json]
    )
# ------------------ API Interface ------------------
# Interface wrapper around the same synthesize() function: one text input,
# and an audio file path plus a metadata dict as outputs.
# NOTE(review): nothing in this file launches or mounts api_demo; only `demo`
# is launched below - confirm whether this object is still needed.
api_demo = gr.Interface(
    fn=synthesize,
    # The speaker and language are fixed, so text is the only input.
    inputs=[gr.Text(label="Text")],
    outputs=[
        gr.Audio(type="filepath", label="Generated Audio"),
        gr.JSON(label="Metadata"),
    ],
    title="YourTTS Voice Cloning (English Only, Fixed Speaker)",
)
# Launch the Blocks UI (`demo`) at import/run time, listening on
# 0.0.0.0:7860 with show_api=True. Only `demo` is launched here; the separate
# `api_demo` Interface defined above is not started by this call.
# NOTE(review): show_api=True presumably exposes the API usage page for the
# click handler registered on `demo` - confirm against the Gradio docs.
demo.launch(server_name="0.0.0.0", server_port=7860, show_api=True)
|