File size: 2,197 Bytes
fed065a
a3b55d1
7f3065e
934ec6a
fed065a
2799091
0d474f0
934ec6a
2799091
 
 
 
0d474f0
a3b55d1
2799091
fed065a
7f3065e
 
934ec6a
 
fed065a
c8486b0
0d474f0
 
934ec6a
0d474f0
 
 
 
fed065a
c30d3ee
 
fed065a
c30d3ee
a3b55d1
fed065a
0d474f0
7f3065e
 
0d474f0
934ec6a
7f3065e
 
2799091
fed065a
934ec6a
c30d3ee
0d474f0
 
 
 
 
fed065a
0d474f0
2799091
fed065a
0d474f0
fed065a
 
 
934ec6a
fed065a
 
b1e1fce
2799091
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import gradio as gr
from TTS.api import TTS
import time
import os

# Fixed model: the model name handed to TTS() below. The name itself refers to
# the multilingual YourTTS checkpoint; English is selected per request, because
# synthesize() always passes language="en".
YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"

# Fixed speaker file (pre-cloned voice): passed as speaker_wav on every
# synthesis call. It is a relative path, so it is resolved against the
# process's current working directory.
FIXED_SPEAKER_PATH = "speakers/voice2.wav"

# Initialize model once, at import time, so every request reuses the same
# instance. gpu=False is passed explicitly (CPU inference requested).
tts = TTS(YOURTTS_MODEL, gpu=False)

def synthesize(text):
    """Synthesize ``text`` with the fixed speaker voice and report timing info.

    Args:
        text: English text to speak. ``None``, empty, or whitespace-only
            input is rejected before the model is invoked.

    Returns:
        A ``(audio_path, metadata)`` pair. On success ``audio_path`` is the
        path of the WAV file that was written and ``metadata`` is a dict with
        the keys ``language``, ``processing_time_sec``, ``real_time_factor``,
        ``model_used`` and ``speaker_used``. On failure ``audio_path`` is
        ``None`` and ``metadata`` is ``{"error": <message>}``; exceptions
        raised by the model call are reported this way instead of being
        propagated.
    """
    # Reject blank input up front. It contains zero words, which would make
    # the estimated duration 0 and the real-time-factor division below fail
    # with ZeroDivisionError (outside the try block).
    if not text or not text.strip():
        return None, {"error": "❌ Text is empty: nothing to synthesize"}

    if not os.path.isfile(FIXED_SPEAKER_PATH):
        return None, {"error": f"❌ Speaker file not found: {FIXED_SPEAKER_PATH}"}

    # NOTE: fixed output location -- each call overwrites the previous result.
    output_path = "output.wav"

    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=FIXED_SPEAKER_PATH,
            file_path=output_path,
            language="en",
        )
    except Exception as e:
        # UI boundary: surface the failure in the metadata panel instead of
        # letting the request crash.
        return None, {"error": str(e)}

    total_time = time.perf_counter() - start_time

    # The audio length is not measured; it is estimated from the word count
    # at an assumed speaking rate, so the RTF is an approximation.
    words_per_second = 2.5
    est_duration = len(text.split()) / words_per_second  # > 0: text is non-blank
    rtf = round(total_time / est_duration, 3)

    return output_path, {
        "language": "English",
        "processing_time_sec": round(total_time, 3),
        "real_time_factor": rtf,
        "model_used": YOURTTS_MODEL,
        "speaker_used": os.path.basename(FIXED_SPEAKER_PATH),
    }

# ------------------ Gradio UI ------------------
# Components are declared in this order: heading, text box, trigger button,
# audio player, JSON metadata panel.
with gr.Blocks() as demo:
    gr.Markdown("## 🗣️ YourTTS Voice Cloning (English Only, Fixed Speaker)")

    # Input side: free text plus the button that starts synthesis.
    input_text = gr.Textbox(lines=3, label="Text", placeholder="Type something to synthesize...")
    generate_btn = gr.Button("🔊 Generate Speech")

    # Output side: one component per value returned by synthesize().
    output_audio = gr.Audio(type="filepath", label="Output Audio")
    metadata_json = gr.JSON(label="Meta Info (Time, Model, RTF, etc.)")

    # Clicking the button feeds the text box to synthesize() and routes its
    # (audio_path, metadata) result to the two output components.
    generate_btn.click(synthesize, inputs=[input_text], outputs=[output_audio, metadata_json])

# ------------------ API Interface ------------------
# Minimal Interface around synthesize(): one text input, and an audio file
# path plus a JSON metadata dict as outputs.
# NOTE(review): nothing in the visible code launches or mounts `api_demo`;
# only `demo` is launched. Confirm whether it is used elsewhere.
api_demo = gr.Interface(
    synthesize,
    [gr.Text(label="Text")],  # Only text input
    [gr.Audio(label="Generated Audio", type="filepath"), gr.JSON(label="Metadata")],
    title="YourTTS Voice Cloning (English Only, Fixed Speaker)",
)

# Launch the Blocks app (`demo`). Note that `api_demo` is NOT launched by this
# call; only `demo` is served. server_name="0.0.0.0" makes the server listen
# on all network interfaces (port 7860), and show_api=True is passed so that
# Gradio shows the API documentation for the app's endpoints.
demo.launch(server_name="0.0.0.0", server_port=7860, show_api=True)