Update app.py
Browse files
app.py
CHANGED
|
@@ -2,17 +2,17 @@ import gradio as gr
|
|
| 2 |
from TTS.api import TTS
|
| 3 |
import time
|
| 4 |
import os
|
| 5 |
-
from fastapi import FastAPI
|
| 6 |
-
from pydantic import BaseModel
|
| 7 |
-
import base64
|
| 8 |
|
| 9 |
-
#
|
| 10 |
YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"
|
| 11 |
-
FIXED_SPEAKER_PATH = "speakers/voice1.wav"
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
tts = TTS(YOURTTS_MODEL, gpu=False)
|
| 14 |
|
| 15 |
-
def synthesize(text: str):
|
| 16 |
output_path = "output.wav"
|
| 17 |
start_time = time.time()
|
| 18 |
|
|
@@ -41,7 +41,7 @@ def synthesize(text: str):
|
|
| 41 |
"speaker_used": FIXED_SPEAKER_PATH.split("/")[-1]
|
| 42 |
}
|
| 43 |
|
| 44 |
-
#
|
| 45 |
with gr.Blocks() as demo:
|
| 46 |
gr.Markdown("## 🗣️ YourTTS Voice Cloning (English Only, Fixed Speaker)")
|
| 47 |
|
|
@@ -52,6 +52,7 @@ with gr.Blocks() as demo:
|
|
| 52 |
)
|
| 53 |
|
| 54 |
generate_btn = gr.Button("🔊 Generate Speech")
|
|
|
|
| 55 |
output_audio = gr.Audio(label="Output Audio", type="filepath")
|
| 56 |
metadata_json = gr.JSON(label="Meta Info (Time, Model, RTF, etc.)")
|
| 57 |
|
|
@@ -61,35 +62,16 @@ with gr.Blocks() as demo:
|
|
| 61 |
outputs=[output_audio, metadata_json]
|
| 62 |
)
|
| 63 |
|
| 64 |
-
#
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
with open(output_path, "rb") as f:
|
| 78 |
-
audio_bytes = f.read()
|
| 79 |
-
audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
|
| 80 |
-
|
| 81 |
-
return {
|
| 82 |
-
"audio_base64": audio_base64,
|
| 83 |
-
"metadata": metadata
|
| 84 |
-
}
|
| 85 |
-
|
| 86 |
-
# ------------------- Launch -------------------
|
| 87 |
-
if __name__ == "__main__":
|
| 88 |
-
import threading
|
| 89 |
-
import uvicorn
|
| 90 |
-
|
| 91 |
-
# Start Gradio UI in a separate thread
|
| 92 |
-
threading.Thread(target=lambda: demo.launch(server_name="0.0.0.0", server_port=7860)).start()
|
| 93 |
-
|
| 94 |
-
# Start FastAPI
|
| 95 |
-
uvicorn.run(app, host="0.0.0.0", port=8000)
|
|
|
|
| 2 |
from TTS.api import TTS
|
| 3 |
import time
|
| 4 |
import os
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
+
# Fixed model (YourTTS in English)
|
| 7 |
YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"
|
|
|
|
| 8 |
|
| 9 |
+
# Fixed speaker file (pre-cloned voice)
|
| 10 |
+
FIXED_SPEAKER_PATH = "speakers/voice2.wav"
|
| 11 |
+
|
| 12 |
+
# Initialize model once
|
| 13 |
tts = TTS(YOURTTS_MODEL, gpu=False)
|
| 14 |
|
| 15 |
+
def synthesize(text):
|
| 16 |
output_path = "output.wav"
|
| 17 |
start_time = time.time()
|
| 18 |
|
|
|
|
| 41 |
"speaker_used": FIXED_SPEAKER_PATH.split("/")[-1]
|
| 42 |
}
|
| 43 |
|
| 44 |
+
# ------------------ Gradio UI ------------------
|
| 45 |
with gr.Blocks() as demo:
|
| 46 |
gr.Markdown("## 🗣️ YourTTS Voice Cloning (English Only, Fixed Speaker)")
|
| 47 |
|
|
|
|
| 52 |
)
|
| 53 |
|
| 54 |
generate_btn = gr.Button("🔊 Generate Speech")
|
| 55 |
+
|
| 56 |
output_audio = gr.Audio(label="Output Audio", type="filepath")
|
| 57 |
metadata_json = gr.JSON(label="Meta Info (Time, Model, RTF, etc.)")
|
| 58 |
|
|
|
|
| 62 |
outputs=[output_audio, metadata_json]
|
| 63 |
)
|
| 64 |
|
| 65 |
+
# ------------------ API Interface ------------------
|
| 66 |
+
api_demo = gr.Interface(
|
| 67 |
+
fn=synthesize,
|
| 68 |
+
inputs=[gr.Text(label="Text")], # Only text input
|
| 69 |
+
outputs=[
|
| 70 |
+
gr.Audio(type="filepath", label="Generated Audio"),
|
| 71 |
+
gr.JSON(label="Metadata")
|
| 72 |
+
],
|
| 73 |
+
title="YourTTS Voice Cloning (English Only, Fixed Speaker)"
|
| 74 |
+
)
|
| 75 |
+
|
| 76 |
+
# Launch the app with both UI and API
|
| 77 |
+
demo.launch(server_name="0.0.0.0", server_port=7860, show_api=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|