coquiAPI

Sleeping

App Files Files Community

anuj-exe committed on Oct 7

Commit

d3331cb

verified ·

1 Parent(s): 51aa34e

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -20

app.py CHANGED Viewed

@@ -2,17 +2,17 @@ import gradio as gr
 from TTS.api import TTS
 import time
 import os
-# Fixed model (YourTTS in English)
 YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"
-# Fixed speaker file (pre-cloned voice)
 FIXED_SPEAKER_PATH = "speakers/voice1.wav"
-# Initialize model once
 tts = TTS(YOURTTS_MODEL, gpu=False)
-def synthesize(text):
     output_path = "output.wav"
     start_time = time.time()
@@ -41,7 +41,7 @@ def synthesize(text):
         "speaker_used": FIXED_SPEAKER_PATH.split("/")[-1]
     }
-# ------------------ Gradio UI ------------------
 with gr.Blocks() as demo:
     gr.Markdown("## 🗣️ YourTTS Voice Cloning (English Only, Fixed Speaker)")
@@ -52,7 +52,6 @@ with gr.Blocks() as demo:
     )
     generate_btn = gr.Button("🔊 Generate Speech")
     output_audio = gr.Audio(label="Output Audio", type="filepath")
     metadata_json = gr.JSON(label="Meta Info (Time, Model, RTF, etc.)")
@@ -62,16 +61,35 @@ with gr.Blocks() as demo:
         outputs=[output_audio, metadata_json]
     )
-# ------------------ API Interface ------------------
-api_demo = gr.Interface(
-    fn=synthesize,
-    inputs=[gr.Text(label="Text")],   # Only text input
-    outputs=[
-        gr.Audio(type="filepath", label="Generated Audio"),
-        gr.JSON(label="Metadata")
-    ],
-    title="YourTTS Voice Cloning (English Only, Fixed Speaker)"
-)
-# Launch the app with both UI and API
-demo.launch(server_name="0.0.0.0", server_port=7860, show_api=True)

 from TTS.api import TTS
 import time
 import os
+from fastapi import FastAPI
+from pydantic import BaseModel
+import base64
+# ------------------- TTS Setup -------------------
 YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"
 FIXED_SPEAKER_PATH = "speakers/voice1.wav"
 tts = TTS(YOURTTS_MODEL, gpu=False)
+def synthesize(text: str):
     output_path = "output.wav"
     start_time = time.time()
         "speaker_used": FIXED_SPEAKER_PATH.split("/")[-1]
     }
+# ------------------- Gradio UI -------------------
 with gr.Blocks() as demo:
     gr.Markdown("## 🗣️ YourTTS Voice Cloning (English Only, Fixed Speaker)")
     )
     generate_btn = gr.Button("🔊 Generate Speech")
     output_audio = gr.Audio(label="Output Audio", type="filepath")
     metadata_json = gr.JSON(label="Meta Info (Time, Model, RTF, etc.)")
         outputs=[output_audio, metadata_json]
     )
+# ------------------- FastAPI Setup -------------------
+# ASGI application exposing the JSON synthesis API. It is served by uvicorn
+# on port 8000 in the __main__ block, separately from the Gradio UI (7860).
+app = FastAPI(title="YourTTS FastAPI")
+# Request body schema for POST /synthesize/.
+class TTSRequest(BaseModel):
+    # Text to synthesize; the endpoint passes it unchanged to synthesize().
+    text: str
+@app.post("/synthesize/")
+async def tts_endpoint(req: TTSRequest):
+    """Synthesize ``req.text`` and return the audio as Base64-encoded JSON.
+
+    Args:
+        req: Request body carrying the text to speak.
+
+    Returns:
+        On success, a dict with ``audio_base64`` (the Base64 text of the
+        generated audio file) and ``metadata`` (the dict returned by
+        ``synthesize``). On failure, a JSON response with HTTP status 500
+        whose body is ``{"error": <message>}``.
+    """
+    # Local imports: the file-level fastapi import only brings in FastAPI.
+    from fastapi.encoders import jsonable_encoder
+    from fastapi.responses import JSONResponse
+
+    # NOTE(review): synthesize() is called synchronously inside an async
+    # handler, so the event loop presumably cannot serve other requests while
+    # it runs. Offloading it to a worker thread would first require that
+    # synthesize() stop writing to a single shared output path - confirm.
+    output_path, metadata = synthesize(req.text)
+    if output_path is None:
+        # Keep the original error body, but signal the failure through the
+        # status code so clients do not mistake it for a successful reply.
+        return JSONResponse(
+            status_code=500,
+            content=jsonable_encoder({"error": metadata.get("error")}),
+        )
+    # Return Base64-encoded audio so the binary file fits in a JSON payload.
+    with open(output_path, "rb") as f:
+        audio_bytes = f.read()
+    audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
+    return {
+        "audio_base64": audio_base64,
+        "metadata": metadata,
+    }
+# ------------------- Launch -------------------
+if __name__ == "__main__":
+    import threading
+    import uvicorn
+    # Start the Gradio UI on port 7860 in a background thread. The thread is
+    # a daemon because launch() keeps blocking the thread that called it; a
+    # non-daemon thread would keep the process alive after uvicorn stops.
+    threading.Thread(
+        target=demo.launch,
+        kwargs={"server_name": "0.0.0.0", "server_port": 7860},
+        daemon=True,
+    ).start()
+    # Serve the FastAPI app in the main thread; this call blocks until
+    # the server shuts down.
+    uvicorn.run(app, host="0.0.0.0", port=8000)