anuj-exe committed on
Commit
d3331cb
·
verified ·
1 Parent(s): 51aa34e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -20
app.py CHANGED
@@ -2,17 +2,17 @@ import gradio as gr
2
  from TTS.api import TTS
3
  import time
4
  import os
 
 
 
5
 
6
- # Fixed model (YourTTS in English)
7
  YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"
8
-
9
- # Fixed speaker file (pre-cloned voice)
10
  FIXED_SPEAKER_PATH = "speakers/voice1.wav"
11
 
12
- # Initialize model once
13
  tts = TTS(YOURTTS_MODEL, gpu=False)
14
 
15
- def synthesize(text):
16
  output_path = "output.wav"
17
  start_time = time.time()
18
 
@@ -41,7 +41,7 @@ def synthesize(text):
41
  "speaker_used": FIXED_SPEAKER_PATH.split("/")[-1]
42
  }
43
 
44
- # ------------------ Gradio UI ------------------
45
  with gr.Blocks() as demo:
46
  gr.Markdown("## 🗣️ YourTTS Voice Cloning (English Only, Fixed Speaker)")
47
 
@@ -52,7 +52,6 @@ with gr.Blocks() as demo:
52
  )
53
 
54
  generate_btn = gr.Button("🔊 Generate Speech")
55
-
56
  output_audio = gr.Audio(label="Output Audio", type="filepath")
57
  metadata_json = gr.JSON(label="Meta Info (Time, Model, RTF, etc.)")
58
 
@@ -62,16 +61,35 @@ with gr.Blocks() as demo:
62
  outputs=[output_audio, metadata_json]
63
  )
64
 
65
- # ------------------ API Interface ------------------
66
- api_demo = gr.Interface(
67
- fn=synthesize,
68
- inputs=[gr.Text(label="Text")], # Only text input
69
- outputs=[
70
- gr.Audio(type="filepath", label="Generated Audio"),
71
- gr.JSON(label="Metadata")
72
- ],
73
- title="YourTTS Voice Cloning (English Only, Fixed Speaker)"
74
- )
75
-
76
- # Launch the app with both UI and API
77
- demo.launch(server_name="0.0.0.0", server_port=7860, show_api=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from TTS.api import TTS
3
  import time
4
  import os
5
+ from fastapi import FastAPI
6
+ from pydantic import BaseModel
7
+ import base64
8
 
9
+ # ------------------- TTS Setup -------------------
10
  YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"
 
 
11
  FIXED_SPEAKER_PATH = "speakers/voice1.wav"
12
 
 
13
  tts = TTS(YOURTTS_MODEL, gpu=False)
14
 
15
+ def synthesize(text: str):
16
  output_path = "output.wav"
17
  start_time = time.time()
18
 
 
41
  "speaker_used": FIXED_SPEAKER_PATH.split("/")[-1]
42
  }
43
 
44
+ # ------------------- Gradio UI -------------------
45
  with gr.Blocks() as demo:
46
  gr.Markdown("## 🗣️ YourTTS Voice Cloning (English Only, Fixed Speaker)")
47
 
 
52
  )
53
 
54
  generate_btn = gr.Button("🔊 Generate Speech")
 
55
  output_audio = gr.Audio(label="Output Audio", type="filepath")
56
  metadata_json = gr.JSON(label="Meta Info (Time, Model, RTF, etc.)")
57
 
 
61
  outputs=[output_audio, metadata_json]
62
  )
63
 
64
+ # ------------------- FastAPI Setup -------------------
65
+ app = FastAPI(title="YourTTS FastAPI")
66
+
67
+ class TTSRequest(BaseModel):
68
+ text: str
69
+
70
+ @app.post("/synthesize/")
71
+ async def tts_endpoint(req: TTSRequest):
72
+ output_path, metadata = synthesize(req.text)
73
+ if output_path is None:
74
+ return {"error": metadata.get("error")}
75
+
76
+ # Return Base64-encoded audio
77
+ with open(output_path, "rb") as f:
78
+ audio_bytes = f.read()
79
+ audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
80
+
81
+ return {
82
+ "audio_base64": audio_base64,
83
+ "metadata": metadata
84
+ }
85
+
86
+ # ------------------- Launch -------------------
87
+ if __name__ == "__main__":
88
+ import threading
89
+ import uvicorn
90
+
91
+ # Start Gradio UI in a separate thread
92
+ threading.Thread(target=lambda: demo.launch(server_name="0.0.0.0", server_port=7860)).start()
93
+
94
+ # Start FastAPI
95
+ uvicorn.run(app, host="0.0.0.0", port=8000)