anuj-exe committed on
Commit
2799091
·
verified ·
1 Parent(s): d3331cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -39
app.py CHANGED
@@ -2,17 +2,17 @@ import gradio as gr
2
  from TTS.api import TTS
3
  import time
4
  import os
5
- from fastapi import FastAPI
6
- from pydantic import BaseModel
7
- import base64
8
 
9
- # ------------------- TTS Setup -------------------
10
  YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"
11
- FIXED_SPEAKER_PATH = "speakers/voice1.wav"
12
 
 
 
 
 
13
  tts = TTS(YOURTTS_MODEL, gpu=False)
14
 
15
- def synthesize(text: str):
16
  output_path = "output.wav"
17
  start_time = time.time()
18
 
@@ -41,7 +41,7 @@ def synthesize(text: str):
41
  "speaker_used": FIXED_SPEAKER_PATH.split("/")[-1]
42
  }
43
 
44
- # ------------------- Gradio UI -------------------
45
  with gr.Blocks() as demo:
46
  gr.Markdown("## 🗣️ YourTTS Voice Cloning (English Only, Fixed Speaker)")
47
 
@@ -52,6 +52,7 @@ with gr.Blocks() as demo:
52
  )
53
 
54
  generate_btn = gr.Button("🔊 Generate Speech")
 
55
  output_audio = gr.Audio(label="Output Audio", type="filepath")
56
  metadata_json = gr.JSON(label="Meta Info (Time, Model, RTF, etc.)")
57
 
@@ -61,35 +62,16 @@ with gr.Blocks() as demo:
61
  outputs=[output_audio, metadata_json]
62
  )
63
 
64
- # ------------------- FastAPI Setup -------------------
65
- app = FastAPI(title="YourTTS FastAPI")
66
-
67
- class TTSRequest(BaseModel):
68
- text: str
69
-
70
- @app.post("/synthesize/")
71
- async def tts_endpoint(req: TTSRequest):
72
- output_path, metadata = synthesize(req.text)
73
- if output_path is None:
74
- return {"error": metadata.get("error")}
75
-
76
- # Return Base64-encoded audio
77
- with open(output_path, "rb") as f:
78
- audio_bytes = f.read()
79
- audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
80
-
81
- return {
82
- "audio_base64": audio_base64,
83
- "metadata": metadata
84
- }
85
-
86
- # ------------------- Launch -------------------
87
- if __name__ == "__main__":
88
- import threading
89
- import uvicorn
90
-
91
- # Start Gradio UI in a separate thread
92
- threading.Thread(target=lambda: demo.launch(server_name="0.0.0.0", server_port=7860)).start()
93
-
94
- # Start FastAPI
95
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
2
  from TTS.api import TTS
3
  import time
4
  import os
 
 
 
5
 
6
+ # Fixed model (YourTTS in English)
7
  YOURTTS_MODEL = "tts_models/multilingual/multi-dataset/your_tts"
 
8
 
9
+ # Fixed speaker file (pre-cloned voice)
10
+ FIXED_SPEAKER_PATH = "speakers/voice2.wav"
11
+
12
+ # Initialize model once
13
  tts = TTS(YOURTTS_MODEL, gpu=False)
14
 
15
+ def synthesize(text):
16
  output_path = "output.wav"
17
  start_time = time.time()
18
 
 
41
  "speaker_used": FIXED_SPEAKER_PATH.split("/")[-1]
42
  }
43
 
44
+ # ------------------ Gradio UI ------------------
45
  with gr.Blocks() as demo:
46
  gr.Markdown("## 🗣️ YourTTS Voice Cloning (English Only, Fixed Speaker)")
47
 
 
52
  )
53
 
54
  generate_btn = gr.Button("🔊 Generate Speech")
55
+
56
  output_audio = gr.Audio(label="Output Audio", type="filepath")
57
  metadata_json = gr.JSON(label="Meta Info (Time, Model, RTF, etc.)")
58
 
 
62
  outputs=[output_audio, metadata_json]
63
  )
64
 
65
+ # ------------------ API Interface ------------------
66
+ api_demo = gr.Interface(
67
+ fn=synthesize,
68
+ inputs=[gr.Text(label="Text")], # Only text input
69
+ outputs=[
70
+ gr.Audio(type="filepath", label="Generated Audio"),
71
+ gr.JSON(label="Metadata")
72
+ ],
73
+ title="YourTTS Voice Cloning (English Only, Fixed Speaker)"
74
+ )
75
+
76
+ # Launch the Blocks UI (`demo`) with the API docs visible; note that `api_demo` above is defined but not launched here
77
+ demo.launch(server_name="0.0.0.0", server_port=7860, show_api=True)