Samit-khedekar committed on
Commit
7f3065e
·
verified ·
1 Parent(s): b0a7975

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -11
app.py CHANGED
@@ -1,19 +1,57 @@
1
  import gradio as gr
2
  from TTS.api import TTS
 
 
3
 
4
- # Load TTS model (first time might take 1–2 minutes)
5
- tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=True, gpu=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- def synthesize(text):
8
  output_path = "output.wav"
 
9
  tts.tts_to_file(text=text, file_path=output_path)
10
- return output_path
 
 
 
 
11
 
12
- # Gradio UI
13
- gr.Interface(
 
 
 
 
 
 
14
  fn=synthesize,
15
- inputs=gr.Textbox(label="Enter text to synthesize"),
16
- outputs=gr.Audio(type="filepath", label="Generated Audio"),
17
- title="🗣️ Coqui TTS - CPU Demo",
18
- description="Enter text and generate speech using Coqui's Tacotron2-DDC model (free and fast).",
19
- ).launch(share=True)
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from TTS.api import TTS
3
+ import time
4
+ import os
5
 
6
+ # Available voice models
7
# Short UI key -> Coqui model identifier for every selectable voice.
models = dict(
    fast_pitch="tts_models/en/ljspeech/fast_pitch",
    tacotron2="tts_models/en/ljspeech/tacotron2-DDC",
    glow_tts="tts_models/en/ljspeech/glow-tts",
)

# Model loaded at start-up (CPU only). synthesize() rebinds both of these
# module-level names when the caller asks for a different voice.
current_model_key = "fast_pitch"
tts = TTS(model_name=models[current_model_key], gpu=False)
16
+
17
+ # Synthesize function
18
def synthesize(text, selected_model=None):
    """Synthesize *text* to ``output.wav`` and report timing metadata.

    Args:
        text: Text to convert to speech.
        selected_model: Key into ``models`` naming the voice model to use.
            A falsy value (``None``, ``""``) keeps the currently loaded model.

    Returns:
        A ``(output_path, meta)`` tuple: the path of the written WAV file and
        a dict with the keys ``processing_time_sec``, ``real_time_factor``
        and ``model_used``. ``real_time_factor`` is ``None`` when *text*
        contains no words, because the duration estimate is then zero.

    Raises:
        KeyError: If ``selected_model`` is not a key of ``models``.
    """
    global tts, current_model_key

    # Switch model if needed. Look the key up and load the model *before*
    # touching the globals, so an unknown key or a failed load cannot leave
    # ``current_model_key`` naming a model that is not actually loaded.
    if selected_model and selected_model != current_model_key:
        replacement = TTS(models[selected_model], gpu=False)
        tts, current_model_key = replacement, selected_model

    output_path = "output.wav"

    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by wall-clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    tts.tts_to_file(text=text, file_path=output_path)
    total_time = time.perf_counter() - start_time

    # Approximate real-time factor: processing time / estimated audio length,
    # assuming a speaking rate of about 2.5 words per second.
    word_count = len(text.split())
    if word_count:
        audio_duration = word_count / 2.5
        rtf = round(total_time / audio_duration, 3)
    else:
        # Empty or whitespace-only text: avoid dividing by a zero estimate.
        rtf = None

    return output_path, {
        "processing_time_sec": round(total_time, 3),
        "real_time_factor": rtf,
        "model_used": current_model_key,
    }
40
+
41
+ # Gradio API
42
# Gradio front end for synthesize(): a text box and a model selector in,
# the generated audio file and a JSON metadata blob out.
api = gr.Interface(
    fn=synthesize,
    inputs=[
        gr.Textbox(label="Input Text"),
        gr.Dropdown(
            list(models),
            # Default to whichever model was loaded at start-up instead of
            # repeating its key here, so the two can never drift apart.
            value=current_model_key,
            label="Select Voice Model",
        ),
    ],
    outputs=[
        gr.Audio(type="filepath", label="Synthesized Audio"),
        gr.JSON(label="Meta Info (Time, Model, RTF)"),
    ],
    title="🗣️ TTS API with Model Selector",
    description="Send text to convert it to speech and get metadata via Gradio API. Change the voice model dynamically.",
)

api.launch()
57
+