Samit-khedekar committed on
Commit
91c3a86
·
verified ·
1 Parent(s): c30d3ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -17
app.py CHANGED
@@ -6,27 +6,23 @@ import time
6
  default_models = {
7
  "FastPitch (Female - LJSpeech)": "tts_models/en/ljspeech/fast_pitch",
8
  "Glow-TTS (Female - LJSpeech)": "tts_models/en/ljspeech/glow-tts",
9
- "Tactron2 (Female- LJSpeaker)": "tts_models/en/ljspeech/tacotron2-DDC",
10
  "VCTK (Multi-speaker)": "tts_models/en/vctk/vits",
11
  "YourTTS (Cloning + Multi-speaker)": "tts_models/multilingual/multi-dataset/your_tts",
12
-
13
  }
14
 
15
- # Example speaker IDs (VCTK)
16
  vctk_speakers = ["p225", "p227", "p229", "p230", "p233", "p234", "p236"]
17
 
18
- # Default state
19
  current_model_key = list(default_models.values())[0]
20
  tts = TTS(current_model_key, gpu=False)
21
 
22
- def synthesize(text, selected_model, speaker_id, custom_model_url, speaker_wav_path):
23
  global tts, current_model_key
24
 
25
- # Decide model
26
- if custom_model_url:
27
- model_path = custom_model_url
28
- else:
29
- model_path = default_models[selected_model]
30
 
31
  if model_path != current_model_key:
32
  tts = TTS(model_path, gpu=False)
@@ -35,9 +31,9 @@ def synthesize(text, selected_model, speaker_id, custom_model_url, speaker_wav_p
35
  output_path = "output.wav"
36
  start_time = time.time()
37
 
38
- # Handle speaker cloning
39
  if "your_tts" in model_path.lower() and speaker_wav_path:
40
- tts.tts_to_file(text=text, speaker_wav=speaker_wav_path, file_path=output_path)
41
  speaker_info = f"WAV Upload: {speaker_wav_path.split('/')[-1]}"
42
  elif "vctk" in model_path.lower() and speaker_id and speaker_id != "None":
43
  tts.tts_to_file(text=text, speaker=speaker_id, file_path=output_path)
@@ -51,6 +47,7 @@ def synthesize(text, selected_model, speaker_id, custom_model_url, speaker_wav_p
51
  rtf = round(total_time / est_duration, 3)
52
 
53
  return output_path, {
 
54
  "processing_time_sec": round(total_time, 3),
55
  "real_time_factor": rtf,
56
  "model_used": model_path,
@@ -59,11 +56,18 @@ def synthesize(text, selected_model, speaker_id, custom_model_url, speaker_wav_p
59
 
60
  # Gradio UI
61
  with gr.Blocks() as demo:
62
- gr.Markdown("## 🗣️ TTS App with Model + Speaker Selection + Cloning")
63
 
64
  with gr.Row():
65
  input_text = gr.Textbox(label="Text", placeholder="Type something...", lines=3)
66
 
 
 
 
 
 
 
 
67
  with gr.Row():
68
  model_dropdown = gr.Dropdown(choices=list(default_models.keys()), label="Select TTS Model")
69
  speaker_dropdown = gr.Dropdown(choices=["None"] + vctk_speakers, label="Speaker ID (for VCTK)")
@@ -75,13 +79,13 @@ with gr.Blocks() as demo:
75
  generate_btn = gr.Button("🔊 Generate Speech")
76
 
77
  output_audio = gr.Audio(label="Output Audio", type="filepath")
78
- metadata_json = gr.JSON(label="Meta Info (Time, Model, RTF)")
79
 
80
  generate_btn.click(
81
  fn=synthesize,
82
- inputs=[input_text, model_dropdown, speaker_dropdown, custom_model_box, speaker_wav],
83
  outputs=[output_audio, metadata_json]
84
  )
85
 
86
- demo.launch()
87
-
 
6
  default_models = {
7
  "FastPitch (Female - LJSpeech)": "tts_models/en/ljspeech/fast_pitch",
8
  "Glow-TTS (Female - LJSpeech)": "tts_models/en/ljspeech/glow-tts",
9
+ "Tactron2 (Female - LJSpeaker)": "tts_models/en/ljspeech/tacotron2-DDC",
10
  "VCTK (Multi-speaker)": "tts_models/en/vctk/vits",
11
  "YourTTS (Cloning + Multi-speaker)": "tts_models/multilingual/multi-dataset/your_tts",
 
12
  }
13
 
14
+ # Example speaker IDs for VCTK
15
  vctk_speakers = ["p225", "p227", "p229", "p230", "p233", "p234", "p236"]
16
 
17
+ # Initial model setup
18
  current_model_key = list(default_models.values())[0]
19
  tts = TTS(current_model_key, gpu=False)
20
 
21
+ def synthesize(text, selected_model, speaker_id, custom_model_url, speaker_wav_path, selected_language):
22
  global tts, current_model_key
23
 
24
+ # Determine model path
25
+ model_path = custom_model_url if custom_model_url else default_models[selected_model]
 
 
 
26
 
27
  if model_path != current_model_key:
28
  tts = TTS(model_path, gpu=False)
 
31
  output_path = "output.wav"
32
  start_time = time.time()
33
 
34
+ # Text-to-Speech conversion logic
35
  if "your_tts" in model_path.lower() and speaker_wav_path:
36
+ tts.tts_to_file(text=text, speaker_wav=speaker_wav_path, file_path=output_path, language=selected_language.lower())
37
  speaker_info = f"WAV Upload: {speaker_wav_path.split('/')[-1]}"
38
  elif "vctk" in model_path.lower() and speaker_id and speaker_id != "None":
39
  tts.tts_to_file(text=text, speaker=speaker_id, file_path=output_path)
 
47
  rtf = round(total_time / est_duration, 3)
48
 
49
  return output_path, {
50
+ "language_selected": selected_language,
51
  "processing_time_sec": round(total_time, 3),
52
  "real_time_factor": rtf,
53
  "model_used": model_path,
 
56
 
57
  # Gradio UI
58
  with gr.Blocks() as demo:
59
+ gr.Markdown("## 🗣️ TTS App with Model + Speaker Selection + Language + Cloning")
60
 
61
  with gr.Row():
62
  input_text = gr.Textbox(label="Text", placeholder="Type something...", lines=3)
63
 
64
+ with gr.Row():
65
+ language_dropdown = gr.Dropdown(
66
+ choices=["English", "Hindi", "Japanese", "French", "Spanish"],
67
+ value="English",
68
+ label="Select Language"
69
+ )
70
+
71
  with gr.Row():
72
  model_dropdown = gr.Dropdown(choices=list(default_models.keys()), label="Select TTS Model")
73
  speaker_dropdown = gr.Dropdown(choices=["None"] + vctk_speakers, label="Speaker ID (for VCTK)")
 
79
  generate_btn = gr.Button("🔊 Generate Speech")
80
 
81
  output_audio = gr.Audio(label="Output Audio", type="filepath")
82
+ metadata_json = gr.JSON(label="Meta Info (Time, Model, RTF, Language)")
83
 
84
  generate_btn.click(
85
  fn=synthesize,
86
+ inputs=[input_text, model_dropdown, speaker_dropdown, custom_model_box, speaker_wav, language_dropdown],
87
  outputs=[output_audio, metadata_json]
88
  )
89
 
90
+ # Enable external access: share=True requests a public Gradio share link; server_name="0.0.0.0" listens on all network interfaces (LAN access) on port 7860
91
+ demo.launch(share=True, server_name="0.0.0.0", server_port=7860)