Samit-khedekar committed on
Commit c8486b0 · verified · 1 Parent(s): 91c3a86

Update app.py

Files changed (1)
  1. app.py +52 -17
app.py CHANGED
@@ -11,9 +11,21 @@ default_models = {
     "YourTTS (Cloning + Multi-speaker)": "tts_models/multilingual/multi-dataset/your_tts",
 }

-# Example speaker IDs for VCTK
+# Supported speaker IDs for VCTK
 vctk_speakers = ["p225", "p227", "p229", "p230", "p233", "p234", "p236"]

+# Language display name -> model language code
+language_map = {
+    "English": "en",
+    "French": "fr-fr",
+    "Portuguese": "pt-br",
+    "Hindi": "hi",       # Not supported in YourTTS
+    "Japanese": "ja"     # Not supported in YourTTS
+}
+
+# Supported languages for YourTTS
+yourtts_supported_languages = ["en", "fr-fr", "pt-br"]
+
 # Initial model setup
 current_model_key = list(default_models.values())[0]
 tts = TTS(current_model_key, gpu=False)
@@ -21,9 +33,9 @@ tts = TTS(current_model_key, gpu=False)
 def synthesize(text, selected_model, speaker_id, custom_model_url, speaker_wav_path, selected_language):
     global tts, current_model_key

-    # Determine model path
     model_path = custom_model_url if custom_model_url else default_models[selected_model]

+    # Load the model only if different from current
     if model_path != current_model_key:
         tts = TTS(model_path, gpu=False)
         current_model_key = model_path
@@ -31,16 +43,29 @@ def synthesize(text, selected_model, speaker_id, custom_model_url, speaker_wav_p
     output_path = "output.wav"
     start_time = time.time()

-    # Text-to-Speech conversion logic
-    if "your_tts" in model_path.lower() and speaker_wav_path:
-        tts.tts_to_file(text=text, speaker_wav=speaker_wav_path, file_path=output_path, language=selected_language.lower())
-        speaker_info = f"WAV Upload: {speaker_wav_path.split('/')[-1]}"
-    elif "vctk" in model_path.lower() and speaker_id and speaker_id != "None":
-        tts.tts_to_file(text=text, speaker=speaker_id, file_path=output_path)
-        speaker_info = speaker_id
-    else:
-        tts.tts_to_file(text=text, file_path=output_path)
-        speaker_info = "Default"
+    lang_code = language_map.get(selected_language, "en")
+    speaker_info = "Default"
+
+    try:
+        if "your_tts" in model_path.lower():
+            if lang_code not in yourtts_supported_languages:
+                raise ValueError(f"❌ '{selected_language}' is not supported by YourTTS. Please choose from English, French, or Portuguese.")
+
+            if not speaker_wav_path:
+                raise ValueError("❌ Speaker WAV file is required for cloning with YourTTS.")
+
+            tts.tts_to_file(text=text, speaker_wav=speaker_wav_path, file_path=output_path, language=lang_code)
+            speaker_info = f"WAV Upload: {speaker_wav_path.split('/')[-1]}"
+
+        elif "vctk" in model_path.lower() and speaker_id and speaker_id != "None":
+            tts.tts_to_file(text=text, speaker=speaker_id, file_path=output_path)
+            speaker_info = speaker_id
+
+        else:
+            tts.tts_to_file(text=text, file_path=output_path)
+
+    except ValueError as e:
+        return None, {"error": str(e)}

     total_time = time.time() - start_time
     est_duration = len(text.split()) / 2.5
@@ -54,16 +79,16 @@ def synthesize(text, selected_model, speaker_id, custom_model_url, speaker_wav_p
         "speaker_used": speaker_info
     }

-# Gradio UI
+# Gradio UI + API Interface
 with gr.Blocks() as demo:
-    gr.Markdown("## 🗣️ TTS App with Model + Speaker Selection + Language + Cloning")
+    gr.Markdown("## 🗣️ TTS App (Model, Speaker, Language, Cloning, API-ready)")

     with gr.Row():
         input_text = gr.Textbox(label="Text", placeholder="Type something...", lines=3)

     with gr.Row():
         language_dropdown = gr.Dropdown(
-            choices=["English", "Hindi", "Japanese", "French", "Spanish"],
+            choices=list(language_map.keys()),
             value="English",
             label="Select Language"
         )
@@ -79,7 +104,7 @@ with gr.Blocks() as demo:
     generate_btn = gr.Button("🔊 Generate Speech")

     output_audio = gr.Audio(label="Output Audio", type="filepath")
-    metadata_json = gr.JSON(label="Meta Info (Time, Model, RTF, Language)")
+    metadata_json = gr.JSON(label="Meta Info (Time, Model, RTF, Language / Error)")

     generate_btn.click(
         fn=synthesize,
@@ -87,5 +112,15 @@ with gr.Blocks() as demo:
         outputs=[output_audio, metadata_json]
     )

-# Enable external access (API access or local LAN sharing)
+    # API endpoint setup (backend integration)
+    gr.Markdown("### 🔌 API Endpoint")
+    gr.Interface(
+        fn=synthesize,
+        inputs=[gr.Text(), gr.Text(), gr.Text(), gr.Text(), gr.Audio(type="filepath"), gr.Text()],
+        outputs=[gr.Audio(type="filepath"), gr.JSON()],
+        allow_flagging="never",
+        live=False
+    ).queue().launch(inline=True)
+
+# Start Gradio server with external access
 demo.launch(share=True, server_name="0.0.0.0", server_port=7860)
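
For a quick local check of the new language handling, the gating this commit adds to synthesize() can be exercised on its own. A minimal sketch, with check_yourtts_language as a hypothetical helper that mirrors the checks in app.py; the two lookup tables are copied from the diff above.

# Sketch only: mirrors the YourTTS language gating added in this commit.
language_map = {
    "English": "en",
    "French": "fr-fr",
    "Portuguese": "pt-br",
    "Hindi": "hi",       # Not supported in YourTTS
    "Japanese": "ja"     # Not supported in YourTTS
}
yourtts_supported_languages = ["en", "fr-fr", "pt-br"]

def check_yourtts_language(selected_language: str) -> str:
    """Return the model language code, or raise ValueError as synthesize() does."""
    lang_code = language_map.get(selected_language, "en")
    if lang_code not in yourtts_supported_languages:
        raise ValueError(
            f"'{selected_language}' is not supported by YourTTS. "
            "Please choose from English, French, or Portuguese."
        )
    return lang_code

print(check_yourtts_language("French"))   # -> "fr-fr"
try:
    check_yourtts_language("Hindi")
except ValueError as e:
    # In the app, this is what reaches the client: (None, {"error": str(e)})
    print({"error": str(e)})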
 
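Once the app is running, the synthesize pipeline can also be driven from Python with the gradio_client package. A minimal sketch, assuming the server is reachable at localhost:7860; the endpoint name "/synthesize" is an assumption (Gradio derives it from the function name when api_name is not set on the click handler), so confirm it with client.view_api() first.

# Sketch only: remote call against the running app. Assumes `pip install gradio_client`.
from gradio_client import Client

client = Client("http://localhost:7860/")  # or the public share URL printed at launch
client.view_api()                          # lists the exposed endpoints and their inputs

audio_path, meta = client.predict(
    "Hello from the API",                  # text
    "YourTTS (Cloning + Multi-speaker)",   # selected_model
    "None",                                # speaker_id
    "",                                    # custom_model_url (empty: use selected_model)
    "reference.wav",                       # speaker_wav_path; newer clients may expect handle_file("reference.wav")
    "English",                             # selected_language
    api_name="/synthesize",                # assumption, verify via view_api()
)
print(audio_path)  # filepath of the generated audio
print(meta)        # timing/model metadata, or {"error": ...} if validation failed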