Spaces:
Runtime error
Runtime error
| # app.py | |
| import gradio as gr | |
| import torch | |
# --- Global Settings & Model Caching ---
# Already-loaded models keyed by model id; load_model() fills and reads this.
MODEL_CACHE = dict()
# All models are moved to the CPU device.
DEVICE = torch.device("cpu")
# Model that is loaded at startup and preselected in the UI.
DEFAULT_MODEL_ID = "v3_en_indic"
| # --- Model Loading Logic --- | |
def load_model(model_id, language):
    """Return the Silero TTS model for ``model_id``, loading it on first use.

    A model that was loaded before is served from ``MODEL_CACHE``. Otherwise
    it is fetched through ``torch.hub`` from ``snakers4/silero-models``,
    moved to ``DEVICE``, stored in the cache and returned.

    Args:
        model_id: Silero speaker/model identifier; also the cache key.
        language: Language code passed to the hub entry point.

    Returns:
        The loaded model object.
    """
    try:
        cached_model = MODEL_CACHE[model_id]
    except KeyError:
        pass
    else:
        print(f"Loading model '{model_id}' from cache.")
        return cached_model

    print(f"Loading model '{model_id}' from torch.hub...")
    # The hub entry point returns a pair; only the first element (the model)
    # is used here.
    tts_model, _unused = torch.hub.load(
        repo_or_dir='snakers4/silero-models',
        model='silero_tts',
        language=language,
        speaker=model_id,
    )
    tts_model.to(DEVICE)
    MODEL_CACHE[model_id] = tts_model
    return tts_model
def get_model_details(model_id):
    """Return the language code to use when loading ``model_id``.

    Only ``'v4_indic'`` maps to ``'indic'``; every other identifier maps
    to ``'en'``.
    """
    return 'indic' if model_id == 'v4_indic' else 'en'
def change_model(model_id):
    """Load the newly selected model and refresh the speaker dropdown.

    Called when the user picks a different entry in the model dropdown.

    Args:
        model_id: Identifier chosen in the model dropdown.

    Returns:
        A 2-tuple ``(model, dropdown_update)``: the loaded model and a
        ``gr.update`` payload that replaces the speaker choices and resets
        the selection to ``'random'``.
    """
    language = get_model_details(model_id)
    model = load_model(model_id, language)
    # Keep 'random' first and exactly once, in case the model's own speaker
    # list already contains a 'random' entry.
    speakers = ['random'] + [name for name in model.speakers if name != 'random']
    # gr.update is used here instead of the component-specific
    # gr.Dropdown.update.
    return model, gr.update(choices=speakers, value='random')
| # --- Core TTS Function --- | |
def generate_audio(model, text, speaker, apply_accent):
    """Synthesize speech for ``text`` with the given model and speaker.

    Args:
        model: Loaded TTS model exposing ``apply_tts``; ``None`` means no
            model has been loaded yet.
        text: Text to synthesize. ``None``, empty and whitespace-only input
            are all rejected with a status message instead of raising.
        speaker: Speaker name forwarded to ``apply_tts``.
        apply_accent: Forwarded to ``apply_tts`` as ``put_accent``.

    Returns:
        A 2-tuple ``(audio, status)``. On success ``audio`` is
        ``(sample_rate, samples)`` where ``samples`` is the result of
        calling ``.numpy()`` on the synthesized tensor; on any failure
        ``audio`` is ``None`` and ``status`` explains why.
    """
    if model is None:
        return None, "Error: Model not loaded. Please select a model from the dropdown."
    # Guard against None as well as blank input: calling .strip() on None
    # would raise outside the try block below.
    if not text or not text.strip():
        return None, "Please enter some text to generate audio."

    print(f"Generating audio for: '{text}' with speaker: '{speaker}'")
    sample_rate = 48000
    try:
        audio_tensor = model.apply_tts(
            text=text,
            speaker=speaker,
            sample_rate=sample_rate,
            put_accent=apply_accent,
        )
        return (sample_rate, audio_tensor.numpy()), f"Successfully generated for: '{text}'"
    except Exception as e:
        # UI boundary: report the failure in the status box rather than
        # letting the handler crash.
        print(f"Error during TTS generation: {e}")
        return None, f"An error occurred: {e}"
# --- Load the initial model at startup ---
initial_model = load_model(DEFAULT_MODEL_ID, get_model_details(DEFAULT_MODEL_ID))
# Put 'random' first and list it exactly once, in case the model's own
# speaker list already contains a 'random' entry.
initial_speakers = ['random'] + [
    name for name in initial_model.speakers if name != 'random'
]
# --- Gradio UI Definition ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # The header is assembled from explicit lines so that the rendered
    # Markdown does not depend on how this source file is indented.
    gr.Markdown(
        "# 🎙️ Silero Multi-Model Text-to-Speech\n"
        "Select a model, choose a speaker, and enter text to generate speech.\n"
        "**Note:** `v3` models support [SSML tags](https://github.com/snakers4/silero-models?tab=readme-ov-file#ssml-support) for advanced control (e.g., `<speak><prosody rate='x-slow'>slow speech</prosody></speak>`)."
    )

    # Holds the currently selected model; change_model() replaces it and
    # generate_audio() reads it.
    # NOTE(review): gr.State is documented to deep-copy its initial value -
    # confirm the loaded model object supports that in the deployed
    # Gradio version.
    model_state = gr.State(initial_model)

    with gr.Row():
        # Left column: all inputs.
        with gr.Column(scale=2):
            model_selector = gr.Dropdown(
                label="Select Model",
                choices=['v3_en_indic', 'v4_indic', 'v3_en'],
                value=DEFAULT_MODEL_ID,
            )
            text_input = gr.Textbox(
                label="Text to Synthesize (Supports SSML for V3 models)",
                placeholder="Hello, welcome to my text to speech app.",
                lines=4,
            )
            speaker_dropdown = gr.Dropdown(
                label="Select Speaker Voice",
                choices=initial_speakers,
                value='random',
            )
            accent_checkbox = gr.Checkbox(label="Apply Accent Stress", value=True)
            generate_btn = gr.Button("Generate Audio", variant="primary")
        # Right column: status message and resulting audio.
        with gr.Column(scale=1):
            status_text = gr.Textbox(label="Status", interactive=False)
            audio_output = gr.Audio(label="Generated Audio")

    # --- Event Handling ---
    # Selecting a model loads it and refreshes the speaker choices.
    model_selector.change(
        fn=change_model,
        inputs=model_selector,
        outputs=[model_state, speaker_dropdown],
    )
    generate_btn.click(
        fn=generate_audio,
        inputs=[model_state, text_input, speaker_dropdown, accent_checkbox],
        outputs=[audio_output, status_text],
    )

    # NOTE(review): the examples set a speaker (e.g. "hindi_female") that may
    # not be among the speaker dropdown's current choices until the model
    # change handler has run - verify the behavior when an example is clicked.
    gr.Examples(
        examples=[
            ["v3_en_indic", "Welcome to this demonstration of advanced speech synthesis technology.", "random", True],
            ["v4_indic", "Aapka shubh naam kya hai?", "hindi_female", True],
            ["v3_en", "I live in India", "en_10", True],
        ],
        inputs=[model_selector, text_input, speaker_dropdown, accent_checkbox],
    )

if __name__ == "__main__":
    demo.launch()