Spaces:
Sleeping
Sleeping
| # Import necessary libraries | |
| import os | |
| import numpy as np | |
| from speechbrain.pretrained import Tacotron2, HIFIGAN | |
| from scipy.io.wavfile import write | |
| import streamlit as st | |
# Load TTS and vocoder models
@st.cache_resource
def load_models():
    """Load the pretrained Tacotron2 synthesizer and HiFi-GAN vocoder.

    Streamlit re-executes the script on every widget interaction, so the
    loader is wrapped in ``st.cache_resource``: the models are built on the
    first run and the same objects are returned on later reruns instead of
    being instantiated again.

    Returns:
        A ``(tacotron2, hifi_gan)`` tuple of the loaded models.
    """
    tacotron2 = Tacotron2.from_hparams(
        source="speechbrain/tts-tacotron2-ljspeech",
        savedir="tmp_tts",
    )
    hifi_gan = HIFIGAN.from_hparams(
        source="speechbrain/tts-hifigan-ljspeech",
        savedir="tmp_vocoder",
    )
    return tacotron2, hifi_gan


tacotron2, hifi_gan = load_models()
# Text-to-Speech function
def text_to_speech(text):
    """Synthesize *text* to speech and save it as ``output.wav``.

    Args:
        text: The text to convert to speech.

    Returns:
        The path of the written WAV file, or ``None`` if generation failed.
        Failures are reported to the page through ``st.error``.
    """
    try:
        # Text -> mel spectrogram; the other two return values are unused.
        mel_output, _, _ = tacotron2.encode_text(text)
        # Mel spectrogram -> waveform
        waveforms = hifi_gan.decode_batch(mel_output)
        # Drop singleton dimensions and move to a NumPy array for SciPy.
        waveform = waveforms.squeeze().cpu().numpy()

        if waveform.size == 0:
            # Fail with a clear message instead of an opaque NumPy
            # reduction error; the handler below reports it.
            raise ValueError("the vocoder returned an empty waveform")

        # Peak-normalize to [-1, 1]. A silent waveform has a zero peak;
        # dividing by it would yield NaN, which casts to arbitrary int16
        # samples, so silence is left unscaled.
        peak = np.max(np.abs(waveform))
        if peak > 0:
            waveform = waveform / peak

        # Save as 16-bit PCM.
        # NOTE(review): 22050 Hz is presumably the rate the LJSpeech
        # models produce - confirm against the model card.
        output_path = "output.wav"
        write(output_path, 22050, (waveform * 32767).astype(np.int16))
        return output_path
    except Exception as e:
        # UI boundary: show the failure to the user rather than crash the app.
        st.error(f"Error during text-to-speech generation: {e}")
        return None
# Streamlit page: heading and short instructions
st.title("Text-to-Speech Application")
st.write("Enter text below and convert it to speech!")

# Text box pre-filled with a sample sentence
text_input = st.text_area("Enter Text:", "Hello, welcome to the Text-to-Speech app!")

# Only act when the button was clicked; validate the text before synthesizing
if st.button("Generate Speech"):
    if not text_input.strip():
        st.warning("Please enter some text to generate speech.")
    else:
        output_audio = text_to_speech(text_input)
        if not output_audio:
            st.error("Failed to generate audio. Please check the input text.")
        else:
            st.audio(output_audio, format="audio/wav")