Spaces:

Dionyssos
/

SHIFT

Running

App Files Files Community

Dionyssos committed on Sep 14

Commit

28baddf

1 Parent(s): 275056e

tx

Browse files

Files changed (3) hide show

README.md +2 -2
app.py +22 -53
tts.py +1 -1

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
-title: affective TTS
-emoji: 💤
 colorFrom: gray
 colorTo: gray
 sdk: gradio

 ---
+title: heritage TTS
+emoji: 🏛️
 colorFrom: gray
 colorTo: gray
 sdk: gradio

app.py CHANGED Viewed

@@ -9,13 +9,8 @@ import audiofile
 from tts import StyleTTS2
 from textual import only_greek_or_only_latin, transliterate_number, fix_vocals
 import textwrap
-import nltk
-from audionar import VitsModel, VitsTokenizer
-nltk.download('punkt', download_dir='./')
-nltk.download('punkt_tab', download_dir='./')
-nltk.data.path.append('.')
@@ -134,7 +129,7 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
         'it_IT_mls_844.wav',
         'en_US_vctk_p269.wav',
         'en_US_vctk_p285.wav',
-        'de_DE_m-ailabs_angela_merkel.wav',
         'en_US_vctk_p316.wav',
         'en_US_vctk_p362.wav',
         'jv_ID_google-gmu_06207.wav',
@@ -149,8 +144,8 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
         'en_US_vctk_p248.wav',
         'en_US_vctk_p287.wav',
         'it_IT_mls_9772.wav',
-        'te_IN_cmu-indic_sk.wav',
-        'tn_ZA_google-nwu_8333.wav',
         'en_US_vctk_p260.wav',
         'en_US_vctk_p247.wav',
         'en_US_vctk_p329.wav',
@@ -166,10 +161,10 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
         'it_IT_mls_7440.wav',
         'en_US_vctk_p310.wav',
         'en_US_vctk_p237.wav',
-        'en_US_hifi-tts_92.wav',
-        'en_US_cmu_arctic_aew.wav',
-        'ne_NP_ne-google_2099.wav',
-        'en_US_vctk_p226.wav',
         'af_ZA_google-nwu_1919.wav',
         'jv_ID_google-gmu_03727.wav',
         'en_US_vctk_p317.wav',
@@ -262,7 +257,7 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
         'tn_ZA_google-nwu_7866.wav',
         'en_US_vctk_p300.wav',
         'ne_NP_ne-google_0649.wav',
-        'es_ES_carlfm.wav',
         'jv_ID_google-gmu_06510.wav',
         'de_DE_m-ailabs_rebecca_braunert_plunkett.wav',
         'en_US_vctk_p340.wav',
@@ -280,10 +275,10 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
         'ru_RU_multi_minaev.wav',
         'sw_lanfrica.wav',
         'en_US_vctk_p271.wav',
-        'tn_ZA_google-nwu_0441.wav',
-        'it_IT_mls_6001.wav',
-        'en_US_vctk_p305.wav',
-        'it_IT_mls_8828.wav',
         'jv_ID_google-gmu_08002.wav',
         'it_IT_mls_2033.wav',
         'tn_ZA_google-nwu_3629.wav',
@@ -321,35 +316,18 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
         'bn_multi_4046.wav',
         'en_US_vctk_p288.wav',
         'en_US_vctk_p251.wav',
-        'es_ES_m-ailabs_tux.wav',
         'tn_ZA_google-nwu_6206.wav',
         'bn_multi_9169.wav',
-        # 'en_US_vctk_p293.wav',
-        # 'en_US_vctk_p255.wav',
-        'af_ZA_google-nwu_8963.wav',
-        # 'en_US_vctk_p265.wav',
-        'gu_IN_cmu-indic_cmu_indic_guj_ad.wav',
-        'jv_ID_google-gmu_07335.wav',
-        'en_US_vctk_p323.wav',
-        'en_US_vctk_p281.wav',
-        'en_US_cmu_arctic_bdl.wav',
-        'en_US_m-ailabs_judy_bieber.wav',
         'it_IT_mls_10446.wav',
         'en_US_vctk_p261.wav',
         'en_US_vctk_p292.wav',
-        'te_IN_cmu-indic_ss.wav',
-        'en_US_vctk_p311.wav',
-        'it_IT_mls_12428.wav',
-        'en_US_cmu_arctic_aup.wav',
-        'jv_ID_google-gmu_04679.wav',
-        'it_IT_mls_4971.wav',
-        'en_US_cmu_arctic_ljm.wav',
         'fa_haaniye.wav',
         'en_US_vctk_p339.wav',
         'tn_ZA_google-nwu_7896.wav',
         'en_US_vctk_p253.wav',
         'it_IT_mls_5421.wav',
-        # 'ne_NP_ne-google_0546.wav',
         'vi_VN_vais1000.wav',
         'en_US_vctk_p229.wav',
         'en_US_vctk_p254.wav',
@@ -357,12 +335,8 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
         'it_IT_mls_7936.wav',
         'en_US_vctk_p301.wav',
         'tn_ZA_google-nwu_0045.wav',
-        # 'it_IT_mls_659.wav',
-        'tn_ZA_google-nwu_7674.wav',
-        # 'it_IT_mls_12804.wav',
-        # 'el_GR_rapunzelina.wav',
-        'en_US_hifi-tts_6097.wav',
-        'en_US_vctk_p257.wav',
         'jv_ID_google-gmu_07875.wav',
         'it_IT_mls_1157.wav',
         'it_IT_mls_643.wav',
@@ -446,25 +420,20 @@ _tts = StyleTTS2().to('cpu')
 with gr.Blocks(theme='huggingface') as demo:
-    with gr.Row():
         text_input = gr.Textbox(
             label="Type text for TTS:",
             placeholder="Type Text for TTS",
             lines=4,
-            value='Η γρηγορη καφετι αλεπου πειδαει πανω απο τον τεμπελη σκυλο.',
-        )
         choice_dropdown = gr.Dropdown(
             choices=language_names + VOICES,
             label="Vox",
-            value=language_names[0]
-        )
         generate_button = gr.Button("Generate Audio", variant="primary")
-    output_audio = gr.Audio(label="TTS Output")
     generate_button.click(
         fn=audionar_tts,
         inputs=[text_input, choice_dropdown],
-        outputs=[output_audio]
-    )
 demo.launch(debug=True)

 from tts import StyleTTS2
 from textual import only_greek_or_only_latin, transliterate_number, fix_vocals
 import textwrap
+from audionar import VitsModel, VitsTokenizer
         'it_IT_mls_844.wav',
         'en_US_vctk_p269.wav',
         'en_US_vctk_p285.wav',
+        # 'de_DE_m-ailabs_angela_merkel.wav',
         'en_US_vctk_p316.wav',
         'en_US_vctk_p362.wav',
         'jv_ID_google-gmu_06207.wav',
         'en_US_vctk_p248.wav',
         'en_US_vctk_p287.wav',
         'it_IT_mls_9772.wav',
+        # 'te_IN_cmu-indic_sk.wav',
+        # 'tn_ZA_google-nwu_8333.wav',
         'en_US_vctk_p260.wav',
         'en_US_vctk_p247.wav',
         'en_US_vctk_p329.wav',
         'it_IT_mls_7440.wav',
         'en_US_vctk_p310.wav',
         'en_US_vctk_p237.wav',
+        # 'en_US_hifi-tts_92.wav',
+        # 'en_US_cmu_arctic_aew.wav',
+        # 'ne_NP_ne-google_2099.wav',
+        # 'en_US_vctk_p226.wav',
         'af_ZA_google-nwu_1919.wav',
         'jv_ID_google-gmu_03727.wav',
         'en_US_vctk_p317.wav',
         'tn_ZA_google-nwu_7866.wav',
         'en_US_vctk_p300.wav',
         'ne_NP_ne-google_0649.wav',
+        # 'es_ES_carlfm.wav',
         'jv_ID_google-gmu_06510.wav',
         'de_DE_m-ailabs_rebecca_braunert_plunkett.wav',
         'en_US_vctk_p340.wav',
         'ru_RU_multi_minaev.wav',
         'sw_lanfrica.wav',
         'en_US_vctk_p271.wav',
+        # 'tn_ZA_google-nwu_0441.wav',
+        # 'it_IT_mls_6001.wav',
+        # 'en_US_vctk_p305.wav',
+        # 'it_IT_mls_8828.wav',
         'jv_ID_google-gmu_08002.wav',
         'it_IT_mls_2033.wav',
         'tn_ZA_google-nwu_3629.wav',
         'bn_multi_4046.wav',
         'en_US_vctk_p288.wav',
         'en_US_vctk_p251.wav',
         'tn_ZA_google-nwu_6206.wav',
         'bn_multi_9169.wav',
+        'en_US_vctk_p323.wav',
+        'en_US_m-ailabs_judy_bieber.wav',
         'it_IT_mls_10446.wav',
         'en_US_vctk_p261.wav',
         'en_US_vctk_p292.wav',
         'fa_haaniye.wav',
         'en_US_vctk_p339.wav',
         'tn_ZA_google-nwu_7896.wav',
         'en_US_vctk_p253.wav',
         'it_IT_mls_5421.wav',
         'vi_VN_vais1000.wav',
         'en_US_vctk_p229.wav',
         'en_US_vctk_p254.wav',
         'it_IT_mls_7936.wav',
         'en_US_vctk_p301.wav',
         'tn_ZA_google-nwu_0045.wav',
+        'tn_ZA_google-nwu_7674.wav',
+        'en_US_hifi-tts_6097.wav',
         'jv_ID_google-gmu_07875.wav',
         'it_IT_mls_1157.wav',
         'it_IT_mls_643.wav',
 with gr.Blocks(theme='huggingface') as demo:
+    with gr.Column():
         text_input = gr.Textbox(
             label="Type text for TTS:",
             placeholder="Type Text for TTS",
             lines=4,
+            value='Η γρηγορη καφετι αλεπου πειδαει πανω απο τον τεμπελη σκυλο.')
         choice_dropdown = gr.Dropdown(
             choices=language_names + VOICES,
             label="Vox",
+            value=language_names[0])
         generate_button = gr.Button("Generate Audio", variant="primary")
+        output_audio = gr.Audio(label="TTS Output")
     generate_button.click(
         fn=audionar_tts,
         inputs=[text_input, choice_dropdown],
+        outputs=[output_audio])
 demo.launch(debug=True)

tts.py CHANGED Viewed

@@ -167,7 +167,7 @@ class StyleTTS2(nn.Module):
             _translator = str.maketrans('', '', string.punctuation)
-            text = [sub_sent.translate(_translator) + '.' for sub_sent in textwrap.wrap(text, 74)]
             # # text = nltk.sent_tokenize(text)
             # # text = [i for sent in sentences for i in textwrap.wrap(sent, width=120)]

             _translator = str.maketrans('', '', string.punctuation)
+            text = [sub_sent.translate(_translator) + '.' for sub_sent in textwrap.wrap(text, 294)]
             # # text = nltk.sent_tokenize(text)
             # # text = [i for sent in sentences for i in textwrap.wrap(sent, width=120)]