tx
Browse files
README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: gray
|
| 5 |
colorTo: gray
|
| 6 |
sdk: gradio
|
|
|
|
| 1 |
---
|
| 2 |
+
title: heritage TTS
|
| 3 |
+
emoji: 🏛️
|
| 4 |
colorFrom: gray
|
| 5 |
colorTo: gray
|
| 6 |
sdk: gradio
|
app.py
CHANGED
|
@@ -9,13 +9,8 @@ import audiofile
|
|
| 9 |
from tts import StyleTTS2
|
| 10 |
from textual import only_greek_or_only_latin, transliterate_number, fix_vocals
|
| 11 |
import textwrap
|
| 12 |
-
import nltk
|
| 13 |
-
from audionar import VitsModel, VitsTokenizer
|
| 14 |
-
|
| 15 |
|
| 16 |
-
|
| 17 |
-
nltk.download('punkt_tab', download_dir='./')
|
| 18 |
-
nltk.data.path.append('.')
|
| 19 |
|
| 20 |
|
| 21 |
|
|
@@ -134,7 +129,7 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
|
|
| 134 |
'it_IT_mls_844.wav',
|
| 135 |
'en_US_vctk_p269.wav',
|
| 136 |
'en_US_vctk_p285.wav',
|
| 137 |
-
'de_DE_m-ailabs_angela_merkel.wav',
|
| 138 |
'en_US_vctk_p316.wav',
|
| 139 |
'en_US_vctk_p362.wav',
|
| 140 |
'jv_ID_google-gmu_06207.wav',
|
|
@@ -149,8 +144,8 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
|
|
| 149 |
'en_US_vctk_p248.wav',
|
| 150 |
'en_US_vctk_p287.wav',
|
| 151 |
'it_IT_mls_9772.wav',
|
| 152 |
-
'te_IN_cmu-indic_sk.wav',
|
| 153 |
-
'tn_ZA_google-nwu_8333.wav',
|
| 154 |
'en_US_vctk_p260.wav',
|
| 155 |
'en_US_vctk_p247.wav',
|
| 156 |
'en_US_vctk_p329.wav',
|
|
@@ -166,10 +161,10 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
|
|
| 166 |
'it_IT_mls_7440.wav',
|
| 167 |
'en_US_vctk_p310.wav',
|
| 168 |
'en_US_vctk_p237.wav',
|
| 169 |
-
'en_US_hifi-tts_92.wav',
|
| 170 |
-
'en_US_cmu_arctic_aew.wav',
|
| 171 |
-
'ne_NP_ne-google_2099.wav',
|
| 172 |
-
'en_US_vctk_p226.wav',
|
| 173 |
'af_ZA_google-nwu_1919.wav',
|
| 174 |
'jv_ID_google-gmu_03727.wav',
|
| 175 |
'en_US_vctk_p317.wav',
|
|
@@ -262,7 +257,7 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
|
|
| 262 |
'tn_ZA_google-nwu_7866.wav',
|
| 263 |
'en_US_vctk_p300.wav',
|
| 264 |
'ne_NP_ne-google_0649.wav',
|
| 265 |
-
'es_ES_carlfm.wav',
|
| 266 |
'jv_ID_google-gmu_06510.wav',
|
| 267 |
'de_DE_m-ailabs_rebecca_braunert_plunkett.wav',
|
| 268 |
'en_US_vctk_p340.wav',
|
|
@@ -280,10 +275,10 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
|
|
| 280 |
'ru_RU_multi_minaev.wav',
|
| 281 |
'sw_lanfrica.wav',
|
| 282 |
'en_US_vctk_p271.wav',
|
| 283 |
-
'tn_ZA_google-nwu_0441.wav',
|
| 284 |
-
'it_IT_mls_6001.wav',
|
| 285 |
-
'en_US_vctk_p305.wav',
|
| 286 |
-
'it_IT_mls_8828.wav',
|
| 287 |
'jv_ID_google-gmu_08002.wav',
|
| 288 |
'it_IT_mls_2033.wav',
|
| 289 |
'tn_ZA_google-nwu_3629.wav',
|
|
@@ -321,35 +316,18 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
|
|
| 321 |
'bn_multi_4046.wav',
|
| 322 |
'en_US_vctk_p288.wav',
|
| 323 |
'en_US_vctk_p251.wav',
|
| 324 |
-
'es_ES_m-ailabs_tux.wav',
|
| 325 |
'tn_ZA_google-nwu_6206.wav',
|
| 326 |
'bn_multi_9169.wav',
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
'af_ZA_google-nwu_8963.wav',
|
| 330 |
-
# 'en_US_vctk_p265.wav',
|
| 331 |
-
'gu_IN_cmu-indic_cmu_indic_guj_ad.wav',
|
| 332 |
-
'jv_ID_google-gmu_07335.wav',
|
| 333 |
-
'en_US_vctk_p323.wav',
|
| 334 |
-
'en_US_vctk_p281.wav',
|
| 335 |
-
'en_US_cmu_arctic_bdl.wav',
|
| 336 |
-
'en_US_m-ailabs_judy_bieber.wav',
|
| 337 |
'it_IT_mls_10446.wav',
|
| 338 |
'en_US_vctk_p261.wav',
|
| 339 |
'en_US_vctk_p292.wav',
|
| 340 |
-
'te_IN_cmu-indic_ss.wav',
|
| 341 |
-
'en_US_vctk_p311.wav',
|
| 342 |
-
'it_IT_mls_12428.wav',
|
| 343 |
-
'en_US_cmu_arctic_aup.wav',
|
| 344 |
-
'jv_ID_google-gmu_04679.wav',
|
| 345 |
-
'it_IT_mls_4971.wav',
|
| 346 |
-
'en_US_cmu_arctic_ljm.wav',
|
| 347 |
'fa_haaniye.wav',
|
| 348 |
'en_US_vctk_p339.wav',
|
| 349 |
'tn_ZA_google-nwu_7896.wav',
|
| 350 |
'en_US_vctk_p253.wav',
|
| 351 |
'it_IT_mls_5421.wav',
|
| 352 |
-
# 'ne_NP_ne-google_0546.wav',
|
| 353 |
'vi_VN_vais1000.wav',
|
| 354 |
'en_US_vctk_p229.wav',
|
| 355 |
'en_US_vctk_p254.wav',
|
|
@@ -357,12 +335,8 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
|
|
| 357 |
'it_IT_mls_7936.wav',
|
| 358 |
'en_US_vctk_p301.wav',
|
| 359 |
'tn_ZA_google-nwu_0045.wav',
|
| 360 |
-
|
| 361 |
-
'
|
| 362 |
-
# 'it_IT_mls_12804.wav',
|
| 363 |
-
# 'el_GR_rapunzelina.wav',
|
| 364 |
-
'en_US_hifi-tts_6097.wav',
|
| 365 |
-
'en_US_vctk_p257.wav',
|
| 366 |
'jv_ID_google-gmu_07875.wav',
|
| 367 |
'it_IT_mls_1157.wav',
|
| 368 |
'it_IT_mls_643.wav',
|
|
@@ -446,25 +420,20 @@ _tts = StyleTTS2().to('cpu')
|
|
| 446 |
|
| 447 |
|
| 448 |
with gr.Blocks(theme='huggingface') as demo:
|
| 449 |
-
with gr.
|
| 450 |
text_input = gr.Textbox(
|
| 451 |
label="Type text for TTS:",
|
| 452 |
placeholder="Type Text for TTS",
|
| 453 |
lines=4,
|
| 454 |
-
value='Η γρηγορη καφετι αλεπου πειδαει πανω απο τον
|
| 455 |
-
)
|
| 456 |
choice_dropdown = gr.Dropdown(
|
| 457 |
choices=language_names + VOICES,
|
| 458 |
label="Vox",
|
| 459 |
-
value=language_names[0]
|
| 460 |
-
)
|
| 461 |
generate_button = gr.Button("Generate Audio", variant="primary")
|
| 462 |
-
|
| 463 |
-
output_audio = gr.Audio(label="TTS Output")
|
| 464 |
-
|
| 465 |
generate_button.click(
|
| 466 |
fn=audionar_tts,
|
| 467 |
inputs=[text_input, choice_dropdown],
|
| 468 |
-
outputs=[output_audio]
|
| 469 |
-
)
|
| 470 |
demo.launch(debug=True)
|
|
|
|
| 9 |
from tts import StyleTTS2
|
| 10 |
from textual import only_greek_or_only_latin, transliterate_number, fix_vocals
|
| 11 |
import textwrap
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
+
from audionar import VitsModel, VitsTokenizer
|
|
|
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
|
|
|
|
| 129 |
'it_IT_mls_844.wav',
|
| 130 |
'en_US_vctk_p269.wav',
|
| 131 |
'en_US_vctk_p285.wav',
|
| 132 |
+
# 'de_DE_m-ailabs_angela_merkel.wav',
|
| 133 |
'en_US_vctk_p316.wav',
|
| 134 |
'en_US_vctk_p362.wav',
|
| 135 |
'jv_ID_google-gmu_06207.wav',
|
|
|
|
| 144 |
'en_US_vctk_p248.wav',
|
| 145 |
'en_US_vctk_p287.wav',
|
| 146 |
'it_IT_mls_9772.wav',
|
| 147 |
+
# 'te_IN_cmu-indic_sk.wav',
|
| 148 |
+
# 'tn_ZA_google-nwu_8333.wav',
|
| 149 |
'en_US_vctk_p260.wav',
|
| 150 |
'en_US_vctk_p247.wav',
|
| 151 |
'en_US_vctk_p329.wav',
|
|
|
|
| 161 |
'it_IT_mls_7440.wav',
|
| 162 |
'en_US_vctk_p310.wav',
|
| 163 |
'en_US_vctk_p237.wav',
|
| 164 |
+
# 'en_US_hifi-tts_92.wav',
|
| 165 |
+
# 'en_US_cmu_arctic_aew.wav',
|
| 166 |
+
# 'ne_NP_ne-google_2099.wav',
|
| 167 |
+
# 'en_US_vctk_p226.wav',
|
| 168 |
'af_ZA_google-nwu_1919.wav',
|
| 169 |
'jv_ID_google-gmu_03727.wav',
|
| 170 |
'en_US_vctk_p317.wav',
|
|
|
|
| 257 |
'tn_ZA_google-nwu_7866.wav',
|
| 258 |
'en_US_vctk_p300.wav',
|
| 259 |
'ne_NP_ne-google_0649.wav',
|
| 260 |
+
# 'es_ES_carlfm.wav',
|
| 261 |
'jv_ID_google-gmu_06510.wav',
|
| 262 |
'de_DE_m-ailabs_rebecca_braunert_plunkett.wav',
|
| 263 |
'en_US_vctk_p340.wav',
|
|
|
|
| 275 |
'ru_RU_multi_minaev.wav',
|
| 276 |
'sw_lanfrica.wav',
|
| 277 |
'en_US_vctk_p271.wav',
|
| 278 |
+
# 'tn_ZA_google-nwu_0441.wav',
|
| 279 |
+
# 'it_IT_mls_6001.wav',
|
| 280 |
+
# 'en_US_vctk_p305.wav',
|
| 281 |
+
# 'it_IT_mls_8828.wav',
|
| 282 |
'jv_ID_google-gmu_08002.wav',
|
| 283 |
'it_IT_mls_2033.wav',
|
| 284 |
'tn_ZA_google-nwu_3629.wav',
|
|
|
|
| 316 |
'bn_multi_4046.wav',
|
| 317 |
'en_US_vctk_p288.wav',
|
| 318 |
'en_US_vctk_p251.wav',
|
|
|
|
| 319 |
'tn_ZA_google-nwu_6206.wav',
|
| 320 |
'bn_multi_9169.wav',
|
| 321 |
+
'en_US_vctk_p323.wav',
|
| 322 |
+
'en_US_m-ailabs_judy_bieber.wav',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
'it_IT_mls_10446.wav',
|
| 324 |
'en_US_vctk_p261.wav',
|
| 325 |
'en_US_vctk_p292.wav',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
'fa_haaniye.wav',
|
| 327 |
'en_US_vctk_p339.wav',
|
| 328 |
'tn_ZA_google-nwu_7896.wav',
|
| 329 |
'en_US_vctk_p253.wav',
|
| 330 |
'it_IT_mls_5421.wav',
|
|
|
|
| 331 |
'vi_VN_vais1000.wav',
|
| 332 |
'en_US_vctk_p229.wav',
|
| 333 |
'en_US_vctk_p254.wav',
|
|
|
|
| 335 |
'it_IT_mls_7936.wav',
|
| 336 |
'en_US_vctk_p301.wav',
|
| 337 |
'tn_ZA_google-nwu_0045.wav',
|
| 338 |
+
'tn_ZA_google-nwu_7674.wav',
|
| 339 |
+
'en_US_hifi-tts_6097.wav',
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
'jv_ID_google-gmu_07875.wav',
|
| 341 |
'it_IT_mls_1157.wav',
|
| 342 |
'it_IT_mls_643.wav',
|
|
|
|
| 420 |
|
| 421 |
|
| 422 |
with gr.Blocks(theme='huggingface') as demo:
|
| 423 |
+
with gr.Column():
|
| 424 |
text_input = gr.Textbox(
|
| 425 |
label="Type text for TTS:",
|
| 426 |
placeholder="Type Text for TTS",
|
| 427 |
lines=4,
|
| 428 |
+
value='Η γρηγορη καφετι αλεπου πειδαει πανω απο τον τεμπελη σκυλο.')
|
|
|
|
| 429 |
choice_dropdown = gr.Dropdown(
|
| 430 |
choices=language_names + VOICES,
|
| 431 |
label="Vox",
|
| 432 |
+
value=language_names[0])
|
|
|
|
| 433 |
generate_button = gr.Button("Generate Audio", variant="primary")
|
| 434 |
+
output_audio = gr.Audio(label="TTS Output")
|
|
|
|
|
|
|
| 435 |
generate_button.click(
|
| 436 |
fn=audionar_tts,
|
| 437 |
inputs=[text_input, choice_dropdown],
|
| 438 |
+
outputs=[output_audio])
|
|
|
|
| 439 |
demo.launch(debug=True)
|
tts.py
CHANGED
|
@@ -167,7 +167,7 @@ class StyleTTS2(nn.Module):
|
|
| 167 |
|
| 168 |
_translator = str.maketrans('', '', string.punctuation)
|
| 169 |
|
| 170 |
-
text = [sub_sent.translate(_translator) + '.' for sub_sent in textwrap.wrap(text,
|
| 171 |
|
| 172 |
# # text = nltk.sent_tokenize(text)
|
| 173 |
# # text = [i for sent in sentences for i in textwrap.wrap(sent, width=120)]
|
|
|
|
| 167 |
|
| 168 |
_translator = str.maketrans('', '', string.punctuation)
|
| 169 |
|
| 170 |
+
text = [sub_sent.translate(_translator) + '.' for sub_sent in textwrap.wrap(text, 294)]
|
| 171 |
|
| 172 |
# # text = nltk.sent_tokenize(text)
|
| 173 |
# # text = [i for sent in sentences for i in textwrap.wrap(sent, width=120)]
|