Dionyssos committed on
Commit
28baddf
·
1 Parent(s): 275056e
Files changed (3) hide show
  1. README.md +2 -2
  2. app.py +22 -53
  3. tts.py +1 -1
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- title: affective TTS
3
- emoji: 💤
4
  colorFrom: gray
5
  colorTo: gray
6
  sdk: gradio
 
1
  ---
2
+ title: heritage TTS
3
+ emoji: 🏛️
4
  colorFrom: gray
5
  colorTo: gray
6
  sdk: gradio
app.py CHANGED
@@ -9,13 +9,8 @@ import audiofile
9
  from tts import StyleTTS2
10
  from textual import only_greek_or_only_latin, transliterate_number, fix_vocals
11
  import textwrap
12
- import nltk
13
- from audionar import VitsModel, VitsTokenizer
14
-
15
 
16
- nltk.download('punkt', download_dir='./')
17
- nltk.download('punkt_tab', download_dir='./')
18
- nltk.data.path.append('.')
19
 
20
 
21
 
@@ -134,7 +129,7 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
134
  'it_IT_mls_844.wav',
135
  'en_US_vctk_p269.wav',
136
  'en_US_vctk_p285.wav',
137
- 'de_DE_m-ailabs_angela_merkel.wav',
138
  'en_US_vctk_p316.wav',
139
  'en_US_vctk_p362.wav',
140
  'jv_ID_google-gmu_06207.wav',
@@ -149,8 +144,8 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
149
  'en_US_vctk_p248.wav',
150
  'en_US_vctk_p287.wav',
151
  'it_IT_mls_9772.wav',
152
- 'te_IN_cmu-indic_sk.wav',
153
- 'tn_ZA_google-nwu_8333.wav',
154
  'en_US_vctk_p260.wav',
155
  'en_US_vctk_p247.wav',
156
  'en_US_vctk_p329.wav',
@@ -166,10 +161,10 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
166
  'it_IT_mls_7440.wav',
167
  'en_US_vctk_p310.wav',
168
  'en_US_vctk_p237.wav',
169
- 'en_US_hifi-tts_92.wav',
170
- 'en_US_cmu_arctic_aew.wav',
171
- 'ne_NP_ne-google_2099.wav',
172
- 'en_US_vctk_p226.wav',
173
  'af_ZA_google-nwu_1919.wav',
174
  'jv_ID_google-gmu_03727.wav',
175
  'en_US_vctk_p317.wav',
@@ -262,7 +257,7 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
262
  'tn_ZA_google-nwu_7866.wav',
263
  'en_US_vctk_p300.wav',
264
  'ne_NP_ne-google_0649.wav',
265
- 'es_ES_carlfm.wav',
266
  'jv_ID_google-gmu_06510.wav',
267
  'de_DE_m-ailabs_rebecca_braunert_plunkett.wav',
268
  'en_US_vctk_p340.wav',
@@ -280,10 +275,10 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
280
  'ru_RU_multi_minaev.wav',
281
  'sw_lanfrica.wav',
282
  'en_US_vctk_p271.wav',
283
- 'tn_ZA_google-nwu_0441.wav',
284
- 'it_IT_mls_6001.wav',
285
- 'en_US_vctk_p305.wav',
286
- 'it_IT_mls_8828.wav',
287
  'jv_ID_google-gmu_08002.wav',
288
  'it_IT_mls_2033.wav',
289
  'tn_ZA_google-nwu_3629.wav',
@@ -321,35 +316,18 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
321
  'bn_multi_4046.wav',
322
  'en_US_vctk_p288.wav',
323
  'en_US_vctk_p251.wav',
324
- 'es_ES_m-ailabs_tux.wav',
325
  'tn_ZA_google-nwu_6206.wav',
326
  'bn_multi_9169.wav',
327
- # 'en_US_vctk_p293.wav',
328
- # 'en_US_vctk_p255.wav',
329
- 'af_ZA_google-nwu_8963.wav',
330
- # 'en_US_vctk_p265.wav',
331
- 'gu_IN_cmu-indic_cmu_indic_guj_ad.wav',
332
- 'jv_ID_google-gmu_07335.wav',
333
- 'en_US_vctk_p323.wav',
334
- 'en_US_vctk_p281.wav',
335
- 'en_US_cmu_arctic_bdl.wav',
336
- 'en_US_m-ailabs_judy_bieber.wav',
337
  'it_IT_mls_10446.wav',
338
  'en_US_vctk_p261.wav',
339
  'en_US_vctk_p292.wav',
340
- 'te_IN_cmu-indic_ss.wav',
341
- 'en_US_vctk_p311.wav',
342
- 'it_IT_mls_12428.wav',
343
- 'en_US_cmu_arctic_aup.wav',
344
- 'jv_ID_google-gmu_04679.wav',
345
- 'it_IT_mls_4971.wav',
346
- 'en_US_cmu_arctic_ljm.wav',
347
  'fa_haaniye.wav',
348
  'en_US_vctk_p339.wav',
349
  'tn_ZA_google-nwu_7896.wav',
350
  'en_US_vctk_p253.wav',
351
  'it_IT_mls_5421.wav',
352
- # 'ne_NP_ne-google_0546.wav',
353
  'vi_VN_vais1000.wav',
354
  'en_US_vctk_p229.wav',
355
  'en_US_vctk_p254.wav',
@@ -357,12 +335,8 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
357
  'it_IT_mls_7936.wav',
358
  'en_US_vctk_p301.wav',
359
  'tn_ZA_google-nwu_0045.wav',
360
- # 'it_IT_mls_659.wav',
361
- 'tn_ZA_google-nwu_7674.wav',
362
- # 'it_IT_mls_12804.wav',
363
- # 'el_GR_rapunzelina.wav',
364
- 'en_US_hifi-tts_6097.wav',
365
- 'en_US_vctk_p257.wav',
366
  'jv_ID_google-gmu_07875.wav',
367
  'it_IT_mls_1157.wav',
368
  'it_IT_mls_643.wav',
@@ -446,25 +420,20 @@ _tts = StyleTTS2().to('cpu')
446
 
447
 
448
  with gr.Blocks(theme='huggingface') as demo:
449
- with gr.Row():
450
  text_input = gr.Textbox(
451
  label="Type text for TTS:",
452
  placeholder="Type Text for TTS",
453
  lines=4,
454
- value='Η γρηγορη καφετι αλεπου πειδαει πανω απο τον τεμπελη σκυλο.',
455
- )
456
  choice_dropdown = gr.Dropdown(
457
  choices=language_names + VOICES,
458
  label="Vox",
459
- value=language_names[0]
460
- )
461
  generate_button = gr.Button("Generate Audio", variant="primary")
462
-
463
- output_audio = gr.Audio(label="TTS Output")
464
-
465
  generate_button.click(
466
  fn=audionar_tts,
467
  inputs=[text_input, choice_dropdown],
468
- outputs=[output_audio]
469
- )
470
  demo.launch(debug=True)
 
9
  from tts import StyleTTS2
10
  from textual import only_greek_or_only_latin, transliterate_number, fix_vocals
11
  import textwrap
 
 
 
12
 
13
+ from audionar import VitsModel, VitsTokenizer
 
 
14
 
15
 
16
 
 
129
  'it_IT_mls_844.wav',
130
  'en_US_vctk_p269.wav',
131
  'en_US_vctk_p285.wav',
132
+ # 'de_DE_m-ailabs_angela_merkel.wav',
133
  'en_US_vctk_p316.wav',
134
  'en_US_vctk_p362.wav',
135
  'jv_ID_google-gmu_06207.wav',
 
144
  'en_US_vctk_p248.wav',
145
  'en_US_vctk_p287.wav',
146
  'it_IT_mls_9772.wav',
147
+ # 'te_IN_cmu-indic_sk.wav',
148
+ # 'tn_ZA_google-nwu_8333.wav',
149
  'en_US_vctk_p260.wav',
150
  'en_US_vctk_p247.wav',
151
  'en_US_vctk_p329.wav',
 
161
  'it_IT_mls_7440.wav',
162
  'en_US_vctk_p310.wav',
163
  'en_US_vctk_p237.wav',
164
+ # 'en_US_hifi-tts_92.wav',
165
+ # 'en_US_cmu_arctic_aew.wav',
166
+ # 'ne_NP_ne-google_2099.wav',
167
+ # 'en_US_vctk_p226.wav',
168
  'af_ZA_google-nwu_1919.wav',
169
  'jv_ID_google-gmu_03727.wav',
170
  'en_US_vctk_p317.wav',
 
257
  'tn_ZA_google-nwu_7866.wav',
258
  'en_US_vctk_p300.wav',
259
  'ne_NP_ne-google_0649.wav',
260
+ # 'es_ES_carlfm.wav',
261
  'jv_ID_google-gmu_06510.wav',
262
  'de_DE_m-ailabs_rebecca_braunert_plunkett.wav',
263
  'en_US_vctk_p340.wav',
 
275
  'ru_RU_multi_minaev.wav',
276
  'sw_lanfrica.wav',
277
  'en_US_vctk_p271.wav',
278
+ # 'tn_ZA_google-nwu_0441.wav',
279
+ # 'it_IT_mls_6001.wav',
280
+ # 'en_US_vctk_p305.wav',
281
+ # 'it_IT_mls_8828.wav',
282
  'jv_ID_google-gmu_08002.wav',
283
  'it_IT_mls_2033.wav',
284
  'tn_ZA_google-nwu_3629.wav',
 
316
  'bn_multi_4046.wav',
317
  'en_US_vctk_p288.wav',
318
  'en_US_vctk_p251.wav',
 
319
  'tn_ZA_google-nwu_6206.wav',
320
  'bn_multi_9169.wav',
321
+ 'en_US_vctk_p323.wav',
322
+ 'en_US_m-ailabs_judy_bieber.wav',
 
 
 
 
 
 
 
 
323
  'it_IT_mls_10446.wav',
324
  'en_US_vctk_p261.wav',
325
  'en_US_vctk_p292.wav',
 
 
 
 
 
 
 
326
  'fa_haaniye.wav',
327
  'en_US_vctk_p339.wav',
328
  'tn_ZA_google-nwu_7896.wav',
329
  'en_US_vctk_p253.wav',
330
  'it_IT_mls_5421.wav',
 
331
  'vi_VN_vais1000.wav',
332
  'en_US_vctk_p229.wav',
333
  'en_US_vctk_p254.wav',
 
335
  'it_IT_mls_7936.wav',
336
  'en_US_vctk_p301.wav',
337
  'tn_ZA_google-nwu_0045.wav',
338
+ 'tn_ZA_google-nwu_7674.wav',
339
+ 'en_US_hifi-tts_6097.wav',
 
 
 
 
340
  'jv_ID_google-gmu_07875.wav',
341
  'it_IT_mls_1157.wav',
342
  'it_IT_mls_643.wav',
 
420
 
421
 
422
  with gr.Blocks(theme='huggingface') as demo:
423
+ with gr.Column():
424
  text_input = gr.Textbox(
425
  label="Type text for TTS:",
426
  placeholder="Type Text for TTS",
427
  lines=4,
428
+ value='Η γρηγορη καφετι αλεπου πειδαει πανω απο τον τεμπελη σκυλο.')
 
429
  choice_dropdown = gr.Dropdown(
430
  choices=language_names + VOICES,
431
  label="Vox",
432
+ value=language_names[0])
 
433
  generate_button = gr.Button("Generate Audio", variant="primary")
434
+ output_audio = gr.Audio(label="TTS Output")
 
 
435
  generate_button.click(
436
  fn=audionar_tts,
437
  inputs=[text_input, choice_dropdown],
438
+ outputs=[output_audio])
 
439
  demo.launch(debug=True)
tts.py CHANGED
@@ -167,7 +167,7 @@ class StyleTTS2(nn.Module):
167
 
168
  _translator = str.maketrans('', '', string.punctuation)
169
 
170
- text = [sub_sent.translate(_translator) + '.' for sub_sent in textwrap.wrap(text, 74)]
171
 
172
  # # text = nltk.sent_tokenize(text)
173
  # # text = [i for sent in sentences for i in textwrap.wrap(sent, width=120)]
 
167
 
168
  _translator = str.maketrans('', '', string.punctuation)
169
 
170
+ text = [sub_sent.translate(_translator) + '.' for sub_sent in textwrap.wrap(text, 294)]
171
 
172
  # # text = nltk.sent_tokenize(text)
173
  # # text = [i for sent in sentences for i in textwrap.wrap(sent, width=120)]