Dionyssos committed on
Commit
37faf9c
·
1 Parent(s): 28baddf
Files changed (2) hide show
  1. app.py +82 -303
  2. tts.py +1 -1
app.py CHANGED
@@ -1,8 +1,6 @@
1
  # -*- coding: utf-8 -*-
2
- import typing
3
  import gradio as gr
4
  import numpy as np
5
- import os
6
  import torch
7
  import torch.nn as nn
8
  import audiofile
@@ -108,311 +106,92 @@ def audionar_tts(text=None,
108
 
109
 
110
  # TTS
111
- # VOICES = [f'wav/{vox}' for vox in os.listdir('wav')]
112
- # add unidecode (to parse non-roman characters for the StyleTTS2
113
- # # for the VITS it should better skip the unknown letters - dont use unidecode())
114
- # at generation fill the state of "last tts"
115
- # at record fill the state of "last record" and place in list of voice/langs for TTS
116
- VOICES = ['jv_ID_google-gmu_04982.wav',
117
- # 'it_IT_mls_1595.wav',
118
- 'en_US_vctk_p303.wav',
119
- 'en_US_vctk_p306.wav',
120
- 'it_IT_mls_8842.wav',
121
- 'en_US_cmu_arctic_ksp.wav',
122
- 'jv_ID_google-gmu_05970.wav',
123
- 'en_US_vctk_p318.wav',
124
- 'ha_NE_openbible.wav',
125
- 'ne_NP_ne-google_0883.wav',
126
- 'en_US_vctk_p280.wav',
127
- 'bn_multi_1010.wav',
128
- 'en_US_vctk_p259.wav',
129
- 'it_IT_mls_844.wav',
130
- 'en_US_vctk_p269.wav',
131
- 'en_US_vctk_p285.wav',
132
- # 'de_DE_m-ailabs_angela_merkel.wav',
133
- 'en_US_vctk_p316.wav',
134
- 'en_US_vctk_p362.wav',
135
- 'jv_ID_google-gmu_06207.wav',
136
- 'tn_ZA_google-nwu_9061.wav',
137
- 'fr_FR_tom.wav',
138
- 'en_US_vctk_p233.wav',
139
- 'it_IT_mls_4975.wav',
140
- 'en_US_vctk_p236.wav',
141
- # 'bn_multi_01232.wav',
142
- 'bn_multi_5958.wav',
143
- # 'it_IT_mls_9185.wav',
144
- 'en_US_vctk_p248.wav',
145
- 'en_US_vctk_p287.wav',
146
- 'it_IT_mls_9772.wav',
147
- # 'te_IN_cmu-indic_sk.wav',
148
- # 'tn_ZA_google-nwu_8333.wav',
149
- 'en_US_vctk_p260.wav',
150
- 'en_US_vctk_p247.wav',
151
- 'en_US_vctk_p329.wav',
152
- 'en_US_cmu_arctic_fem.wav',
153
- 'en_US_cmu_arctic_rms.wav',
154
- 'en_US_vctk_p308.wav',
155
- 'jv_ID_google-gmu_08736.wav',
156
- 'en_US_vctk_p245.wav',
157
- 'fr_FR_m-ailabs_nadine_eckert_boulet.wav',
158
- 'jv_ID_google-gmu_03314.wav',
159
- 'en_US_vctk_p239.wav',
160
- 'jv_ID_google-gmu_05540.wav',
161
- 'it_IT_mls_7440.wav',
162
- 'en_US_vctk_p310.wav',
163
- 'en_US_vctk_p237.wav',
164
- # 'en_US_hifi-tts_92.wav',
165
- # 'en_US_cmu_arctic_aew.wav',
166
- # 'ne_NP_ne-google_2099.wav',
167
- # 'en_US_vctk_p226.wav',
168
- 'af_ZA_google-nwu_1919.wav',
169
- 'jv_ID_google-gmu_03727.wav',
170
- 'en_US_vctk_p317.wav',
171
- 'tn_ZA_google-nwu_0378.wav',
172
- 'nl_pmk.wav',
173
- 'en_US_vctk_p286.wav',
174
- 'tn_ZA_google-nwu_3342.wav',
175
- # 'en_US_vctk_p343.wav',
176
- 'de_DE_m-ailabs_ramona_deininger.wav',
177
- 'jv_ID_google-gmu_03424.wav',
178
- 'en_US_vctk_p341.wav',
179
- 'jv_ID_google-gmu_03187.wav',
180
- 'ne_NP_ne-google_3960.wav',
181
- 'jv_ID_google-gmu_06080.wav',
182
- 'ne_NP_ne-google_3997.wav',
183
- # 'en_US_vctk_p267.wav',
184
- 'en_US_vctk_p240.wav',
185
- 'ne_NP_ne-google_5687.wav',
186
- 'ne_NP_ne-google_9407.wav',
187
- 'jv_ID_google-gmu_05667.wav',
188
- 'jv_ID_google-gmu_01519.wav',
189
- 'ne_NP_ne-google_7957.wav',
190
- 'it_IT_mls_4705.wav',
191
- 'ne_NP_ne-google_6329.wav',
192
- 'it_IT_mls_1725.wav',
193
- 'tn_ZA_google-nwu_8914.wav',
194
- 'en_US_ljspeech.wav',
195
- 'tn_ZA_google-nwu_4850.wav',
196
- 'en_US_vctk_p238.wav',
197
- 'en_US_vctk_p302.wav',
198
- 'jv_ID_google-gmu_08178.wav',
199
- 'en_US_vctk_p313.wav',
200
- # 'af_ZA_google-nwu_2418.wav',
201
- # 'bn_multi_00737.wav',
202
  'en_US_vctk_p275.wav', # y
203
- 'af_ZA_google-nwu_0184.wav',
204
- 'jv_ID_google-gmu_07638.wav',
205
- 'ne_NP_ne-google_6587.wav',
206
- 'ne_NP_ne-google_0258.wav',
207
- 'en_US_vctk_p232.wav',
208
- 'en_US_vctk_p336.wav',
209
- 'jv_ID_google-gmu_09039.wav',
210
- 'en_US_vctk_p312.wav',
211
- 'af_ZA_google-nwu_8148.wav',
212
- 'en_US_vctk_p326.wav',
213
- 'en_US_vctk_p264.wav',
214
- 'en_US_vctk_p295.wav',
215
- # 'en_US_vctk_p298.wav',
216
- 'es_ES_m-ailabs_victor_villarraza.wav',
217
- 'pl_PL_m-ailabs_nina_brown.wav',
218
- 'tn_ZA_google-nwu_9365.wav',
219
- 'en_US_vctk_p294.wav',
220
- 'jv_ID_google-gmu_00658.wav',
221
- 'jv_ID_google-gmu_08305.wav',
222
- 'en_US_vctk_p330.wav',
223
- 'gu_IN_cmu-indic_cmu_indic_guj_dp.wav',
224
- 'jv_ID_google-gmu_05219.wav',
225
- 'en_US_vctk_p284.wav',
226
- 'de_DE_m-ailabs_eva_k.wav',
227
- # 'bn_multi_00779.wav',
228
- 'en_UK_apope.wav',
229
- 'en_US_vctk_p345.wav',
230
- 'it_IT_mls_6744.wav',
231
- 'en_US_vctk_p347.wav',
232
  'en_US_m-ailabs_mary_ann.wav',
233
- 'en_US_m-ailabs_elliot_miller.wav',
234
- 'en_US_vctk_p279.wav',
235
- 'ru_RU_multi_nikolaev.wav',
236
- 'bn_multi_4811.wav',
237
- 'tn_ZA_google-nwu_7693.wav',
238
- 'bn_multi_01701.wav',
239
- 'en_US_vctk_p262.wav',
240
- # 'en_US_vctk_p266.wav',
241
- 'en_US_vctk_p243.wav',
242
- 'en_US_vctk_p297.wav',
243
- 'en_US_vctk_p278.wav',
244
- 'jv_ID_google-gmu_02059.wav',
245
- 'en_US_vctk_p231.wav',
246
- 'te_IN_cmu-indic_kpn.wav',
247
- 'en_US_vctk_p250.wav',
248
- 'it_IT_mls_4974.wav',
249
- 'en_US_cmu_arctic_awbrms.wav',
250
- # 'en_US_vctk_p263.wav',
251
- 'nl_femal.wav',
252
- 'tn_ZA_google-nwu_6116.wav',
253
- 'jv_ID_google-gmu_06383.wav',
254
- 'en_US_vctk_p225.wav',
255
- 'en_US_vctk_p228.wav',
256
- 'it_IT_mls_277.wav',
257
- 'tn_ZA_google-nwu_7866.wav',
258
- 'en_US_vctk_p300.wav',
259
- 'ne_NP_ne-google_0649.wav',
260
- # 'es_ES_carlfm.wav',
261
- 'jv_ID_google-gmu_06510.wav',
262
- 'de_DE_m-ailabs_rebecca_braunert_plunkett.wav',
263
- 'en_US_vctk_p340.wav',
264
- 'en_US_cmu_arctic_gka.wav',
265
- 'ne_NP_ne-google_2027.wav',
266
- 'jv_ID_google-gmu_09724.wav',
267
- 'en_US_vctk_p361.wav',
268
- 'ne_NP_ne-google_6834.wav',
269
- 'jv_ID_google-gmu_02326.wav',
270
- 'fr_FR_m-ailabs_zeckou.wav',
271
- 'tn_ZA_google-nwu_1932.wav',
272
- # 'female-20-happy.wav',
273
- 'tn_ZA_google-nwu_1483.wav',
274
- 'de_DE_thorsten-emotion_amused.wav',
275
- 'ru_RU_multi_minaev.wav',
276
- 'sw_lanfrica.wav',
277
- 'en_US_vctk_p271.wav',
278
- # 'tn_ZA_google-nwu_0441.wav',
279
- # 'it_IT_mls_6001.wav',
280
- # 'en_US_vctk_p305.wav',
281
- # 'it_IT_mls_8828.wav',
282
- 'jv_ID_google-gmu_08002.wav',
283
- 'it_IT_mls_2033.wav',
284
- 'tn_ZA_google-nwu_3629.wav',
285
- 'it_IT_mls_6348.wav',
286
- 'en_US_cmu_arctic_axb.wav',
287
- 'it_IT_mls_8181.wav',
288
- 'en_US_vctk_p230.wav',
289
- 'af_ZA_google-nwu_7214.wav',
290
- 'nl_nathalie.wav',
291
- 'it_IT_mls_8207.wav',
292
- 'ko_KO_kss.wav',
293
- # 'af_ZA_google-nwu_6590.wav',
294
- 'jv_ID_google-gmu_00264.wav',
295
- 'tn_ZA_google-nwu_6234.wav',
296
- 'jv_ID_google-gmu_05522.wav',
297
- 'en_US_cmu_arctic_lnh.wav',
298
- 'en_US_vctk_p272.wav',
299
- 'en_US_cmu_arctic_slp.wav',
300
- 'en_US_vctk_p299.wav',
301
- 'en_US_hifi-tts_9017.wav',
302
- 'it_IT_mls_4998.wav',
303
- 'it_IT_mls_6299.wav',
304
- 'en_US_cmu_arctic_rxr.wav',
305
- # 'female-46-neutral.wav',
306
- 'jv_ID_google-gmu_01392.wav',
307
- 'tn_ZA_google-nwu_8512.wav',
308
- 'en_US_vctk_p244.wav',
309
- # 'bn_multi_3108.wav',
310
- # 'it_IT_mls_7405.wav',
311
- # 'bn_multi_3713.wav',
312
- # 'yo_openbible.wav',
313
- # 'jv_ID_google-gmu_01932.wav',
314
- 'en_US_vctk_p270.wav',
315
- 'tn_ZA_google-nwu_6459.wav',
316
- 'bn_multi_4046.wav',
317
- 'en_US_vctk_p288.wav',
318
- 'en_US_vctk_p251.wav',
319
  'tn_ZA_google-nwu_6206.wav',
320
- 'bn_multi_9169.wav',
321
- 'en_US_vctk_p323.wav',
322
- 'en_US_m-ailabs_judy_bieber.wav',
323
- 'it_IT_mls_10446.wav',
324
- 'en_US_vctk_p261.wav',
325
- 'en_US_vctk_p292.wav',
326
- 'fa_haaniye.wav',
327
- 'en_US_vctk_p339.wav',
328
- 'tn_ZA_google-nwu_7896.wav',
329
- 'en_US_vctk_p253.wav',
330
- 'it_IT_mls_5421.wav',
331
- 'vi_VN_vais1000.wav',
332
- 'en_US_vctk_p229.wav',
333
- 'en_US_vctk_p254.wav',
334
- 'en_US_vctk_p258.wav',
335
- 'it_IT_mls_7936.wav',
336
- 'en_US_vctk_p301.wav',
337
- 'tn_ZA_google-nwu_0045.wav',
338
- 'tn_ZA_google-nwu_7674.wav',
339
- 'en_US_hifi-tts_6097.wav',
340
- 'jv_ID_google-gmu_07875.wav',
341
- 'it_IT_mls_1157.wav',
342
- 'it_IT_mls_643.wav',
343
- 'en_US_vctk_p304.wav',
344
- 'ru_RU_multi_hajdurova.wav',
345
- 'it_IT_mls_8461.wav',
346
- 'bn_multi_3958.wav',
347
- 'it_IT_mls_1989.wav',
348
- 'en_US_vctk_p249.wav',
349
- # 'bn_multi_0834.wav',
350
- 'en_US_vctk_p307.wav',
351
- 'es_ES_m-ailabs_karen_savage.wav',
352
- 'fr_FR_m-ailabs_bernard.wav',
353
- 'en_US_vctk_p252.wav',
354
- 'en_US_cmu_arctic_jmk.wav',
355
- 'en_US_vctk_p333.wav',
356
- 'tn_ZA_google-nwu_4506.wav',
357
- 'ne_NP_ne-google_0283.wav',
358
- 'de_DE_m-ailabs_karlsson.wav',
359
- 'en_US_cmu_arctic_awb.wav',
360
- 'en_US_vctk_p246.wav',
361
- # 'en_US_cmu_arctic_clb.wav',
362
- 'en_US_vctk_p364.wav',
363
- 'nl_flemishguy.wav',
364
  'en_US_vctk_p276.wav', # y
365
- # 'en_US_vctk_p274.wav',
366
- 'fr_FR_m-ailabs_gilles_g_le_blanc.wav',
367
- 'it_IT_mls_7444.wav',
368
- 'style_o22050.wav',
369
- 'en_US_vctk_s5.wav',
370
- 'en_US_vctk_p268.wav',
371
- 'it_IT_mls_6807.wav',
372
- 'it_IT_mls_2019.wav',
373
- # 'male-60-angry.wav',
374
- 'af_ZA_google-nwu_8924.wav',
375
- 'en_US_vctk_p374.wav',
376
- 'en_US_vctk_p363.wav',
377
- 'it_IT_mls_644.wav',
378
- 'ne_NP_ne-google_3614.wav',
379
- 'en_US_vctk_p241.wav',
380
- 'ne_NP_ne-google_3154.wav',
381
- 'en_US_vctk_p234.wav',
382
- 'it_IT_mls_8384.wav',
383
- 'fr_FR_m-ailabs_ezwa.wav',
384
- 'it_IT_mls_5010.wav',
385
- 'en_US_vctk_p351.wav',
386
- 'en_US_cmu_arctic_eey.wav',
387
- 'jv_ID_google-gmu_04285.wav',
388
- 'jv_ID_google-gmu_06941.wav',
389
- 'hu_HU_diana-majlinger.wav',
390
- 'tn_ZA_google-nwu_2839.wav',
391
- 'bn_multi_03042.wav',
392
- 'tn_ZA_google-nwu_5628.wav',
393
- 'it_IT_mls_4649.wav',
394
- 'af_ZA_google-nwu_7130.wav',
395
- 'en_US_cmu_arctic_slt.wav',
396
- 'jv_ID_google-gmu_04175.wav',
397
- 'gu_IN_cmu-indic_cmu_indic_guj_kt.wav',
398
- 'jv_ID_google-gmu_00027.wav',
399
- 'jv_ID_google-gmu_02884.wav',
400
- 'en_US_vctk_p360.wav',
401
- 'en_US_vctk_p334.wav',
402
- # 'male-27-sad.wav',
403
- 'tn_ZA_google-nwu_1498.wav',
404
- 'fi_FI_harri-tapani-ylilammi.wav',
405
- # 'bn_multi_rm.wav',
406
- 'ne_NP_ne-google_2139.wav',
407
- 'pl_PL_m-ailabs_piotr_nater.wav',
408
- 'fr_FR_siwis.wav',
409
- 'nl_bart-de-leeuw.wav',
410
- 'jv_ID_google-gmu_04715.wav',
411
- 'en_US_vctk_p283.wav',
412
- 'en_US_vctk_p314.wav',
413
- 'en_US_vctk_p335.wav',
414
- 'jv_ID_google-gmu_07765.wav',
415
- 'en_US_vctk_p273.wav'
416
  ]
417
  VOICES = [t[:-4] for t in VOICES] # crop .wav for visuals in gr.DropDown
418
 
 
1
  # -*- coding: utf-8 -*-
 
2
  import gradio as gr
3
  import numpy as np
 
4
  import torch
5
  import torch.nn as nn
6
  import audiofile
 
106
 
107
 
108
  # TTS
109
+
110
+ VOICES = ['jv_ID_google-gmu_04982.wav', #y
111
+ 'en_US_vctk_p303.wav', #
112
+ 'en_US_vctk_p306.wav', #,
113
+ 'en_US_vctk_p318.wav', # y
114
+ 'en_US_vctk_p269.wav', #y
115
+ 'en_US_vctk_p316.wav', #y
116
+ 'en_US_vctk_p362.wav', #y cls
117
+ 'fr_FR_tom.wav', #y
118
+ 'bn_multi_5958.wav', #y
119
+ 'en_US_vctk_p287.wav', #y
120
+ 'en_US_vctk_p260.wav', #y cl
121
+ 'en_US_cmu_arctic_fem.wav', #t
122
+ 'en_US_cmu_arctic_rms.wav', #t
123
+ 'fr_FR_m-ailabs_nadine_eckert_boulet.wav', #
124
+ 'en_US_vctk_p237.wav', #y
125
+ 'en_US_vctk_p317.wav',#
126
+ 'tn_ZA_google-nwu_0378.wav',#y
127
+ 'nl_pmk.wav',#fixst
128
+ 'tn_ZA_google-nwu_3342.wav',#
129
+ 'ne_NP_ne-google_3997.wav', #
130
+ 'tn_ZA_google-nwu_8914.wav', #t
131
+ 'en_US_vctk_p238.wav', # y
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  'en_US_vctk_p275.wav', # y
133
+ 'af_ZA_google-nwu_0184.wav',#
134
+ 'af_ZA_google-nwu_8148.wav',#y
135
+ 'en_US_vctk_p326.wav', #t
136
+ 'en_US_vctk_p264.wav', #y
137
+ 'en_US_vctk_p295.wav', #
138
+ 'en_US_vctk_p294.wav', #
139
+ 'en_US_vctk_p330.wav', #y
140
+ 'gu_IN_cmu-indic_cmu_indic_guj_ad.wav',#y
141
+ 'jv_ID_google-gmu_05219.wav',#y
142
+ 'en_US_vctk_p284.wav',#y
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  'en_US_m-ailabs_mary_ann.wav',
144
+ 'bn_multi_01701.wav',#y
145
+ 'en_US_vctk_p262.wav',#y
146
+ 'en_US_vctk_p243.wav', #y
147
+ 'en_US_vctk_p278.wav', #y
148
+ 'en_US_vctk_p250.wav', #y cl
149
+ 'nl_femal.wav', #y
150
+ 'en_US_vctk_p228.wav', #y
151
+ 'ne_NP_ne-google_0649.wav',#
152
+ 'en_US_cmu_arctic_gka.wav',#y
153
+ 'en_US_vctk_p361.wav', #y
154
+ 'jv_ID_google-gmu_02326.wav', #y
155
+ 'tn_ZA_google-nwu_1932.wav', #y
156
+ 'de_DE_thorsten-emotion_amused.wav', #y
157
+ 'jv_ID_google-gmu_08002.wav', #y
158
+ 'tn_ZA_google-nwu_3629.wav',#y
159
+ 'en_US_vctk_p230.wav', #y
160
+ 'af_ZA_google-nwu_7214.wav', #y
161
+ 'nl_nathalie.wav', #
162
+ 'en_US_cmu_arctic_lnh.wav',#y
163
+ 'tn_ZA_google-nwu_6459.wav', #y
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  'tn_ZA_google-nwu_6206.wav',
165
+ 'en_US_vctk_p323.wav', #y clips
166
+ 'en_US_m-ailabs_judy_bieber.wav',#y
167
+ 'en_US_vctk_p261.wav', #y
168
+ 'fa_haaniye.wav', #y
169
+ # 'en_US_vctk_p339.wav',
170
+ 'tn_ZA_google-nwu_7896.wav',#y
171
+ 'en_US_vctk_p258.wav', #y clps
172
+ 'tn_ZA_google-nwu_7674.wav', #y
173
+ 'en_US_hifi-tts_6097.wav', #y
174
+ 'en_US_vctk_p304.wav', #y clps
175
+ 'en_US_vctk_p307.wav', #y
176
+ 'fr_FR_m-ailabs_bernard.wav', #y
177
+ 'en_US_cmu_arctic_jmk.wav', #y
178
+ 'ne_NP_ne-google_0283.wav', #
179
+ 'en_US_vctk_p246.wav', #y
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  'en_US_vctk_p276.wav', # y
181
+ 'style_o22050.wav', #y
182
+ 'en_US_vctk_s5.wav', #y
183
+ 'en_US_vctk_p268.wav', #y reduce clip
184
+ 'af_ZA_google-nwu_8924.wav', #y
185
+ 'en_US_vctk_p363.wav', #y
186
+ # 'it_IT_mls_644.wav',
187
+ 'ne_NP_ne-google_3614.wav', #
188
+ 'ne_NP_ne-google_3154.wav', #
189
+ 'en_US_cmu_arctic_eey.wav', # y fix styl
190
+ 'tn_ZA_google-nwu_2839.wav', # y
191
+ 'af_ZA_google-nwu_7130.wav', #
192
+ 'ne_NP_ne-google_2139.wav', #y
193
+ 'jv_ID_google-gmu_04715.wav', #
194
+ 'en_US_vctk_p273.wav' #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  ]
196
  VOICES = [t[:-4] for t in VOICES] # crop .wav for visuals in gr.DropDown
197
 
tts.py CHANGED
@@ -167,7 +167,7 @@ class StyleTTS2(nn.Module):
167
 
168
  _translator = str.maketrans('', '', string.punctuation)
169
 
170
- text = [sub_sent.translate(_translator) + '.' for sub_sent in textwrap.wrap(text, 294)]
171
 
172
  # # text = nltk.sent_tokenize(text)
173
  # # text = [i for sent in sentences for i in textwrap.wrap(sent, width=120)]
 
167
 
168
  _translator = str.maketrans('', '', string.punctuation)
169
 
170
+ text = [sub_sent.translate(_translator) + '.' for sub_sent in textwrap.wrap(text, 74)] # 294 sounds spongy
171
 
172
  # # text = nltk.sent_tokenize(text)
173
  # # text = [i for sent in sentences for i in textwrap.wrap(sent, width=120)]