Dionyssos committed on
Commit
275056e
·
1 Parent(s): 4964a1b
Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +16 -13
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Speech analysis
3
  emoji: 💤
4
  colorFrom: gray
5
  colorTo: gray
 
1
  ---
2
+ title: affective TTS
3
  emoji: 💤
4
  colorFrom: gray
5
  colorTo: gray
app.py CHANGED
@@ -48,20 +48,23 @@ def audionar_tts(text=None,
48
  }
49
 
50
  if text is None or text.strip() == '':
51
- text = 'No Audio or Txt Input'
52
 
53
 
54
 
 
55
 
56
  if lang not in language_names: # StyleTTS2
57
 
 
 
58
  text = only_greek_or_only_latin(text, lang='eng')
59
 
60
  x = _tts.inference(text,
61
  ref_s='wav/' + lang + '.wav')[0, 0, :].numpy() # 24 Khz
62
 
63
  else: # VITS
64
-
65
  lang_code = lang_map.get(lang.lower(), lang.lower().split()[0].strip())
66
 
67
  global cached_lang_code, cached_net_g, cached_tokenizer
@@ -100,7 +103,7 @@ def audionar_tts(text=None,
100
 
101
 
102
  wavfile = '_vits_.wav'
103
- audiofile.write(wavfile, x, 16000)
104
  return wavfile # 2x file for [audio out & state to pass to the Emotion reco tAB]
105
 
106
 
@@ -140,9 +143,9 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
140
  'en_US_vctk_p233.wav',
141
  'it_IT_mls_4975.wav',
142
  'en_US_vctk_p236.wav',
143
- 'bn_multi_01232.wav',
144
  'bn_multi_5958.wav',
145
- 'it_IT_mls_9185.wav',
146
  'en_US_vctk_p248.wav',
147
  'en_US_vctk_p287.wav',
148
  'it_IT_mls_9772.wav',
@@ -199,8 +202,8 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
199
  'en_US_vctk_p302.wav',
200
  'jv_ID_google-gmu_08178.wav',
201
  'en_US_vctk_p313.wav',
202
- 'af_ZA_google-nwu_2418.wav',
203
- 'bn_multi_00737.wav',
204
  'en_US_vctk_p275.wav', # y
205
  'af_ZA_google-nwu_0184.wav',
206
  'jv_ID_google-gmu_07638.wav',
@@ -292,7 +295,7 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
292
  'nl_nathalie.wav',
293
  'it_IT_mls_8207.wav',
294
  'ko_KO_kss.wav',
295
- 'af_ZA_google-nwu_6590.wav',
296
  'jv_ID_google-gmu_00264.wav',
297
  'tn_ZA_google-nwu_6234.wav',
298
  'jv_ID_google-gmu_05522.wav',
@@ -354,10 +357,10 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
354
  'it_IT_mls_7936.wav',
355
  'en_US_vctk_p301.wav',
356
  'tn_ZA_google-nwu_0045.wav',
357
- 'it_IT_mls_659.wav',
358
  'tn_ZA_google-nwu_7674.wav',
359
- 'it_IT_mls_12804.wav',
360
- 'el_GR_rapunzelina.wav',
361
  'en_US_hifi-tts_6097.wav',
362
  'en_US_vctk_p257.wav',
363
  'jv_ID_google-gmu_07875.wav',
@@ -381,7 +384,7 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
381
  'de_DE_m-ailabs_karlsson.wav',
382
  'en_US_cmu_arctic_awb.wav',
383
  'en_US_vctk_p246.wav',
384
- 'en_US_cmu_arctic_clb.wav',
385
  'en_US_vctk_p364.wav',
386
  'nl_flemishguy.wav',
387
  'en_US_vctk_p276.wav', # y
@@ -425,7 +428,7 @@ VOICES = ['jv_ID_google-gmu_04982.wav',
425
  # 'male-27-sad.wav',
426
  'tn_ZA_google-nwu_1498.wav',
427
  'fi_FI_harri-tapani-ylilammi.wav',
428
- 'bn_multi_rm.wav',
429
  'ne_NP_ne-google_2139.wav',
430
  'pl_PL_m-ailabs_piotr_nater.wav',
431
  'fr_FR_siwis.wav',
 
48
  }
49
 
50
  if text is None or text.strip() == '':
51
+ text = 'No Txt Has been typed'
52
 
53
 
54
 
55
+ fs = 16000
56
 
57
  if lang not in language_names: # StyleTTS2
58
 
59
+ fs = 24000
60
+
61
  text = only_greek_or_only_latin(text, lang='eng')
62
 
63
  x = _tts.inference(text,
64
  ref_s='wav/' + lang + '.wav')[0, 0, :].numpy() # 24 Khz
65
 
66
  else: # VITS
67
+
68
  lang_code = lang_map.get(lang.lower(), lang.lower().split()[0].strip())
69
 
70
  global cached_lang_code, cached_net_g, cached_tokenizer
 
103
 
104
 
105
  wavfile = '_vits_.wav'
106
+ audiofile.write(wavfile, x, fs)
107
  return wavfile # 2x file for [audio out & state to pass to the Emotion reco tAB]
108
 
109
 
 
143
  'en_US_vctk_p233.wav',
144
  'it_IT_mls_4975.wav',
145
  'en_US_vctk_p236.wav',
146
+ # 'bn_multi_01232.wav',
147
  'bn_multi_5958.wav',
148
+ # 'it_IT_mls_9185.wav',
149
  'en_US_vctk_p248.wav',
150
  'en_US_vctk_p287.wav',
151
  'it_IT_mls_9772.wav',
 
202
  'en_US_vctk_p302.wav',
203
  'jv_ID_google-gmu_08178.wav',
204
  'en_US_vctk_p313.wav',
205
+ # 'af_ZA_google-nwu_2418.wav',
206
+ # 'bn_multi_00737.wav',
207
  'en_US_vctk_p275.wav', # y
208
  'af_ZA_google-nwu_0184.wav',
209
  'jv_ID_google-gmu_07638.wav',
 
295
  'nl_nathalie.wav',
296
  'it_IT_mls_8207.wav',
297
  'ko_KO_kss.wav',
298
+ # 'af_ZA_google-nwu_6590.wav',
299
  'jv_ID_google-gmu_00264.wav',
300
  'tn_ZA_google-nwu_6234.wav',
301
  'jv_ID_google-gmu_05522.wav',
 
357
  'it_IT_mls_7936.wav',
358
  'en_US_vctk_p301.wav',
359
  'tn_ZA_google-nwu_0045.wav',
360
+ # 'it_IT_mls_659.wav',
361
  'tn_ZA_google-nwu_7674.wav',
362
+ # 'it_IT_mls_12804.wav',
363
+ # 'el_GR_rapunzelina.wav',
364
  'en_US_hifi-tts_6097.wav',
365
  'en_US_vctk_p257.wav',
366
  'jv_ID_google-gmu_07875.wav',
 
384
  'de_DE_m-ailabs_karlsson.wav',
385
  'en_US_cmu_arctic_awb.wav',
386
  'en_US_vctk_p246.wav',
387
+ # 'en_US_cmu_arctic_clb.wav',
388
  'en_US_vctk_p364.wav',
389
  'nl_flemishguy.wav',
390
  'en_US_vctk_p276.wav', # y
 
428
  # 'male-27-sad.wav',
429
  'tn_ZA_google-nwu_1498.wav',
430
  'fi_FI_harri-tapani-ylilammi.wav',
431
+ # 'bn_multi_rm.wav',
432
  'ne_NP_ne-google_2139.wav',
433
  'pl_PL_m-ailabs_piotr_nater.wav',
434
  'fr_FR_siwis.wav',