cyrills
Browse files- app.py +3 -4
- textual.py +41 -62
app.py
CHANGED
|
@@ -91,12 +91,11 @@ def audionar_tts(text=None,
|
|
| 91 |
|
| 92 |
|
| 93 |
|
| 94 |
-
|
| 95 |
-
x = np.concatenate([0.49 * x, 0.51 * x], 0)
|
| 96 |
|
| 97 |
|
| 98 |
wavfile = '_vits_.wav'
|
| 99 |
-
audiofile.write(wavfile, x, fs)
|
| 100 |
return wavfile # 2x file for [audio out & state to pass to the Emotion reco tAB]
|
| 101 |
|
| 102 |
|
|
@@ -215,4 +214,4 @@ with gr.Blocks(theme='huggingface') as demo:
|
|
| 215 |
fn=audionar_tts,
|
| 216 |
inputs=[text_input, choice_dropdown],
|
| 217 |
outputs=[output_audio])
|
| 218 |
-
demo.launch(debug=True)
|
|
|
|
| 91 |
|
| 92 |
|
| 93 |
|
| 94 |
+
|
|
|
|
| 95 |
|
| 96 |
|
| 97 |
wavfile = '_vits_.wav'
|
| 98 |
+
audiofile.write(wavfile, x, fs)
|
| 99 |
return wavfile # 2x file for [audio out & state to pass to the Emotion reco tAB]
|
| 100 |
|
| 101 |
|
|
|
|
| 214 |
fn=audionar_tts,
|
| 215 |
inputs=[text_input, choice_dropdown],
|
| 216 |
outputs=[output_audio])
|
| 217 |
+
demo.launch(debug=True)
|
textual.py
CHANGED
|
@@ -19,8 +19,8 @@ def only_greek_or_only_latin(text, lang='grc'):
|
|
| 19 |
latin_to_greek_map = {
|
| 20 |
'a': 'α', 'b': 'β', 'g': 'γ', 'd': 'δ', 'e': 'ε',
|
| 21 |
'ch': 'τσο', # Example of a multi-character Latin sequence
|
| 22 |
-
'z': 'ζ', 'h': 'χ', 'i': 'ι', 'k': 'κ', 'l': 'λ',
|
| 23 |
-
'm': 'μ', 'n': 'ν', 'x': 'ξ', 'o': 'ο', 'p': 'π',
|
| 24 |
'v': 'β', 'sc': 'σκ', 'r': 'ρ', 's': 'σ', 't': 'τ',
|
| 25 |
'u': 'ου', 'f': 'φ', 'c': 'σ', 'w': 'β', 'y': 'γ',
|
| 26 |
}
|
|
@@ -36,25 +36,45 @@ def only_greek_or_only_latin(text, lang='grc'):
|
|
| 36 |
}
|
| 37 |
|
| 38 |
cyrillic_to_latin_map = {
|
| 39 |
-
'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'е': 'e', 'ё': 'yo', 'ж': 'zh',
|
| 40 |
-
'з': 'z', 'и': 'i', 'й': 'y', 'к': 'k', 'л': 'l', 'м': 'm', 'н': 'n', 'о': 'o',
|
| 41 |
-
'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u', 'ф': 'f', 'х': 'kh', 'ц': 'ts',
|
| 42 |
-
'ч': 'ch', 'ш': 'sh', 'щ': 'shch', 'ъ': '', 'ы': 'y', 'ь': '', 'э': 'e', 'ю': 'yu',
|
| 43 |
-
'я': 'ya',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
}
|
| 45 |
|
| 46 |
-
#
|
| 47 |
-
# These are approximations and may not be universally accepted transliterations.
|
| 48 |
cyrillic_to_greek_map = {
|
| 49 |
-
'а': 'α', 'б': 'β', 'в': 'β', 'г': 'γ', 'д': 'δ', 'е': 'ε', 'ё': 'ιο', 'ж': 'ζ',
|
| 50 |
-
'з': 'ζ', 'и': 'ι', 'й': 'ι', 'κ': 'κ', 'λ': 'λ', 'м': 'μ', 'н': 'ν', 'о': 'ο',
|
| 51 |
-
'π': 'π', 'ρ': 'ρ', 'σ': 'σ', 'τ': 'τ', 'у': 'ου', 'ф': 'φ', 'х': 'χ', 'ц': 'τσ',
|
| 52 |
-
'ч': 'τσ', # or τζ depending on desired sound
|
| 53 |
-
'ш': 'σ', 'щ': 'σ', # approximations
|
| 54 |
-
'ъ': '', 'ы': 'ι', 'ь': '', 'э': 'ε', 'ю': 'ιου',
|
| 55 |
-
'я': 'ια',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
}
|
| 57 |
|
|
|
|
| 58 |
# Convert the input text to lowercase, preserving accents for Latin characters.
|
| 59 |
# casefold() is used for more robust caseless matching across Unicode characters.
|
| 60 |
lowercased_text = text.lower() #casefold()
|
|
@@ -150,19 +170,13 @@ def fix_vocals(text, lang='ron'):
|
|
| 150 |
'^': ' la puterea ',
|
| 151 |
'+': ' plus ',
|
| 152 |
' - ': ' minus ', # only replace if standalone so to not say minus if is a-b-c
|
| 153 |
-
'*': ' ori ', # times
|
| 154 |
'/': ' împărțit la ', # divided by
|
| 155 |
'=': ' egal cu ', # equals
|
| 156 |
'pi': ' pi ',
|
| 157 |
'<': ' mai mic decât ',
|
| 158 |
'>': ' mai mare decât',
|
| 159 |
'%': ' la sută ', # percent (from previous)
|
| 160 |
-
'(': ' paranteză deschisă ',
|
| 161 |
-
')': ' paranteză închisă ',
|
| 162 |
-
'[': ' paranteză pătrată deschisă ',
|
| 163 |
-
']': ' paranteză pătrată închisă ',
|
| 164 |
-
'{': ' acoladă deschisă ',
|
| 165 |
-
'}': ' acoladă închisă ',
|
| 166 |
'≠': ' nu este egal cu ',
|
| 167 |
'≤': ' mai mic sau egal cu ',
|
| 168 |
'≥': ' mai mare sau egal cu ',
|
|
@@ -189,7 +203,7 @@ def fix_vocals(text, lang='ron'):
|
|
| 189 |
'^': ' to the power of ',
|
| 190 |
'+': ' plus ',
|
| 191 |
' - ': ' minus ',
|
| 192 |
-
'*': ' times ',
|
| 193 |
' / ': ' divided by ',
|
| 194 |
'=': ' equals ',
|
| 195 |
'pi': ' pi ',
|
|
@@ -197,12 +211,6 @@ def fix_vocals(text, lang='ron'):
|
|
| 197 |
'>': ' greater than ',
|
| 198 |
# Additional common math symbols from previous list
|
| 199 |
'%': ' percent ',
|
| 200 |
-
'(': ' open parenthesis ',
|
| 201 |
-
')': ' close parenthesis ',
|
| 202 |
-
'[': ' open bracket ',
|
| 203 |
-
']': ' close bracket ',
|
| 204 |
-
'{': ' open curly brace ',
|
| 205 |
-
'}': ' close curly brace ',
|
| 206 |
'∑': ' sum ',
|
| 207 |
'∫': ' integral ',
|
| 208 |
'√': ' square root of ',
|
|
@@ -223,10 +231,12 @@ def fix_vocals(text, lang='ron'):
|
|
| 223 |
'rn': 'rrn',
|
| 224 |
'ć': 'č',
|
| 225 |
'c': 'č',
|
| 226 |
-
'
|
|
|
|
| 227 |
'j': 'i',
|
| 228 |
'l': 'lll',
|
| 229 |
'w': 'v',
|
|
|
|
| 230 |
# https://huggingface.co/facebook/mms-tts-rmc-script_latin
|
| 231 |
'sqrt': 'kvadratni koren iz',
|
| 232 |
'^': ' na stepen ',
|
|
@@ -239,12 +249,6 @@ def fix_vocals(text, lang='ron'):
|
|
| 239 |
'<': ' manje od ',
|
| 240 |
'>': ' veće od ',
|
| 241 |
'%': ' procenat ',
|
| 242 |
-
'(': ' otvorena zagrada ',
|
| 243 |
-
')': ' zatvorena zagrada ',
|
| 244 |
-
'[': ' otvorena uglasta zagrada ',
|
| 245 |
-
']': ' zatvorena uglasta zagrada ',
|
| 246 |
-
'{': ' otvorena vitičasta zagrada ',
|
| 247 |
-
'}': ' zatvorena vitičasta zagrada ',
|
| 248 |
'∑': ' suma ',
|
| 249 |
'∫': ' integral ',
|
| 250 |
'√': ' kvadratni koren ',
|
|
@@ -283,7 +287,6 @@ def fix_vocals(text, lang='ron'):
|
|
| 283 |
# "đ": "ď",
|
| 284 |
# "lj": "ľ",
|
| 285 |
# "nj": "ň",
|
| 286 |
-
# "ž": "z",
|
| 287 |
# "c": "č"
|
| 288 |
}
|
| 289 |
|
|
@@ -309,12 +312,6 @@ def fix_vocals(text, lang='ron'):
|
|
| 309 |
'>': ' größer als',
|
| 310 |
# Additional common math symbols from previous list
|
| 311 |
'%': ' prozent ',
|
| 312 |
-
'(': ' Klammer auf ',
|
| 313 |
-
')': ' Klammer zu ',
|
| 314 |
-
'[': ' eckige Klammer auf ',
|
| 315 |
-
']': ' eckige Klammer zu ',
|
| 316 |
-
'{': ' geschweifte Klammer auf ',
|
| 317 |
-
'}': ' geschweifte Klammer zu ',
|
| 318 |
'∑': ' Summe ',
|
| 319 |
'∫': ' Integral ',
|
| 320 |
'√': ' Quadratwurzel ',
|
|
@@ -348,12 +345,6 @@ def fix_vocals(text, lang='ron'):
|
|
| 348 |
'>': ' supérieur à ',
|
| 349 |
# Add more common math symbols as needed for French
|
| 350 |
'%': ' pour cent ',
|
| 351 |
-
'(': ' parenthèse ouverte ',
|
| 352 |
-
')': ' parenthèse fermée ',
|
| 353 |
-
'[': ' crochet ouvert ',
|
| 354 |
-
']': ' crochet fermé ',
|
| 355 |
-
'{': ' accolade ouverte ',
|
| 356 |
-
'}': ' accolade fermée ',
|
| 357 |
'∑': ' somme ',
|
| 358 |
'∫': ' intégrale ',
|
| 359 |
'√': ' racine carrée ',
|
|
@@ -391,12 +382,6 @@ def fix_vocals(text, lang='ron'):
|
|
| 391 |
'>': ' nagyobb mint ',
|
| 392 |
# Add more common math symbols as needed for Hungarian
|
| 393 |
'%': ' százalék ',
|
| 394 |
-
'(': ' nyitó zárójel ',
|
| 395 |
-
')': ' záró zárójel ',
|
| 396 |
-
'[': ' nyitó szögletes zárójel ',
|
| 397 |
-
']': ' záró szögletes zárójel ',
|
| 398 |
-
'{': ' nyitó kapcsos zárójel ',
|
| 399 |
-
'}': ' záró kapcsos zárójel ',
|
| 400 |
'∑': ' szumma ',
|
| 401 |
'∫': ' integrál ',
|
| 402 |
'√': ' négyzetgyök ',
|
|
@@ -429,12 +414,6 @@ def fix_vocals(text, lang='ron'):
|
|
| 429 |
'>': ' μεῖζον ',
|
| 430 |
# Add more common math symbols as needed for Ancient Greek
|
| 431 |
'%': ' τοῖς ἑκατόν ', # tois hekaton - 'of the hundred'
|
| 432 |
-
'(': ' ἀνοικτὴ παρένθεσις ',
|
| 433 |
-
')': ' κλειστὴ παρένθεσις ',
|
| 434 |
-
'[': ' ἀνοικτὴ ἀγκύλη ',
|
| 435 |
-
']': ' κλειστὴ ἀγκύλη ',
|
| 436 |
-
'{': ' ἀνοικτὴ σγουρὴ ἀγκύλη ',
|
| 437 |
-
'}': ' κλειστὴ σγουρὴ ἀγκύλη ',
|
| 438 |
'∑': ' ἄθροισμα ',
|
| 439 |
'∫': ' ὁλοκλήρωμα ',
|
| 440 |
'√': ' τετραγωνικὴ ῥίζα ',
|
|
|
|
| 19 |
latin_to_greek_map = {
|
| 20 |
'a': 'α', 'b': 'β', 'g': 'γ', 'd': 'δ', 'e': 'ε',
|
| 21 |
'ch': 'τσο', # Example of a multi-character Latin sequence
|
| 22 |
+
'z': 'ζ', 'h': 'χ', 'i': 'ι', 'j': 'ζ', 'k': 'κ', 'l': 'λ',
|
| 23 |
+
'm': 'μ', 'n': 'ν', 'x': 'ξ', 'o': 'ο', 'p': 'π', 'q': 'κ',
|
| 24 |
'v': 'β', 'sc': 'σκ', 'r': 'ρ', 's': 'σ', 't': 'τ',
|
| 25 |
'u': 'ου', 'f': 'φ', 'c': 'σ', 'w': 'β', 'y': 'γ',
|
| 26 |
}
|
|
|
|
| 36 |
}
|
| 37 |
|
| 38 |
cyrillic_to_latin_map = {
|
| 39 |
+
# 'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'е': 'e', 'ё': 'yo', 'ж': 'zh',
|
| 40 |
+
# 'з': 'z', 'и': 'i', 'й': 'y', 'к': 'k', 'л': 'l', 'м': 'm', 'н': 'n', 'о': 'o',
|
| 41 |
+
# 'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u', 'ф': 'f', 'х': 'kh', 'ц': 'ts',
|
| 42 |
+
# 'ч': 'ch', 'ш': 'sh', 'щ': 'shch', 'ъ': '', 'ы': 'y', 'ь': '', 'э': 'e', 'ю': 'yu',
|
| 43 |
+
# 'я': 'ya',
|
| 44 |
+
# ----------------кључеви
|
| 45 |
+
'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'е': 'e', 'ж': 'z',
|
| 46 |
+
'з': 'z', 'и': 'i', 'ј': 'j', 'к': 'k', 'л': 'l', 'м': 'm', 'н': 'n',
|
| 47 |
+
'о': 'o', 'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u', 'ф': 'f',
|
| 48 |
+
'х': 'h', 'ц': 'c', 'ч': 'c', 'ш': 's', "ž": "z",
|
| 49 |
+
'ђ': 'dzi', 'љ': 'li', 'њ': 'ni', 'ћ': 'c', 'џ': 'dz',
|
| 50 |
+
'ё': 'e', 'й': 'i', 'щ': 's', 'ъ': '', 'ы': 'y', 'ь': '',
|
| 51 |
+
'э': 'e', 'ю': 'io', 'я': 'a',
|
| 52 |
+
'ѓ': 'y', 'ѕ': 's', 'ќ': 'k',
|
| 53 |
}
|
| 54 |
|
| 55 |
+
# Cyrillic to Greek on phonetic similarity.
|
|
|
|
| 56 |
cyrillic_to_greek_map = {
|
| 57 |
+
# 'а': 'α', 'б': 'β', 'в': 'β', 'г': 'γ', 'д': 'δ', 'е': 'ε', 'ё': 'ιο', 'ж': 'ζ',
|
| 58 |
+
# 'з': 'ζ', 'и': 'ι', 'й': 'ι', 'κ': 'κ', 'λ': 'λ', 'м': 'μ', 'н': 'ν', 'о': 'ο',
|
| 59 |
+
# 'π': 'π', 'ρ': 'ρ', 'σ': 'σ', 'τ': 'τ', 'у': 'ου', 'ф': 'φ', 'х': 'χ', 'ц': 'τσ',
|
| 60 |
+
# 'ч': 'τσ', # or τζ depending on desired sound
|
| 61 |
+
# 'ш': 'σ', 'щ': 'σ', # approximations
|
| 62 |
+
# 'ъ': '', 'ы': 'ι', 'ь': '', 'э': 'ε', 'ю': 'ιου',
|
| 63 |
+
# 'я': 'ια',
|
| 64 |
+
# --------------------
|
| 65 |
+
'а': 'α', 'б': 'μπ', 'в': 'β', 'г': 'γ', 'д': 'δ', 'е': 'ε',
|
| 66 |
+
'ж': 'ζ', 'з': 'ζ', 'и': 'ι', 'й': 'ι', 'к': 'κ',
|
| 67 |
+
'л': 'λ', 'м': 'μ', 'н': 'ν', 'о': 'ο', 'п': 'π', 'р': 'ρ',
|
| 68 |
+
'с': 'τσ', 'т': 'τ', 'у': 'ού', 'ф': 'φ', 'х': 'χ', 'ц': 'τσ',
|
| 69 |
+
'ч': 'τσ', 'ш': 'σ', 'щ': 'σ',
|
| 70 |
+
#
|
| 71 |
+
'ђ': 'ντζι', 'љ': 'λι', 'њ': 'νι', 'ћ': 'τσ', 'џ': 'ντζ',
|
| 72 |
+
'ы': 'ι', 'ь': '',
|
| 73 |
+
'э': 'ε', 'ю': 'ιο', 'я': 'ια',
|
| 74 |
+
'ѓ': 'γ', 'ѕ': 'σ',
|
| 75 |
}
|
| 76 |
|
| 77 |
+
|
| 78 |
# Convert the input text to lowercase, preserving accents for Latin characters.
|
| 79 |
# casefold() is used for more robust caseless matching across Unicode characters.
|
| 80 |
lowercased_text = text.lower() #casefold()
|
|
|
|
| 170 |
'^': ' la puterea ',
|
| 171 |
'+': ' plus ',
|
| 172 |
' - ': ' minus ', # only replace if standalone so to not say minus if is a-b-c
|
| 173 |
+
# '*': ' ori ', # times
|
| 174 |
'/': ' împărțit la ', # divided by
|
| 175 |
'=': ' egal cu ', # equals
|
| 176 |
'pi': ' pi ',
|
| 177 |
'<': ' mai mic decât ',
|
| 178 |
'>': ' mai mare decât',
|
| 179 |
'%': ' la sută ', # percent (from previous)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
'≠': ' nu este egal cu ',
|
| 181 |
'≤': ' mai mic sau egal cu ',
|
| 182 |
'≥': ' mai mare sau egal cu ',
|
|
|
|
| 203 |
'^': ' to the power of ',
|
| 204 |
'+': ' plus ',
|
| 205 |
' - ': ' minus ',
|
| 206 |
+
# '*': ' times ',
|
| 207 |
' / ': ' divided by ',
|
| 208 |
'=': ' equals ',
|
| 209 |
'pi': ' pi ',
|
|
|
|
| 211 |
'>': ' greater than ',
|
| 212 |
# Additional common math symbols from previous list
|
| 213 |
'%': ' percent ',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
'∑': ' sum ',
|
| 215 |
'∫': ' integral ',
|
| 216 |
'√': ' square root of ',
|
|
|
|
| 231 |
'rn': 'rrn',
|
| 232 |
'ć': 'č',
|
| 233 |
'c': 'č',
|
| 234 |
+
'č': 'ts',
|
| 235 |
+
'đ': 'dz',
|
| 236 |
'j': 'i',
|
| 237 |
'l': 'lll',
|
| 238 |
'w': 'v',
|
| 239 |
+
'h': 'hh',
|
| 240 |
# https://huggingface.co/facebook/mms-tts-rmc-script_latin
|
| 241 |
'sqrt': 'kvadratni koren iz',
|
| 242 |
'^': ' na stepen ',
|
|
|
|
| 249 |
'<': ' manje od ',
|
| 250 |
'>': ' veće od ',
|
| 251 |
'%': ' procenat ',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
'∑': ' suma ',
|
| 253 |
'∫': ' integral ',
|
| 254 |
'√': ' kvadratni koren ',
|
|
|
|
| 287 |
# "đ": "ď",
|
| 288 |
# "lj": "ľ",
|
| 289 |
# "nj": "ň",
|
|
|
|
| 290 |
# "c": "č"
|
| 291 |
}
|
| 292 |
|
|
|
|
| 312 |
'>': ' größer als',
|
| 313 |
# Additional common math symbols from previous list
|
| 314 |
'%': ' prozent ',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
'∑': ' Summe ',
|
| 316 |
'∫': ' Integral ',
|
| 317 |
'√': ' Quadratwurzel ',
|
|
|
|
| 345 |
'>': ' supérieur à ',
|
| 346 |
# Add more common math symbols as needed for French
|
| 347 |
'%': ' pour cent ',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
'∑': ' somme ',
|
| 349 |
'∫': ' intégrale ',
|
| 350 |
'√': ' racine carrée ',
|
|
|
|
| 382 |
'>': ' nagyobb mint ',
|
| 383 |
# Add more common math symbols as needed for Hungarian
|
| 384 |
'%': ' százalék ',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
'∑': ' szumma ',
|
| 386 |
'∫': ' integrál ',
|
| 387 |
'√': ' négyzetgyök ',
|
|
|
|
| 414 |
'>': ' μεῖζον ',
|
| 415 |
# Add more common math symbols as needed for Ancient Greek
|
| 416 |
'%': ' τοῖς ἑκατόν ', # tois hekaton - 'of the hundred'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
'∑': ' ἄθροισμα ',
|
| 418 |
'∫': ' ὁλοκλήρωμα ',
|
| 419 |
'√': ' τετραγωνικὴ ῥίζα ',
|