Spaces:

Dionyssos
/

SHIFT

Sleeping

App Files Files Community

Dionyssos committed on Sep 24

Commit

6ea8295

1 Parent(s): 37faf9c

cyrills

Browse files

Files changed (2) hide show

app.py +3 -4
textual.py +41 -62

app.py CHANGED Viewed

@@ -91,12 +91,11 @@ def audionar_tts(text=None,
-    x = x[None, :]
-    x = np.concatenate([0.49 * x, 0.51 * x], 0)
     wavfile = '_vits_.wav'
-    audiofile.write(wavfile, x, fs)
     return wavfile  # 2x file for [audio out & state to pass to the Emotion reco tAB]
@@ -215,4 +214,4 @@ with gr.Blocks(theme='huggingface') as demo:
         fn=audionar_tts,
         inputs=[text_input, choice_dropdown],
         outputs=[output_audio])
-demo.launch(debug=True)

     wavfile = '_vits_.wav'
+    audiofile.write(wavfile, x, fs)
     return wavfile  # 2x file for [audio out & state to pass to the Emotion reco tAB]
         fn=audionar_tts,
         inputs=[text_input, choice_dropdown],
         outputs=[output_audio])
+demo.launch(debug=True)

textual.py CHANGED Viewed

@@ -19,8 +19,8 @@ def only_greek_or_only_latin(text, lang='grc'):
     latin_to_greek_map = {
         'a': 'α', 'b': 'β', 'g': 'γ', 'd': 'δ', 'e': 'ε',
         'ch': 'τσο', # Example of a multi-character Latin sequence
-        'z': 'ζ', 'h': 'χ', 'i': 'ι', 'k': 'κ', 'l': 'λ',
-        'm': 'μ', 'n': 'ν', 'x': 'ξ', 'o': 'ο', 'p': 'π',
         'v': 'β', 'sc': 'σκ', 'r': 'ρ', 's': 'σ', 't': 'τ',
         'u': 'ου', 'f': 'φ', 'c': 'σ', 'w': 'β', 'y': 'γ',
     }
@@ -36,25 +36,45 @@ def only_greek_or_only_latin(text, lang='grc'):
     }
     cyrillic_to_latin_map = {
-        'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'е': 'e', 'ё': 'yo', 'ж': 'zh',
-        'з': 'z', 'и': 'i', 'й': 'y', 'к': 'k', 'л': 'l', 'м': 'm', 'н': 'n', 'о': 'o',
-        'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u', 'ф': 'f', 'х': 'kh', 'ц': 'ts',
-        'ч': 'ch', 'ш': 'sh', 'щ': 'shch', 'ъ': '', 'ы': 'y', 'ь': '', 'э': 'e', 'ю': 'yu',
-        'я': 'ya',
     }
-    # Direct Cyrillic to Greek mapping based on phonetic similarity.
-    # These are approximations and may not be universally accepted transliterations.
     cyrillic_to_greek_map = {
-        'а': 'α', 'б': 'β', 'в': 'β', 'г': 'γ', 'д': 'δ', 'е': 'ε', 'ё': 'ιο', 'ж': 'ζ',
-        'з': 'ζ', 'и': 'ι', 'й': 'ι', 'κ': 'κ', 'λ': 'λ', 'м': 'μ', 'н': 'ν', 'о': 'ο',
-        'π': 'π', 'ρ': 'ρ', 'σ': 'σ', 'τ': 'τ', 'у': 'ου', 'ф': 'φ', 'х': 'χ', 'ц': 'τσ',
-        'ч': 'τσ', # or τζ depending on desired sound
-        'ш': 'σ', 'щ': 'σ', # approximations
-        'ъ': '', 'ы': 'ι', 'ь': '', 'э': 'ε', 'ю': 'ιου',
-        'я': 'ια',
     }
     # Convert the input text to lowercase, preserving accents for Latin characters.
     # casefold() is used for more robust caseless matching across Unicode characters.
     lowercased_text = text.lower()  #casefold()
@@ -150,19 +170,13 @@ def fix_vocals(text, lang='ron'):
         '^': ' la puterea ',
         '+': ' plus ',
         ' - ': ' minus ',  # only replace if standalone so to not say minus if is a-b-c
-        '*': ' ori ',  # times
         '/': ' împărțit la ',  # divided by
         '=': ' egal cu ',  # equals
         'pi': ' pi ',
         '<': ' mai mic decât ',
         '>': ' mai mare decât',
         '%': ' la sută ', # percent (from previous)
-        '(': ' paranteză deschisă ',
-        ')': ' paranteză închisă ',
-        '[': ' paranteză pătrată deschisă ',
-        ']': ' paranteză pătrată închisă ',
-        '{': ' acoladă deschisă ',
-        '}': ' acoladă închisă ',
         '≠': ' nu este egal cu ',
         '≤': ' mai mic sau egal cu ',
         '≥': ' mai mare sau egal cu ',
@@ -189,7 +203,7 @@ def fix_vocals(text, lang='ron'):
         '^': ' to the power of ',
         '+': ' plus ',
         ' - ': ' minus ',
-        '*': ' times ',
         ' / ': ' divided by ',
         '=': ' equals ',
         'pi': ' pi ',
@@ -197,12 +211,6 @@ def fix_vocals(text, lang='ron'):
         '>': ' greater than ',
         # Additional common math symbols from previous list
         '%': ' percent ',
-        '(': ' open parenthesis ',
-        ')': ' close parenthesis ',
-        '[': ' open bracket ',
-        ']': ' close bracket ',
-        '{': ' open curly brace ',
-        '}': ' close curly brace ',
         '∑': ' sum ',
         '∫': ' integral ',
         '√': ' square root of ',
@@ -223,10 +231,12 @@ def fix_vocals(text, lang='ron'):
         'rn': 'rrn',
         'ć': 'č',
         'c': 'č',
-        'đ': 'd',
         'j': 'i',
         'l': 'lll',
         'w': 'v',
         #  https://huggingface.co/facebook/mms-tts-rmc-script_latin
         'sqrt': 'kvadratni koren iz',
         '^': ' na stepen ',
@@ -239,12 +249,6 @@ def fix_vocals(text, lang='ron'):
         '<': ' manje od ',
         '>': ' veće od ',
         '%': ' procenat ',
-        '(': ' otvorena zagrada ',
-        ')': ' zatvorena zagrada ',
-        '[': ' otvorena uglasta zagrada ',
-        ']': ' zatvorena uglasta zagrada ',
-        '{': ' otvorena vitičasta zagrada ',
-        '}': ' zatvorena vitičasta zagrada ',
         '∑': ' suma ',
         '∫': ' integral ',
         '√': ' kvadratni koren ',
@@ -283,7 +287,6 @@ def fix_vocals(text, lang='ron'):
         # "đ": "ď",
         # "lj": "ľ",
         # "nj": "ň",
-        # "ž": "z",
         # "c": "č"
     }
@@ -309,12 +312,6 @@ def fix_vocals(text, lang='ron'):
         '>': ' größer als',
         # Additional common math symbols from previous list
         '%': ' prozent ',
-        '(': ' Klammer auf ',
-        ')': ' Klammer zu ',
-        '[': ' eckige Klammer auf ',
-        ']': ' eckige Klammer zu ',
-        '{': ' geschweifte Klammer auf ',
-        '}': ' geschweifte Klammer zu ',
         '∑': ' Summe ',
         '∫': ' Integral ',
         '√': ' Quadratwurzel ',
@@ -348,12 +345,6 @@ def fix_vocals(text, lang='ron'):
         '>': ' supérieur à ',
         # Add more common math symbols as needed for French
         '%': ' pour cent ',
-        '(': ' parenthèse ouverte ',
-        ')': ' parenthèse fermée ',
-        '[': ' crochet ouvert ',
-        ']': ' crochet fermé ',
-        '{': ' accolade ouverte ',
-        '}': ' accolade fermée ',
         '∑': ' somme ',
         '∫': ' intégrale ',
         '√': ' racine carrée ',
@@ -391,12 +382,6 @@ def fix_vocals(text, lang='ron'):
         '>': ' nagyobb mint ',
         # Add more common math symbols as needed for Hungarian
         '%': ' százalék ',
-        '(': ' nyitó zárójel ',
-        ')': ' záró zárójel ',
-        '[': ' nyitó szögletes zárójel ',
-        ']': ' záró szögletes zárójel ',
-        '{': ' nyitó kapcsos zárójel ',
-        '}': ' záró kapcsos zárójel ',
         '∑': ' szumma ',
         '∫': ' integrál ',
         '√': ' négyzetgyök ',
@@ -429,12 +414,6 @@ def fix_vocals(text, lang='ron'):
         '>': ' μεῖζον ',
         # Add more common math symbols as needed for Ancient Greek
         '%': ' τοῖς ἑκατόν ', # tois hekaton - 'of the hundred'
-        '(': ' ἀνοικτὴ παρένθεσις ',
-        ')': ' κλειστὴ παρένθεσις ',
-        '[': ' ἀνοικτὴ ἀγκύλη ',
-        ']': ' κλειστὴ ἀγκύλη ',
-        '{': ' ἀνοικτὴ σγουρὴ ἀγκύλη ',
-        '}': ' κλειστὴ σγουρὴ ἀγκύλη ',
         '∑': ' ἄθροισμα ',
         '∫': ' ὁλοκλήρωμα ',
         '√': ' τετραγωνικὴ ῥίζα ',

     latin_to_greek_map = {
         'a': 'α', 'b': 'β', 'g': 'γ', 'd': 'δ', 'e': 'ε',
         'ch': 'τσο', # Example of a multi-character Latin sequence
+        'z': 'ζ', 'h': 'χ', 'i': 'ι', 'j': 'ζ', 'k': 'κ', 'l': 'λ',
+        'm': 'μ', 'n': 'ν', 'x': 'ξ', 'o': 'ο', 'p': 'π', 'q': 'κ',
         'v': 'β', 'sc': 'σκ', 'r': 'ρ', 's': 'σ', 't': 'τ',
         'u': 'ου', 'f': 'φ', 'c': 'σ', 'w': 'β', 'y': 'γ',
     }
     }
     cyrillic_to_latin_map = {
+        # 'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'е': 'e', 'ё': 'yo', 'ж': 'zh',
+        # 'з': 'z', 'и': 'i', 'й': 'y', 'к': 'k', 'л': 'l', 'м': 'm', 'н': 'n', 'о': 'o',
+        # 'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u', 'ф': 'f', 'х': 'kh', 'ц': 'ts',
+        # 'ч': 'ch', 'ш': 'sh', 'щ': 'shch', 'ъ': '', 'ы': 'y', 'ь': '', 'э': 'e', 'ю': 'yu',
+        # 'я': 'ya',
+        # ----------------кључеви
+        'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'е': 'e', 'ж': 'z',
+        'з': 'z', 'и': 'i', 'ј': 'j', 'к': 'k', 'л': 'l', 'м': 'm', 'н': 'n',
+        'о': 'o', 'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u', 'ф': 'f',
+        'х': 'h', 'ц': 'c', 'ч': 'c', 'ш': 's', "ž": "z",
+        'ђ': 'dzi', 'љ': 'li', 'њ': 'ni', 'ћ': 'c', 'џ': 'dz',
+        'ё': 'e', 'й': 'i', 'щ': 's', 'ъ': '', 'ы': 'y', 'ь': '',
+        'э': 'e', 'ю': 'io', 'я': 'a',
+        'ѓ': 'y', 'ѕ': 's', 'ќ': 'k',
     }
+    # Cyrillic to Greek on phonetic similarity.
     cyrillic_to_greek_map = {
+        # 'а': 'α', 'б': 'β', 'в': 'β', 'г': 'γ', 'д': 'δ', 'е': 'ε', 'ё': 'ιο', 'ж': 'ζ',
+        # 'з': 'ζ', 'и': 'ι', 'й': 'ι', 'κ': 'κ', 'λ': 'λ', 'м': 'μ', 'н': 'ν', 'о': 'ο',
+        # 'π': 'π', 'ρ': 'ρ', 'σ': 'σ', 'τ': 'τ', 'у': 'ου', 'ф': 'φ', 'х': 'χ', 'ц': 'τσ',
+        # 'ч': 'τσ', # or τζ depending on desired sound
+        # 'ш': 'σ', 'щ': 'σ', # approximations
+        # 'ъ': '', 'ы': 'ι', 'ь': '', 'э': 'ε', 'ю': 'ιου',
+        # 'я': 'ια',
+        # --------------------
+        'а': 'α', 'б': 'μπ', 'в': 'β', 'г': 'γ', 'д': 'δ', 'е': 'ε',
+        'ж': 'ζ', 'з': 'ζ', 'и': 'ι', 'й': 'ι', 'к': 'κ',
+        'л': 'λ', 'м': 'μ', 'н': 'ν', 'о': 'ο', 'п': 'π', 'р': 'ρ',
+        'с': 'τσ', 'т': 'τ', 'у': 'ού', 'ф': 'φ', 'х': 'χ', 'ц': 'τσ',
+        'ч': 'τσ', 'ш': 'σ', 'щ': 'σ',
+        #
+        'ђ': 'ντζι', 'љ': 'λι', 'њ': 'νι', 'ћ': 'τσ', 'џ': 'ντζ',
+        'ы': 'ι', 'ь': '',
+        'э': 'ε', 'ю': 'ιο', 'я': 'ια',
+        'ѓ': 'γ', 'ѕ': 'σ',
     }
     # Convert the input text to lowercase, preserving accents for Latin characters.
     # casefold() is used for more robust caseless matching across Unicode characters.
     lowercased_text = text.lower()  #casefold()
         '^': ' la puterea ',
         '+': ' plus ',
         ' - ': ' minus ',  # only replace if standalone so to not say minus if is a-b-c
+        # '*': ' ori ',  # times
         '/': ' împărțit la ',  # divided by
         '=': ' egal cu ',  # equals
         'pi': ' pi ',
         '<': ' mai mic decât ',
         '>': ' mai mare decât',
         '%': ' la sută ', # percent (from previous)
         '≠': ' nu este egal cu ',
         '≤': ' mai mic sau egal cu ',
         '≥': ' mai mare sau egal cu ',
         '^': ' to the power of ',
         '+': ' plus ',
         ' - ': ' minus ',
+        # '*': ' times ',
         ' / ': ' divided by ',
         '=': ' equals ',
         'pi': ' pi ',
         '>': ' greater than ',
         # Additional common math symbols from previous list
         '%': ' percent ',
         '∑': ' sum ',
         '∫': ' integral ',
         '√': ' square root of ',
         'rn': 'rrn',
         'ć': 'č',
         'c': 'č',
+        'č': 'ts',
+        'đ': 'dz',
         'j': 'i',
         'l': 'lll',
         'w': 'v',
+        'h': 'hh',
         #  https://huggingface.co/facebook/mms-tts-rmc-script_latin
         'sqrt': 'kvadratni koren iz',
         '^': ' na stepen ',
         '<': ' manje od ',
         '>': ' veće od ',
         '%': ' procenat ',
         '∑': ' suma ',
         '∫': ' integral ',
         '√': ' kvadratni koren ',
         # "đ": "ď",
         # "lj": "ľ",
         # "nj": "ň",
         # "c": "č"
     }
         '>': ' größer als',
         # Additional common math symbols from previous list
         '%': ' prozent ',
         '∑': ' Summe ',
         '∫': ' Integral ',
         '√': ' Quadratwurzel ',
         '>': ' supérieur à ',
         # Add more common math symbols as needed for French
         '%': ' pour cent ',
         '∑': ' somme ',
         '∫': ' intégrale ',
         '√': ' racine carrée ',
         '>': ' nagyobb mint ',
         # Add more common math symbols as needed for Hungarian
         '%': ' százalék ',
         '∑': ' szumma ',
         '∫': ' integrál ',
         '√': ' négyzetgyök ',
         '>': ' μεῖζον ',
         # Add more common math symbols as needed for Ancient Greek
         '%': ' τοῖς ἑκατόν ', # tois hekaton - 'of the hundred'
         '∑': ' ἄθροισμα ',
         '∫': ' ὁλοκλήρωμα ',
         '√': ' τετραγωνικὴ ῥίζα ',