language-identification

Running

App Files Community

kargaranamir committed on Nov 1, 2023

Commit

c53190e

1 Parent(s): 2a21b24

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -4

app.py CHANGED Viewed

@@ -153,8 +153,25 @@ def load_GlotLID_v2(model_name, file_name):
     return model
 model_1 = load_GlotLID_v1(constants.MODEL_NAME, "model_v1.bin")
 model_2 = load_GlotLID_v2(constants.MODEL_NAME, "model_v2.bin")
 # @st.cache_resource
 def plot(label, prob):
@@ -189,7 +206,16 @@ def compute(sentences, version = 'v2'):
         A list of language probablities and labels for the given sentences.
     """
     progress_text = "Computing Language..."
-    model_choice = model_2 if version == 'v2' else model_1
     my_bar = st.progress(0, text=progress_text)
     probs = []
@@ -206,7 +232,7 @@ def compute(sentences, version = 'v2'):
         output_label_language = output_label.split('_')[0]
         # script control
-        if version in ['v2'] and output_label_language!= 'zxx':
             main_script, all_scripts = get_script(sent)
             output_label_script = output_label.split('_')[1]
@@ -247,8 +273,8 @@ with tab1:
     version = st.radio(
         "Choose model",
-        ["v1", "v2"],
-        captions=["GlotLID version 1", "GlotLID version 2 (more data and languages)"],
         index = 1,
         key = 'version_tab1',
         horizontal = True

     return model
+@st.cache_resource
+def load_OpenLID():
+    model_path = hf_hub_download(repo_id='laurievb/OpenLID', filename='model.bin')
+    model = fasttext.load_model(model_path)
+    return model
+@st.cache_resource
+def load_NLLB():
+    model_path = hf_hub_download(repo_id='facebook/fasttext-language-identification', filename='model.bin')
+    model = fasttext.load_model(model_path)
+    return model
 model_1 = load_GlotLID_v1(constants.MODEL_NAME, "model_v1.bin")
 model_2 = load_GlotLID_v2(constants.MODEL_NAME, "model_v2.bin")
+model_3 = load_OpenLID()
+model_4 = load_NLLB()
 # @st.cache_resource
 def plot(label, prob):
         A list of language probablities and labels for the given sentences.
     """
     progress_text = "Computing Language..."
+    if version == 'nllb-218':
+        model_choice = model_4
+    elif version == 'openlid-201':
+        model_choice = model_3
+    elif version == 'v2':
+        model_choice = model_2
+    else:
+        model_choice = model_1
     my_bar = st.progress(0, text=progress_text)
     probs = []
         output_label_language = output_label.split('_')[0]
         # script control
+        if version in ['v2', 'openlid-201', 'nllb-218'] and output_label_language!= 'zxx':
             main_script, all_scripts = get_script(sent)
             output_label_script = output_label.split('_')[1]
     version = st.radio(
         "Choose model",
+        ["nllb-218", "openlid-201", "v1", "v2"],
+        captions=["NLLB", "OpenLID", "GlotLID version 1", "GlotLID version 2 (more data and languages)"],
         index = 1,
         key = 'version_tab1',
         horizontal = True