DeepMostInnovations
/

hindi-embedding-foundational-model

Sentence Similarity

sentence-embeddings

semantic-search

text-similarity

Model card Files Files and versions

DeepMostInnovations committed on Mar 10

Commit

f40b58a

·

verified ·

1 Parent(s): 7e8e702

Add inference script

Files changed (1) hide show

hindi_embeddings.py +3 -9

hindi_embeddings.py CHANGED Viewed

@@ -384,24 +384,18 @@ class SentenceEmbeddingModel(nn.Module):
         return pooled_output
 class HindiEmbedder:
-    def __init__(self, model_path="/home/ubuntu/output/hindi-embeddings-custom-tokenizer/final", tokenizer_path=None):
         """
         Initialize the Hindi sentence embedder.
         Args:
             model_path: Path to the model directory
-            tokenizer_path: Optional path to tokenizer. If None, will look in the model directory.
         """
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         print(f"Using device: {self.device}")
-        # Load tokenizer
-        if tokenizer_path is None:
-            # Try standard location in model directory
-            tokenizer_path = os.path.join(model_path, "tokenizer.model")
-            if not os.path.exists(tokenizer_path):
-                # Try original location
-                tokenizer_path = "/home/ubuntu/hindi_tokenizer/tokenizer.model"
         if not os.path.exists(tokenizer_path):
             raise FileNotFoundError(f"Could not find tokenizer at {tokenizer_path}")

         return pooled_output
 class HindiEmbedder:
+    def __init__(self, model_path="/home/ubuntu/output/hindi-embeddings-custom-tokenizer/final"):
         """
         Initialize the Hindi sentence embedder.
         Args:
             model_path: Path to the model directory
         """
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         print(f"Using device: {self.device}")
+        # Load tokenizer - look for it in the model directory
+        tokenizer_path = os.path.join(model_path, "tokenizer.model")
         if not os.path.exists(tokenizer_path):
             raise FileNotFoundError(f"Could not find tokenizer at {tokenizer_path}")