Musombi committed on
Commit
ee66722
·
verified ·
1 Parent(s): 991977e

Update core/loader.py

Browse files
Files changed (1) hide show
  1. core/loader.py +7 -0
core/loader.py CHANGED
@@ -76,6 +76,13 @@ class MVI_AI:
76
  vocab_path=os.path.join(ARTIFACTS_DIR, "vocab.json")
77
  )
78
 
 
 
 
 
 
 
 
79
  # ===== LANGUAGE CORE =====
80
  self.embedder = EmbeddingLayer(
81
  self.tokenizer.vocab_size,
 
76
  vocab_path=os.path.join(ARTIFACTS_DIR, "vocab.json")
77
  )
78
 
79
+ # Ensure special tokens exist in fallback vocab
80
+ for tok in [HybridTokenizer.PAD_TOKEN, HybridTokenizer.UNK_TOKEN,
81
+ HybridTokenizer.BOS_TOKEN, HybridTokenizer.EOS_TOKEN]:
82
+ if tok not in self.tokenizer.vocab:
83
+ self.tokenizer.vocab[tok] = len(self.tokenizer.vocab)
84
+ self.tokenizer.inv_vocab = {idx: token for token, idx in self.tokenizer.vocab.items()}
85
+
86
  # ===== LANGUAGE CORE =====
87
  self.embedder = EmbeddingLayer(
88
  self.tokenizer.vocab_size,