Spaces:
Sleeping
Sleeping
Update core/loader.py
Browse files- core/loader.py +7 -0
core/loader.py
CHANGED
|
@@ -76,6 +76,13 @@ class MVI_AI:
|
|
| 76 |
vocab_path=os.path.join(ARTIFACTS_DIR, "vocab.json")
|
| 77 |
)
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
# ===== LANGUAGE CORE =====
|
| 80 |
self.embedder = EmbeddingLayer(
|
| 81 |
self.tokenizer.vocab_size,
|
|
|
|
| 76 |
vocab_path=os.path.join(ARTIFACTS_DIR, "vocab.json")
|
| 77 |
)
|
| 78 |
|
| 79 |
+
# Ensure special tokens exist in fallback vocab
|
| 80 |
+
for tok in [HybridTokenizer.PAD_TOKEN, HybridTokenizer.UNK_TOKEN,
|
| 81 |
+
HybridTokenizer.BOS_TOKEN, HybridTokenizer.EOS_TOKEN]:
|
| 82 |
+
if tok not in self.tokenizer.vocab:
|
| 83 |
+
self.tokenizer.vocab[tok] = len(self.tokenizer.vocab)
|
| 84 |
+
self.tokenizer.inv_vocab = {idx: token for token, idx in self.tokenizer.vocab.items()}
|
| 85 |
+
|
| 86 |
# ===== LANGUAGE CORE =====
|
| 87 |
self.embedder = EmbeddingLayer(
|
| 88 |
self.tokenizer.vocab_size,
|