Fix undefined variables in eval_mteb.py
Browse files- scripts/eval_mteb.py +3 -3
scripts/eval_mteb.py
CHANGED
|
@@ -544,9 +544,9 @@ class Wrapper:
|
|
| 544 |
|
| 545 |
def _tokenize(self, sentences: List[str], is_query: bool):
|
| 546 |
|
| 547 |
-
batch_dict = tokenizer(sentences, max_length=max_seq_length - 1, return_attention_mask=False, padding=False, truncation=True)
|
| 548 |
-
batch_dict['input_ids'] = [input_ids + [tokenizer.eos_token_id] for input_ids in batch_dict['input_ids']]
|
| 549 |
-
batch_dict = tokenizer.pad(batch_dict, padding=True, return_attention_mask=True, return_tensors='pt')
|
| 550 |
batch_dict['is_causal'] = False
|
| 551 |
return batch_dict
|
| 552 |
|
|
|
|
| 544 |
|
| 545 |
def _tokenize(self, sentences: List[str], is_query: bool):
|
| 546 |
|
| 547 |
+
batch_dict = self.tokenizer(sentences, max_length=self.max_seq_length - 1, return_attention_mask=False, padding=False, truncation=True)
|
| 548 |
+
batch_dict['input_ids'] = [input_ids + [self.tokenizer.eos_token_id] for input_ids in batch_dict['input_ids']]
|
| 549 |
+
batch_dict = self.tokenizer.pad(batch_dict, padding=True, return_attention_mask=True, return_tensors='pt')
|
| 550 |
batch_dict['is_causal'] = False
|
| 551 |
return batch_dict
|
| 552 |
|