Upload predict.py

predict.py (+9, -5) CHANGED
@@ -310,6 +310,9 @@ def covert_prompt_to_input_ids_with_history(text, history, tokenizer, max_token,
 
     example = tokenizer.encode_plus(f"{conv.get_prompt()} ", None, max_length=None)['input_ids']
 
+    if history is None or not isinstance(history, list):
+        history = []
+
     while(len(history) > 0 and (len(example) < max_token)):
         tmp = history.pop()
         if tmp[0] == 'ASSISTANT':
@@ -333,7 +336,7 @@ def predict(model, text, tokenizer=None,
         sft=True, convo_template = "",
         device = "cuda",
         model_name="AquilaChat2-7B",
-        history=
+        history=None,
         **kwargs):
 
     vocab = tokenizer.get_vocab()
@@ -353,7 +356,7 @@ def predict(model, text, tokenizer=None,
     topk = 1
     temperature = 1.0
     if sft:
-        tokens = covert_prompt_to_input_ids_with_history(text, history=history, tokenizer=tokenizer, max_token=
+        tokens = covert_prompt_to_input_ids_with_history(text, history=history, tokenizer=tokenizer, max_token=2048, convo_template=convo_template)
         tokens = torch.tensor(tokens)[None,].to(device)
     else :
         tokens = tokenizer.encode_plus(text)["input_ids"]
@@ -435,8 +438,9 @@ def predict(model, text, tokenizer=None,
     convert_tokens = convert_tokens[1:]
     probs = probs[1:]
 
-
-
-
+    if isinstance(history, list):
+        # Update history
+        history.insert(0, ('ASSISTANT', out))
+        history.insert(0, ('USER', text))
 
     return out
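The core change is switching predict()'s history default to None (the old default value is truncated in the diff view, so its exact form is an assumption, but the usual motivation for this change is Python's shared-mutable-default pitfall). A self-contained illustration of that pitfall and the None-sentinel fix the new signature uses:

def broken(turn, history=[]):
    # The [] default is evaluated once, at definition time, and is then
    # shared by every call that omits the argument.
    history.append(turn)
    return history

print(broken('first'))   # ['first']
print(broken('second'))  # ['first', 'second']  <- state leaked across calls

def fixed(turn, history=None):
    # None-sentinel pattern, as in the new predict() signature.
    if history is None:
        history = []
    history.append(turn)
    return history

print(fixed('first'))    # ['first']
print(fixed('second'))   # ['second']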
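Taken together, the four hunks form one pattern: the prompt builder normalizes a missing or malformed history to a local empty list before its pop() loop, and predict() writes the finished turn back only when the caller actually supplied a list. A minimal sketch of that flow; the prompt template, length check, and generation step are hypothetical stand-ins, not the repo's real code:

def build_prompt(text, history, max_token=2048):
    # Guard from the first hunk: the consumption loop must never see None.
    if history is None or not isinstance(history, list):
        history = []
    prompt = text
    # Fold past (role, content) turns into the prompt until the budget is
    # spent; pop() consumes the caller's list as it goes. The real code
    # measures tokens; this sketch just counts characters.
    while len(history) > 0 and len(prompt) < max_token:
        role, content = history.pop()
        prompt = f"{role}: {content}\n{prompt}"
    return prompt

def predict_sketch(text, history=None):
    prompt = build_prompt(text, history)
    out = f"echo: {text}"  # stand-in for the model's generated reply

    # Guard from the last hunk: if the caller passed None, build_prompt's
    # local list is invisible here and nothing is recorded.
    if isinstance(history, list):
        history.insert(0, ('ASSISTANT', out))
        history.insert(0, ('USER', text))
    return out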
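Hypothetical usage of the updated signature, assuming model and tokenizer have been loaded as elsewhere in this repo; one list threads the conversation through consecutive calls, while omitting it keeps each call stateless:

history = []

# Each call reads older turns while building the prompt, then records the
# newest (USER, ASSISTANT) pair at the front of the same list.
first = predict(model, "What is the capital of France?", tokenizer=tokenizer, history=history)
follow_up = predict(model, "How large is that city?", tokenizer=tokenizer, history=history)

# history=None (the new default) is also safe: the helper builds a local
# empty list, and no state survives the call.
one_off = predict(model, "Translate 'hello' into French.", tokenizer=tokenizer)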