Commit 72e1ed6 · Tuchuanhuhuhu committed
Parent(s): 9c45970

Use tiktoken to count input tokens precisely
utils.py CHANGED

@@ -51,7 +51,7 @@ def postprocess(
 def count_token(input_str):
     print("计算输入Token计数中……")
     encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
-    length = len(encoding.encode(
+    length = len(encoding.encode(input_str))
     print("计算完成!")
     return length
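With this change, count_token measures input length in model tokens rather than characters, using the tokenizer that matches the target model. A minimal standalone sketch of the same pattern (assuming the tiktoken package is installed; the sample string is illustrative, not from the repo):

    import tiktoken

    def count_token(input_str):
        # Resolve the tokenizer that gpt-3.5-turbo uses (cl100k_base).
        encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
        # encode() returns a list of token ids; its length is the token count.
        return len(encoding.encode(input_str))

    print(count_token("你好，世界"))  # counts tokens, not characters

The surrounding print statements ("计算输入Token计数中……" / "计算完成!") just log "counting input tokens…" and "done!".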
@@ -138,7 +138,8 @@ def stream_predict(openai_api_key, system_prompt, history, inputs, chatbot, prev
     history.append(construct_user(inputs))
     user_token_count = 0
     if len(previous_token_count) == 0:
-
+        system_prompt_token_count = count_token(system_prompt)
+        user_token_count = count_token(inputs) + system_prompt_token_count
     else:
         user_token_count = count_token(inputs)
     print(f"输入token计数: {user_token_count}")
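This hunk charges the system prompt's tokens only once per conversation: on the first turn (when previous_token_count is empty) they are added to the user's count, and later turns count only the new message. The same decision in isolation (the helper name is hypothetical; count_token as above):

    def input_token_count(inputs, system_prompt, previous_token_count):
        # First turn: the system prompt is sent with the request,
        # so its tokens are counted together with the user input.
        if len(previous_token_count) == 0:
            return count_token(inputs) + count_token(system_prompt)
        # Later turns: the system prompt was already counted on turn one.
        return count_token(inputs)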
@@ -200,7 +201,7 @@ def predict_all(openai_api_key, system_prompt, history, inputs, chatbot, previou
 
 
 def predict(openai_api_key, system_prompt, history, inputs, chatbot, token_count, top_p, temperature, stream=False, should_check_token_count = True): # repetition_penalty, top_k
-    print(colorama.Fore.BLUE + f"
+    print("输入为:" +colorama.Fore.BLUE + f"{inputs}" + colorama.Style.RESET_ALL)
     if stream:
         print("使用流式传输")
         iter = stream_predict(openai_api_key, system_prompt, history, inputs, chatbot, token_count, top_p, temperature)
@@ -211,7 +212,7 @@ def predict(openai_api_key, system_prompt, history, inputs, chatbot, token_count
         chatbot, history, status_text, token_count = predict_all(openai_api_key, system_prompt, history, inputs, chatbot, token_count, top_p, temperature)
         yield chatbot, history, status_text, token_count
     print(f"传输完毕。当前token计数为{token_count}")
-    print(colorama.Fore.BLUE + f"
+    print("回答为:" +colorama.Fore.BLUE + f"{history[-1]['content']}" + colorama.Style.RESET_ALL)
     if stream:
         max_token = max_token_streaming
     else:
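The new lines in the last two hunks print the user input and the model's latest answer in blue, prefixed with plain-text labels ("输入为:" means "input is:", "回答为:" means "answer is:"), and reset the terminal style afterwards. The colorama pattern shown standalone (the init() call and sample value are illustrative additions):

    import colorama

    colorama.init()  # enable ANSI colors on Windows; harmless elsewhere
    inputs = "Hello"
    print("输入为:" + colorama.Fore.BLUE + f"{inputs}" + colorama.Style.RESET_ALL)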
|