Resolve merge conflicts; add more reply languages
- ChuanhuChatbot.py +9 -0
- modules/chat_func.py +10 -2
- modules/llama_func.py +5 -2
- modules/presets.py +15 -4
ChuanhuChatbot.py
CHANGED

@@ -169,6 +169,12 @@ with gr.Blocks(
                     label="实时传输回答", value=True, visible=enable_streaming_option
                 )
                 use_websearch_checkbox = gr.Checkbox(label="使用在线搜索", value=False)
+                language_select_dropdown = gr.Dropdown(
+                    label="选择回复语言(针对搜索&索引功能)",
+                    choices=REPLY_LANGUAGES,
+                    multiselect=False,
+                    value=REPLY_LANGUAGES[0]
+                )
                 index_files = gr.Files(label="上传索引文件", type="file", multiple=True)
 
             with gr.Tab(label="Prompt"):
@@ -289,6 +295,7 @@ with gr.Blocks(
             model_select_dropdown,
             use_websearch_checkbox,
             index_files,
+            language_select_dropdown,
         ],
         outputs=[chatbot, history, status_display, token_count],
         show_progress=True,
@@ -340,6 +347,7 @@ with gr.Blocks(
             temperature,
             use_streaming_checkbox,
             model_select_dropdown,
+            language_select_dropdown,
         ],
         [chatbot, history, status_display, token_count],
         show_progress=True,
@@ -364,6 +372,7 @@ with gr.Blocks(
             temperature,
             gr.State(0),
             model_select_dropdown,
+            language_select_dropdown,
         ],
         [chatbot, history, status_display, token_count],
         show_progress=True,
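
Note: below is a minimal, self-contained sketch (not part of the commit) of the wiring pattern this diff relies on: appending a gr.Dropdown to a handler's inputs list so its current value arrives as one more positional argument. The handler, labels, and trimmed language list here are illustrative; only the gr.Dropdown/inputs wiring mirrors the real code.

import gradio as gr

REPLY_LANGUAGES = ["中文", "English", "日本語"]  # trimmed copy for the demo

def respond(message, reply_language):
    # The dropdown's current value arrives as a plain string argument.
    return f"(would reply in {reply_language}) you said: {message}"

with gr.Blocks() as demo:
    msg = gr.Textbox(label="message")
    language_select_dropdown = gr.Dropdown(
        label="reply language",
        choices=REPLY_LANGUAGES,
        multiselect=False,
        value=REPLY_LANGUAGES[0],
    )
    out = gr.Textbox(label="reply")
    send = gr.Button("Send")
    # Appending the dropdown is all the wiring the diff adds to each handler.
    send.click(respond, inputs=[msg, language_select_dropdown], outputs=[out])

if __name__ == "__main__":
    demo.launch()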
modules/chat_func.py
CHANGED

@@ -264,9 +264,12 @@ def predict(
     selected_model=MODELS[0],
     use_websearch=False,
     files = None,
+    reply_language="中文",
     should_check_token_count=True,
 ):  # repetition_penalty, top_k
     logging.info("输入为:" + colorama.Fore.BLUE + f"{inputs}" + colorama.Style.RESET_ALL)
+    if reply_language == "跟随问题语言(不稳定)":
+        reply_language = "the same language as the question, such as English, 中文, 日本語, Español, Français, or Deutsch."
     if files:
         msg = "构建索引中……(这可能需要比较久的时间)"
         logging.info(msg)
@@ -274,7 +277,7 @@ def predict(
         index = construct_index(openai_api_key, file_src=files)
         msg = "索引构建完成,获取回答中……"
         yield chatbot+[(inputs, "")], history, msg, all_token_counts
-        history, chatbot, status_text = chat_ai(openai_api_key, index, inputs, history, chatbot)
+        history, chatbot, status_text = chat_ai(openai_api_key, index, inputs, history, chatbot, reply_language)
         yield chatbot, history, status_text, all_token_counts
         return
 
@@ -294,6 +297,7 @@ def predict(
             replace_today(WEBSEARCH_PTOMPT_TEMPLATE)
             .replace("{query}", inputs)
             .replace("{web_results}", "\n\n".join(web_results))
+            .replace("{reply_language}", reply_language)
         )
     else:
         link_references = ""
@@ -399,6 +403,7 @@ def retry(
     temperature,
     stream=False,
     selected_model=MODELS[0],
+    reply_language="中文",
 ):
     logging.info("重试中……")
     if len(history) == 0:
@@ -418,6 +423,7 @@ def retry(
         temperature,
         stream=stream,
         selected_model=selected_model,
+        reply_language=reply_language,
     )
     logging.info("重试中……")
     for x in iter:
@@ -435,6 +441,7 @@ def reduce_token_size(
     temperature,
     max_token_count,
     selected_model=MODELS[0],
+    reply_language="中文",
 ):
     logging.info("开始减少token数量……")
     iter = predict(
@@ -448,6 +455,7 @@ def reduce_token_size(
         temperature,
         selected_model=selected_model,
         should_check_token_count=False,
+        reply_language=reply_language,
     )
     logging.info(f"chatbot: {chatbot}")
     flag = False
@@ -463,4 +471,4 @@ def reduce_token_size(
         sum(token_count) if len(token_count) > 0 else 0,
     ), token_count
     logging.info(msg)
-    logging.info("减少token数量完毕")
+    logging.info("减少token数量完毕")
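
Note: below is a minimal sketch (not part of the commit) of the substitution pattern predict() uses. str.replace is applied instead of str.format so the other literal placeholders in the template (e.g. {web_results}) survive until their own .replace call; the template text is abbreviated here.

WEBSEARCH_PROMPT = "Web results: {web_results}\nQuery: {query}\nReply in {reply_language}"

def build_websearch_prompt(inputs, web_results, reply_language):
    if reply_language == "跟随问题语言(不稳定)":
        # The "follow the question's language" option is rewritten into an
        # English instruction, exactly as in the diff above.
        reply_language = ("the same language as the question, such as English, "
                          "中文, 日本語, Español, Français, or Deutsch.")
    return (WEBSEARCH_PROMPT
            .replace("{query}", inputs)
            .replace("{web_results}", "\n\n".join(web_results))
            .replace("{reply_language}", reply_language))

print(build_websearch_prompt("什么是机器学习?", ["result 1", "result 2"], "English"))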
modules/llama_func.py
CHANGED

@@ -102,6 +102,7 @@ def chat_ai(
     question,
     context,
     chatbot,
+    reply_language,
 ):
     os.environ["OPENAI_API_KEY"] = api_key
 
@@ -116,6 +117,7 @@ def chat_ai(
         SIM_K,
         INDEX_QUERY_TEMPRATURE,
         context,
+        reply_language,
     )
     if response is None:
         status_text = "查询失败,请换个问法试试"
@@ -139,6 +141,7 @@ def ask_ai(
     sim_k=1,
     temprature=0,
     prefix_messages=[],
+    reply_language="中文",
 ):
     os.environ["OPENAI_API_KEY"] = api_key
 
@@ -153,8 +156,8 @@ def ask_ai(
     )
 
     response = None  # Initialize response variable to avoid UnboundLocalError
-    qa_prompt = QuestionAnswerPrompt(prompt_tmpl)
-    rf_prompt = RefinePrompt(refine_tmpl)
+    qa_prompt = QuestionAnswerPrompt(prompt_tmpl.replace("{reply_language}", reply_language))
+    rf_prompt = RefinePrompt(refine_tmpl.replace("{reply_language}", reply_language))
     response = index.query(
         question,
         llm_predictor=llm_predictor,
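
Note: below is a minimal sketch (not part of the commit) of why {reply_language} is substituted into the template string before it is wrapped in QuestionAnswerPrompt/RefinePrompt: the wrapper treats any remaining {field} as a template variable for the index to fill. A stand-in class replaces llama_index so the snippet runs on its own.

PROMPT_TMPL = ("Context information is below.\n{context_str}\n"
               "Answer the question: {query_str}\nReply in {reply_language}\n")

class StandInPrompt:
    """Stand-in for llama_index's prompt classes: every remaining {field}
    is expected to be filled in later by the query engine."""
    def __init__(self, template):
        self.template = template
    def format(self, **kwargs):
        return self.template.format(**kwargs)

# Substitute first, so the wrapped template only exposes {context_str}
# and {query_str} to the index.
qa_prompt = StandInPrompt(PROMPT_TMPL.replace("{reply_language}", "English"))
print(qa_prompt.format(context_str="(retrieved docs)", query_str="What is attention?"))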
modules/presets.py
CHANGED

@@ -17,7 +17,7 @@ no_apikey_msg = "API key长度不是51位,请检查是否输入正确。"
 no_input_msg = "请输入对话内容。"  # 未输入对话内容
 
 max_token_streaming = 3500  # 流式对话时的最大 token 数
-timeout_streaming =
+timeout_streaming = 10  # 流式对话时的超时时间
 max_token_all = 3500  # 非流式对话时的最大 token 数
 timeout_all = 200  # 非流式对话时的超时时间
 enable_streaming_option = True  # 是否启用选择选择是否实时显示回答的勾选框
@@ -50,6 +50,16 @@ MODELS = [
     "gpt-4-32k-0314",
 ]  # 可选的模型
 
+REPLY_LANGUAGES = [
+    "中文",
+    "English",
+    "日本語",
+    "Español",
+    "Français",
+    "Deutsch",
+    "跟随问题语言(不稳定)"
+]
+
 
 WEBSEARCH_PTOMPT_TEMPLATE = """\
 Web search results:
@@ -59,7 +69,8 @@ Current date: {current_date}
 
 Instructions: Using the provided web search results, write a comprehensive reply to the given query. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.
 Query: {query}
-Reply in
+Reply in {reply_language}
+"""
 
 PROMPT_TEMPLATE = """\
 Context information is below.
@@ -72,7 +83,7 @@ Make sure to cite results using [number] notation after the reference.
 If the provided context information refer to multiple subjects with the same name, write separate answers for each subject.
 Use prior knowledge only if the given context didn't provide enough information.
 Answer the question: {query_str}
-Reply in
+Reply in {reply_language}
 """
 
 REFINE_TEMPLATE = """\
@@ -84,6 +95,6 @@ We have the opportunity to refine the existing answer
 {context_msg}
 ------------
 Given the new context, refine the original answer to better
-
+Reply in {reply_language}
 If the context isn't useful, return the original answer.
 """
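
Note: below is a minimal sketch (not part of the commit) sanity-checking the contract presets.py now establishes: every prompt template must carry the {reply_language} placeholder, and the dropdown's default is REPLY_LANGUAGES[0]. Template bodies are abbreviated stand-ins.

REPLY_LANGUAGES = [
    "中文", "English", "日本語", "Español", "Français", "Deutsch",
    "跟随问题语言(不稳定)",
]
TEMPLATES = {
    "WEBSEARCH_PTOMPT_TEMPLATE": "Query: {query}\nReply in {reply_language}\n",
    "PROMPT_TEMPLATE": "Answer the question: {query_str}\nReply in {reply_language}\n",
    "REFINE_TEMPLATE": "Reply in {reply_language}\nIf the context isn't useful, return the original answer.\n",
}
for name, tmpl in TEMPLATES.items():
    # predict()/ask_ai() would silently produce an unlocalized prompt otherwise.
    assert "{reply_language}" in tmpl, f"{name} is missing {{reply_language}}"
assert REPLY_LANGUAGES[0] == "中文"  # the default shown in the UI dropdown
print("all templates carry {reply_language}")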