Spaces:
Sleeping
Sleeping
Tuchuanhuhuhu
committed on
Commit
·
215bf1c
1
Parent(s):
8c60761
补充缺失的依赖
Browse files
- modules/index_func.py +5 -3
- requirements.txt +2 -0
modules/index_func.py
CHANGED
|
@@ -42,7 +42,7 @@ def get_documents(file_src):
|
|
| 42 |
for file in file_src:
|
| 43 |
filepath = file.name
|
| 44 |
filename = os.path.basename(filepath)
|
| 45 |
-
file_type = os.path.splitext(
|
| 46 |
logging.info(f"loading file: {filename}")
|
| 47 |
try:
|
| 48 |
if file_type == ".pdf":
|
|
@@ -87,8 +87,9 @@ def get_documents(file_src):
|
|
| 87 |
loader = TextLoader(filepath, "utf8")
|
| 88 |
texts = loader.load()
|
| 89 |
except Exception as e:
|
|
|
|
| 90 |
logging.error(f"Error loading file: {filename}")
|
| 91 |
-
|
| 92 |
|
| 93 |
texts = text_splitter.split_documents(texts)
|
| 94 |
documents.extend(texts)
|
|
@@ -142,6 +143,7 @@ def construct_index(
|
|
| 142 |
return index
|
| 143 |
|
| 144 |
except Exception as e:
|
|
|
|
| 145 |
logging.error("索引构建失败!", e)
|
| 146 |
-
|
| 147 |
return None
|
|
|
|
| 42 |
for file in file_src:
|
| 43 |
filepath = file.name
|
| 44 |
filename = os.path.basename(filepath)
|
| 45 |
+
file_type = os.path.splitext(filename)[1]
|
| 46 |
logging.info(f"loading file: {filename}")
|
| 47 |
try:
|
| 48 |
if file_type == ".pdf":
|
|
|
|
| 87 |
loader = TextLoader(filepath, "utf8")
|
| 88 |
texts = loader.load()
|
| 89 |
except Exception as e:
|
| 90 |
+
import traceback
|
| 91 |
logging.error(f"Error loading file: {filename}")
|
| 92 |
+
traceback.print_exc()
|
| 93 |
|
| 94 |
texts = text_splitter.split_documents(texts)
|
| 95 |
documents.extend(texts)
|
|
|
|
| 143 |
return index
|
| 144 |
|
| 145 |
except Exception as e:
|
| 146 |
+
import traceback
|
| 147 |
logging.error("索引构建失败!", e)
|
| 148 |
+
traceback.print_exc()
|
| 149 |
return None
|
requirements.txt
CHANGED
|
@@ -22,3 +22,5 @@ google-search-results
|
|
| 22 |
arxiv
|
| 23 |
wikipedia
|
| 24 |
google.generativeai
|
|
|
|
|
|
|
|
|
| 22 |
arxiv
|
| 23 |
wikipedia
|
| 24 |
google.generativeai
|
| 25 |
+
openai
|
| 26 |
+
unstructured
|