NickNYU committed
Commit c9543aa · 1 Parent(s): cec08c7

rebase on master

app.py CHANGED
@@ -1,59 +1,13 @@
-<<<<<<< HEAD
-import logging
-import sys
-
-import streamlit as st
-from dotenv import load_dotenv
-
-from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision
-
-logging.basicConfig(
-    stream=sys.stdout, level=logging.INFO
-)  # logging.DEBUG for more verbose output
-logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
-
-# Sidebar contents
-with st.sidebar:
-    st.title("🤗💬 LLM Chat App")
-    st.markdown(
-        """
-    ## About
-    This app is an LLM-powered chatbot built using:
-    - [Streamlit](https://streamlit.io/)
-    - [LangChain](https://python.langchain.com/)
-    - [X-Pipe](https://github.com/ctripcorp/x-pipe)
-    """
-    )
-    # add_vertical_space(5)
-    st.write("Made by Nick")
-
-
-def main() -> None:
-    st.header("X-Pipe Wiki 机器人 💬")
-    robot_manager = XPipeRobotManagerFactory.get_or_create(
-        XPipeRobotRevision.HUGGINGFACE_VERSION_0
-    )
-    robot = robot_manager.get_robot()
-    query = st.text_input("X-Pipe Wiki 问题:")
-    if query:
-        response = robot.ask(question=query)
-        st.write(response)
-
-
-if __name__ == "__main__":
-    load_dotenv()
-    main()
-=======
 import logging
 import sys
 
 import streamlit as st
+from dotenv import load_dotenv
 
 from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision
-from xpipe_wiki.robot_manager import XPipeWikiRobot, AzureOpenAIXPipeWikiRobot
 
 logging.basicConfig(
-    stream=sys.stdout, level=logging.DEBUG
+    stream=sys.stdout, level=logging.INFO
 )  # logging.DEBUG for more verbose output
 logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
 
@@ -76,7 +30,7 @@ with st.sidebar:
 def main() -> None:
     st.header("X-Pipe Wiki 机器人 💬")
     robot_manager = XPipeRobotManagerFactory.get_or_create(
-        XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0
+        XPipeRobotRevision.HUGGINGFACE_VERSION_0
    )
     robot = robot_manager.get_robot()
     query = st.text_input("X-Pipe Wiki 问题:")
@@ -86,5 +40,5 @@ def main() -> None:
 
 
 if __name__ == "__main__":
+    load_dotenv()
     main()
->>>>>>> 2bf20e0 (Update app.py)
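
The resolution keeps the HEAD side: app.py now loads a local .env via python-dotenv before main() runs, logs at INFO, and selects the HUGGINGFACE_VERSION_0 robot revision. A minimal sketch of why load_dotenv() has to precede main() (OPENAI_API_KEY is a hypothetical key for illustration; the commit does not show which variables the robot factory actually reads):

import os

from dotenv import load_dotenv

load_dotenv()  # copies key=value pairs from ./.env into os.environ
# Anything constructed afterwards, e.g. XPipeRobotManagerFactory.get_or_create(...),
# can then read the configured secrets:
api_key = os.environ.get("OPENAI_API_KEY")  # hypothetical example variable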
 
llama/context.py CHANGED
@@ -1,4 +1,5 @@
 from abc import abstractmethod, ABC
+from typing import List
 
 from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding, Document
 from llama_index import StorageContext
@@ -8,7 +9,6 @@ from langchain_manager.manager import BaseLangChainManager
 
 
 class ServiceContextManager(Lifecycle, ABC):
-
     @abstractmethod
     def get_service_context(self) -> ServiceContext:
         pass
@@ -40,23 +40,33 @@ class AzureServiceContextManager(ServiceContextManager):
     )
 
     def do_start(self) -> None:
-        self.logger.info("[do_start][embedding] last used usage: %d",
-                         self.service_context.embed_model.total_tokens_used)
-        self.logger.info("[do_start][predict] last used usage: %d",
-                         self.service_context.llm_predictor.total_tokens_used)
+        self.logger.info(
+            "[do_start][embedding] last used usage: %d",
+            self.service_context.embed_model.total_tokens_used,
+        )
+        self.logger.info(
+            "[do_start][predict] last used usage: %d",
+            self.service_context.llm_predictor.total_tokens_used,
+        )
 
     def do_stop(self) -> None:
-        self.logger.info("[do_stop][embedding] last used usage: %d",
-                         self.service_context.embed_model.total_tokens_used)
-        self.logger.info("[do_stop][predict] last used usage: %d",
-                         self.service_context.llm_predictor.total_tokens_used)
+        self.logger.info(
+            "[do_stop][embedding] last used usage: %d",
+            self.service_context.embed_model.total_tokens_used,
+        )
+        self.logger.info(
+            "[do_stop][predict] last used usage: %d",
+            self.service_context.llm_predictor.total_tokens_used,
+        )
 
     def do_dispose(self) -> None:
-        self.logger.info("[do_dispose] total used token: %d", self.service_context.llm_predictor.total_tokens_used)
+        self.logger.info(
+            "[do_dispose] total used token: %d",
+            self.service_context.llm_predictor.total_tokens_used,
+        )
 
 
 class StorageContextManager(Lifecycle, ABC):
-
     @abstractmethod
     def get_storage_context(self) -> StorageContext:
         pass
@@ -65,9 +75,11 @@ class StorageContextManager(Lifecycle, ABC):
 class LocalStorageContextManager(StorageContextManager):
     storage_context: StorageContext
 
-    def __init__(self,
-                 dataset_path: str = "./dataset",
-                 service_context_manager: ServiceContextManager = None) -> None:
+    def __init__(
+        self,
+        service_context_manager: ServiceContextManager,
+        dataset_path: str = "./dataset",
+    ) -> None:
         super().__init__()
         self.dataset_path = dataset_path
         self.service_context_manager = service_context_manager
@@ -77,8 +89,11 @@ class LocalStorageContextManager(StorageContextManager):
 
     def do_init(self) -> None:
         from llama.utils import is_local_storage_files_ready
+
         if is_local_storage_files_ready(self.dataset_path):
-            self.storage_context = StorageContext.from_defaults(persist_dir=self.dataset_path)
+            self.storage_context = StorageContext.from_defaults(
+                persist_dir=self.dataset_path
+            )
         else:
             docs = self._download()
             self._indexing(docs)
@@ -94,14 +109,17 @@ class LocalStorageContextManager(StorageContextManager):
     def do_dispose(self) -> None:
         self.storage_context.persist(self.dataset_path)
 
-    def _download(self) -> [Document]:
+    def _download(self) -> List[Document]:
         from llama.data_loader import GithubLoader
+
         loader = GithubLoader()
         return loader.load()
 
-    def _indexing(self, docs: [Document]):
+    def _indexing(self, docs: List[Document]) -> None:
         from llama_index import GPTVectorStoreIndex
-        index = GPTVectorStoreIndex.from_documents(docs,
-                                                   service_context=self.service_context_manager.get_service_context())
+
+        index = GPTVectorStoreIndex.from_documents(
+            docs, service_context=self.service_context_manager.get_service_context()
+        )
         index.storage_context.persist(persist_dir=self.dataset_path)
         self.storage_context = index.storage_context
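
Beyond the black-style reformatting of the logger calls, two changes here are substantive. [Document] is a list literal, not a type, so it was never a valid annotation; List[Document], backed by the new from typing import List, is the correct spelling. And LocalStorageContextManager now requires its ServiceContextManager up front instead of defaulting it to None. A hypothetical caller under the new signature (the AzureServiceContextManager construction is an assumption; its real arguments are not shown in this commit):

# Sketch of the reordered constructor; argument values are illustrative.
service_ctx_mgr = AzureServiceContextManager()  # assumed: real ctor args omitted
storage_ctx_mgr = LocalStorageContextManager(
    service_context_manager=service_ctx_mgr,  # now required, no None default
    dataset_path="./dataset",                 # unchanged default
)
storage_ctx_mgr.do_init()  # loads the persisted index, or downloads docs and builds it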
llama/data_loader.py CHANGED
@@ -16,10 +16,10 @@ class WikiLoader(ABC):
 
 class GithubLoader(WikiLoader):
     def __init__(
-            self,
-            github_owner: Optional[str] = None,
-            repo: Optional[str] = None,
-            dirs: Optional[Sequence[str]] = None,
+        self,
+        github_owner: Optional[str] = None,
+        repo: Optional[str] = None,
+        dirs: Optional[Sequence[str]] = None,
     ):
         super().__init__()
         self.owner = (
requirements.txt CHANGED
@@ -4,4 +4,5 @@ streamlit
 ruff
 black
 mypy
-accelerate
+accelerate
+python-dotenv
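
python-dotenv supplies the dotenv module that app.py now imports; accelerate is deleted and re-added, most likely because the file previously lacked a trailing newline. If the app ever needs to start without the new dependency, a guarded import is one option (a sketch, not code from this repo):

try:
    from dotenv import load_dotenv  # backed by the new python-dotenv entry
except ImportError:
    def load_dotenv(*args: object, **kwargs: object) -> bool:
        # No-op fallback mirroring load_dotenv's bool return value.
        return False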
xpipe_wiki/robot_manager.py CHANGED
@@ -59,10 +59,6 @@ class AzureXPipeWikiRobotManager(XPipeWikiRobotManager):
         index = load_index_from_storage(
             storage_context=self.storage_context_manager.get_storage_context(),
             service_context=self.service_context_manager.get_service_context(),
-            <<<<<<< HEAD
-            =======
-
-            >>>>>>> 13f45af (Update xpipe_wiki/robot_manager.py)
         )
         self.query_engine = index.as_query_engine(
             service_context=self.service_context_manager.get_service_context()
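
This hunk removes conflict markers left behind by an earlier merge; with them in place, importing xpipe_wiki.robot_manager would raise a SyntaxError, since <<<<<<< is not valid Python. A small repo check can catch such leftovers before they land (a sketch with a hypothetical helper, not part of this repo):

import pathlib
import re
from typing import List

# Matches "<<<<<<< ", "=======", and ">>>>>>> " at the start of a line.
MARKER = re.compile(r"^(<{7} |={7}$|>{7} )")

def find_conflict_markers(root: str = ".") -> List[str]:
    hits: List[str] = []
    for path in pathlib.Path(root).rglob("*.py"):
        for lineno, line in enumerate(path.read_text(encoding="utf-8").splitlines(), start=1):
            if MARKER.match(line):
                hits.append(f"{path}:{lineno}: {line.rstrip()}")
    return hits

if __name__ == "__main__":
    print("\n".join(find_conflict_markers()))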