Spaces:
Sleeping
Sleeping
| import os | |
| import streamlit as st | |
| from pathlib import Path | |
| from io import StringIO | |
| #for textfiles | |
| from langchain.document_loaders import TextLoader | |
| #text splitter | |
| from langchain.text_splitter import CharacterTextSplitter | |
| #for using HuggingFace models & embeddings | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain import HuggingFaceHub | |
| # Vectorstore: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html | |
| from langchain.vectorstores import FAISS | |
| #facebook vectorization | |
| from langchain.chains.question_answering import load_qa_chain | |
| #load pdf | |
| #vectorize db index with chromadb | |
| from langchain.indexes import VectorstoreIndexCreator | |
| from langchain.chains import RetrievalQA | |
| from langchain.document_loaders import UnstructuredPDFLoader | |
# Copy the Hugging Face API token from Streamlit's secrets store into the
# process environment (presumably where the HuggingFaceHub client created in
# init() looks for it -- confirm against the LangChain docs).
os.environ.update({"HUGGINGFACEHUB_API_TOKEN": st.secrets["hf_api_key"]})
def init():
    """Create the shared embeddings model, LLM client and QA chain.

    Publishes three module-level globals:

    * ``embeddings`` -- a ``HuggingFaceEmbeddings`` built with its defaults.
    * ``llm`` -- a ``HuggingFaceHub`` client for the
      ``declare-lab/flan-alpaca-large`` repo (temperature 0, max_length 512).
    * ``chain`` -- a "stuff" question-answering chain built on ``llm``.
    """
    global embeddings, llm, chain

    embeddings = HuggingFaceEmbeddings()
    llm = HuggingFaceHub(
        repo_id="declare-lab/flan-alpaca-large",
        model_kwargs={"temperature": 0, "max_length": 512},
    )
    chain = load_qa_chain(llm, chain_type="stuff")
def pdf_file(txtFileObj):
    """Index an uploaded PDF and answer a user question about it.

    The upload is copied into the working directory, indexed with
    ``VectorstoreIndexCreator`` using the module-level ``embeddings``, and
    queried through a "stuff" ``RetrievalQA`` chain built on the module-level
    ``llm``. Both globals are set by ``init()``, which must run first.

    Args:
        txtFileObj: The uploaded file. Only its ``name`` attribute and
            ``getbuffer()`` method are used here.
    """
    st.subheader('Uploaded PDF File:')
    st.write(txtFileObj.name)

    # The name is supplied by the client: keep only its final path component
    # so the copy is always written inside the working directory.
    local_path = Path(txtFileObj.name).name
    with open(local_path, "wb") as f:
        f.write(txtFileObj.getbuffer())

    loaders = [UnstructuredPDFLoader(local_path)]
    index = VectorstoreIndexCreator(
        embedding=embeddings,
        text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),
    ).from_loaders(loaders)

    # Local chain: intentionally does not touch the global ``chain`` that
    # text_file() uses.
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=index.vectorstore.as_retriever(),
        input_key="question",
    )

    st.subheader('Enter query')
    query = st.text_input('Ask anything about the Document you uploaded')
    if query:
        answer = chain.run(question=query)
        st.subheader('Answer')
        st.write(answer)
def text_file(txtFileObj):
    """Index an uploaded text file and answer a user question about it.

    The upload is copied into the working directory, loaded with
    ``TextLoader``, split into chunks and stored in a FAISS index built with
    the module-level ``embeddings``. A query is answered by running the
    module-level ``chain`` over the chunks most similar to it. Both globals
    are set by ``init()``, which must run first.

    Args:
        txtFileObj: The uploaded file. Only its ``name`` attribute and
            ``getbuffer()`` method are used here.
    """
    st.subheader('Uploaded Text File:')
    st.write(txtFileObj.name)

    # The name is supplied by the client: keep only its final path component
    # so the copy is always written inside the working directory.
    local_path = Path(txtFileObj.name).name
    with open(local_path, "wb") as f:
        f.write(txtFileObj.getbuffer())

    documents = TextLoader(local_path).load()

    # Text Splitter
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    chunks = text_splitter.split_documents(documents)
    db = FAISS.from_documents(chunks, embeddings)

    st.subheader('Enter query')
    query = st.text_input('Ask anything about the Document you uploaded')
    if query:
        matches = db.similarity_search(query)
        answer = chain.run(input_documents=matches, question=query)
        st.subheader('Answer')
        st.write(answer)
# ---- Page layout and upload routing (runs top to bottom on every rerun) ----
st.title('Document Q&A - Ask anything in your Document')
st.subheader('This application can be used to upload text(.txt) and PDF(.pdf) files and ask questions about their contents.')

# Build the global embeddings / llm / chain that the handlers below rely on.
init()

st.sidebar.subheader('Upload document')
uploaded_file = st.sidebar.file_uploader("Upload File",type=['txt','pdf'])

if uploaded_file:
    # Compare the extension case-insensitively so an upload named e.g.
    # "REPORT.PDF" or "NOTES.TXT" is still routed to a handler.
    suffix = Path(uploaded_file.name).suffix.lower()
    if suffix == '.txt':
        st.sidebar.info(Path(uploaded_file.name))
        text_file(uploaded_file)
    elif suffix == '.pdf':
        pdf_file(uploaded_file)

with st.sidebar.expander('File'):
    if uploaded_file:
        st.info(uploaded_file.name)
    # Shows the contents of /content/ when that directory exists.
    if os.path.exists('/content/'):
        st.info(os.listdir('/content/'))