Spaces:

drift-ai
/

faq-website

Runtime error

App Files Community

vincentclaes committed on Apr 10, 2023

Commit

4161807

1 Parent(s): a59370d

format code

Browse files

Files changed (2) hide show

app.py +22 -7
scrape_website.py +4 -14

app.py CHANGED Viewed

@@ -1,8 +1,14 @@
 import torch
-from peft import PeftModel
 import transformers
-import gradio as gr
 from scrape_website import process_webpage
 assert (
     "LlamaTokenizer" in transformers._import_structure["models.llama"]
 ), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
@@ -19,6 +25,7 @@ else:
     device = "cpu"
 try:
     if torch.backends.mps.is_available():
         device = "mps"
 except:
@@ -71,6 +78,7 @@ def generate_prompt(instruction, input=None):
 {instruction}
 ### Response:"""
 if device != "cpu":
     model.half()
 model.eval()
@@ -122,7 +130,9 @@ g = gr.Interface(
         gr.components.Textbox(
             lines=2, label="FAQ", placeholder="Ask me anything about this website?"
         ),
-        gr.components.Textbox(lines=1, label="Website URL", placeholder="https://www.meet-drift.ai/"),
         # gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
         # gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
         # gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
@@ -139,11 +149,16 @@ g = gr.Interface(
     ],
     title="FAQ A Website",
     examples=[
-        ["Can you list the capabilities this company has in bullet points?", "https://www.meet-drift.ai/"],
         ["What's the name of the founder?", "https://www.meet-drift.ai/about"],
-        ["in 1 word what's the service the company is providing?", "https://www.meet-drift.ai/"],
-    ]
-    # description="Alpaca-LoRA is a 7B-parameter LLaMA model finetuned to follow instructions. It is trained on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset and makes use of the Huggingface LLaMA implementation. For more information, please visit [the project's website](https://github.com/tloen/alpaca-lora).",
 )
 g.queue(concurrency_count=1)
 g.launch()

+import gradio as gr
 import torch
 import transformers
+# https://github.com/huggingface/peft
+# Parameter-Efficient Fine-Tuning (PEFT) methods enable efficient adaptation of pre-trained language models (PLMs)
+# to various downstream applications without fine-tuning all the model's parameters.
+from peft import PeftModel
 from scrape_website import process_webpage
 assert (
     "LlamaTokenizer" in transformers._import_structure["models.llama"]
 ), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
     device = "cpu"
 try:
+    # mps device enables high-performance training on GPU for MacOS devices with Metal programming framework.
     if torch.backends.mps.is_available():
         device = "mps"
 except:
 {instruction}
 ### Response:"""
 if device != "cpu":
     model.half()
 model.eval()
         gr.components.Textbox(
             lines=2, label="FAQ", placeholder="Ask me anything about this website?"
         ),
+        gr.components.Textbox(
+            lines=1, label="Website URL", placeholder="https://www.meet-drift.ai/"
+        ),
         # gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
         # gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
         # gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
     ],
     title="FAQ A Website",
     examples=[
+        [
+            "Can you list the capabilities this company has in bullet points?",
+            "https://www.meet-drift.ai/",
+        ],
         ["What's the name of the founder?", "https://www.meet-drift.ai/about"],
+        [
+            "in 1 word what's the service the company is providing?",
+            "https://www.meet-drift.ai/",
+        ],
+    ],
 )
 g.queue(concurrency_count=1)
 g.launch()

scrape_website.py CHANGED Viewed

@@ -1,9 +1,8 @@
 import requests
 from bs4 import BeautifulSoup
-TOKEN_CUT_OFF = 2500
-def process_webpage(url:str):
     # A set to keep track of visited pages
     visited_pages = set()
@@ -36,9 +35,6 @@ def process_webpage(url:str):
         text_list.append(text_content)
-    # Get the text content of the landing page
-    # get_child_pages(url)
     # Make a GET request to the page and get the HTML content
     response = requests.get(url)
     html_content = response.content
@@ -52,15 +48,9 @@ def process_webpage(url:str):
         for element in soup.find_all(tag):
             text_content += element.get_text() + " "
-    # # make main page as first item
-    # text_list.reverse()
-    # text_list_cut_off = text_list[:TOKEN_CUT_OFF]
-    # page_content = "\n".join(text_list_cut_off)
-    # # Print the text content of the landing page and all child pages
-    # print(page_content)
-    # return page_content
     print(text_content)
     return text_content
-if __name__ == '__main__':
-    process_webpage(url="https://www.meet-drift.ai/")

 import requests
 from bs4 import BeautifulSoup
+def process_webpage(url: str):
     # A set to keep track of visited pages
     visited_pages = set()
         text_list.append(text_content)
     # Make a GET request to the page and get the HTML content
     response = requests.get(url)
     html_content = response.content
         for element in soup.find_all(tag):
             text_content += element.get_text() + " "
     print(text_content)
     return text_content
+if __name__ == "__main__":
+    process_webpage(url="https://www.meet-drift.ai/")