Spaces:
Running
Running
| import gradio as gr | |
| import pandas as pd | |
| import tiktoken | |
| import time | |
| from sentence_transformers import SentenceTransformer | |
| import os | |
| import torch | |
| from openai.embeddings_utils import get_embedding, cosine_similarity | |
| import os | |
# Review corpus, loaded once at import time. search() reads its "name",
# "description", "rating", "text" and "embedding" columns.
# NOTE: unpickling can execute arbitrary code; only load a trusted file here.
df = pd.read_pickle("entire_data.pkl")

# Sentence encoder used to embed incoming queries.
embedder = SentenceTransformer("all-mpnet-base-v2")
def search(query):
    """Return the entries whose reviews best match *query* semantically.

    The query is embedded with the module-level ``embedder`` and scored by
    cosine similarity against the ``embedding`` column of the module-level
    ``df``. The 15 best-scoring rows are kept and grouped by ``name``; each
    distinct name produces one entry, ordered by its best-scoring row.

    Args:
        query: Free-text search string.

    Returns:
        A list of dicts, one per distinct name, with the keys:

        * ``"name"`` and ``"description"`` - taken from the name's
          best-scoring row.
        * ``"relevance score"`` - highest similarity among the name's rows
          within the top 15.
        * ``"rating"`` - mean ``rating`` over those same rows.
        * ``"relevant_reviews"`` - ``text`` of at most three of those rows,
          best match first.

        The list is empty when ``df`` has no rows.
    """
    top_n = 15
    max_reviews = 3

    # Flatten instead of reshaping to a fixed width, so the code does not
    # depend on the encoder emitting exactly 768 dimensions.
    query_vector = embedder.encode(query).reshape(-1)

    # .item() turns the NumPy result into a plain float, so the scores form
    # a numeric Series that sorts reliably.
    scores = df["embedding"].apply(
        lambda emb: cosine_similarity(emb, query_vector).item()
    )
    best = scores.sort_values(ascending=False).head(top_n)

    # Attach the scores to a per-call frame rather than writing a column
    # into the shared ``df``, so overlapping requests cannot clobber each
    # other's similarities.
    results = df.loc[best.index].assign(similarity=best)

    resultlist = []
    # sort=False yields groups in order of first appearance, i.e. by each
    # name's best-scoring row; rows inside a group keep descending order.
    for name, group in results.groupby("name", sort=False):
        top_row = group.iloc[0]
        resultlist.append(
            {
                "name": name,
                "description": top_row["description"],
                "relevance score": float(group["similarity"].max()),
                "rating": float(group["rating"].mean()),
                # Cap by row count (not column count) at three reviews.
                "relevant_reviews": group["text"].head(max_reviews).tolist(),
            }
        )
    return resultlist
def greet(query):
    """Run ``search`` on *query* and return its result list unchanged."""
    return search(query)
# Web UI: one text input is passed to greet() and its return value is
# rendered as JSON.
demo = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="json",
)
demo.launch()