# find_my_show / app.py
# VerVelVel's picture
# requirements
# 5b7a9af
# raw
# history blame
# 1.33 kB
import streamlit as st
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
# import pickle
# import time
# Load the data and models
@st.cache_resource
def load_data_models():
    """Load the show catalogue, precomputed embeddings, FAISS index and encoder.

    Cached with ``st.cache_resource`` rather than ``st.cache_data``: the
    returned FAISS index and SentenceTransformer model are heavyweight
    resources that should be shared across reruns and sessions, not
    serialised and copied on every call.

    Returns:
        tuple: ``(data, description_embeddings, index, embedder)`` where
        ``data`` is the DataFrame read from ``tvshows.csv``,
        ``description_embeddings`` is the array loaded from
        ``description_embeddings_mpnet.npy``, ``index`` is the FAISS index
        read from ``faiss_index.index`` and ``embedder`` is the
        ``paraphrase-multilingual-mpnet-base-v2`` SentenceTransformer.

    Note:
        The returned objects are shared, so callers must not mutate them
        in place (take a ``.copy()`` of any DataFrame slice before editing).
    """
    data = pd.read_csv('tvshows.csv')
    description_embeddings = np.load('description_embeddings_mpnet.npy')
    index = faiss.read_index('faiss_index.index')
    embedder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
    return data, description_embeddings, index, embedder
# Load everything once at module level; search_series reads `data`, `index`
# and `embedder` as module globals. `description_embeddings` is not referenced
# by the rest of the visible code.
data, description_embeddings, index, embedder = load_data_models()
def search_series(user_query, top_k=10):
    """Return the shows whose descriptions best match ``user_query``.

    Args:
        user_query: Free-text description typed by the user.
        top_k: Maximum number of shows to return.

    Returns:
        pandas.DataFrame: A copy of the matching rows of ``data`` with the
        columns ``tvshow_title``, ``description`` and ``cosine_similarity``,
        in the order returned by the FAISS index. Fewer than ``top_k`` rows
        are returned when the index holds fewer vectors.
    """
    query_embedding = embedder.encode([user_query], convert_to_tensor=True).cpu().numpy()
    # L2-normalise the query vector.
    query_embedding = query_embedding / np.linalg.norm(query_embedding, axis=1, keepdims=True)

    scores, row_ids = index.search(query_embedding, top_k)
    scores, row_ids = scores[0], row_ids[0]

    # FAISS pads the result with label -1 when it finds fewer than top_k
    # neighbours; iloc[-1] would silently return the last catalogue row as a
    # bogus hit, so drop the padding.
    found = row_ids >= 0
    results = data.iloc[row_ids[found]].copy()

    # NOTE(review): the scores are cosine similarities only if the index is an
    # inner-product index built from L2-normalised embeddings — confirm against
    # the script that built faiss_index.index.
    results['cosine_similarity'] = scores[found]
    return results[['tvshow_title', 'description', 'cosine_similarity']]
st.title('Поиск сериала по описанию')
input_text = st.text_area('Введите описание сериала')
# The script body runs on every interaction, including the first render when
# the text area is still empty; only search once the user has typed something,
# instead of embedding an empty query and showing meaningless matches.
if input_text.strip():
    st.write(search_series(input_text))