# Source: Hugging Face Space (status: Sleeping)
# File size: 1,325 Bytes
# Revisions: 3e7c848 5b7a9af 2ee99b8
import streamlit as st
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
# import pickle
# import time
# Load the data and models
@st.cache_resource
def load_data_models():
    """Load the show table, stored embeddings, FAISS index and text encoder.

    The result is cached with ``st.cache_resource`` rather than
    ``st.cache_data``: ``cache_data`` serialises the return value, which is
    not appropriate for the FAISS index and the SentenceTransformer model.
    ``cache_resource`` keeps the objects in memory and hands the same
    instances back on later calls, so callers must not mutate them in place.

    Returns:
        tuple: ``(data, description_embeddings, index, embedder)`` where
        ``data`` is the DataFrame read from ``tvshows.csv``,
        ``description_embeddings`` is the array read from
        ``description_embeddings_mpnet.npy``, ``index`` is the FAISS index
        read from ``faiss_index.index`` and ``embedder`` is the
        ``paraphrase-multilingual-mpnet-base-v2`` SentenceTransformer.
    """
    data = pd.read_csv('tvshows.csv')
    # data['description'] = data['description'].astype(str)
    description_embeddings = np.load('description_embeddings_mpnet.npy')
    index = faiss.read_index('faiss_index.index')
    embedder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
    return data, description_embeddings, index, embedder
# Load everything the app needs into module-level names. search_series reads
# `data`, `index` and `embedder` from here; `description_embeddings` is not
# used by any code visible in this file.
data, description_embeddings, index, embedder = load_data_models()
def search_series(user_query, top_k=10):
    """Return the shows whose descriptions are nearest to ``user_query``.

    The query is encoded with the module-level ``embedder``, scaled to unit
    length and looked up in the module-level FAISS ``index``.

    Args:
        user_query: Free-text description of the show to look for.
        top_k: Maximum number of neighbours to request from the index.

    Returns:
        A DataFrame with the ``tvshow_title`` and ``description`` columns of
        the matching rows of ``data`` plus a ``cosine_similarity`` column
        holding the scores returned by ``index.search``. It may contain fewer
        than ``top_k`` rows when the index cannot supply that many hits.
    """
    query_embedding = embedder.encode([user_query], convert_to_tensor=True).cpu().numpy()
    # Normalisation: scale the query vector to unit length.
    query_embedding = query_embedding / np.linalg.norm(query_embedding, axis=1, keepdims=True)
    scores, labels = index.search(query_embedding, top_k)
    # FAISS pads the result with label -1 when it finds fewer than top_k
    # neighbours; data.iloc[-1] would silently return the last row as a hit,
    # so drop those padded slots before looking up rows.
    found = labels[0] >= 0
    results = data.iloc[labels[0][found]].copy()
    # NOTE(review): these scores are cosine similarities only if the index is
    # an inner-product index built over unit-length vectors - confirm.
    results['cosine_similarity'] = scores[0][found]
    return results[['tvshow_title', 'description', 'cosine_similarity']]
# Page layout: title, free-text query box, then the table of matches.
st.title('Поиск сериала по описанию')
input_text = st.text_area('Введите описание сериала')
# Only search once the user has typed something; otherwise the first page
# load would encode an empty string and show meaningless matches.
if input_text.strip():
    st.write(search_series(input_text))