# Streamlit app: semantic search over TV-show descriptions.
#
# A free-text query is embedded with a multilingual sentence-transformer
# model and matched against a prebuilt FAISS index of description embeddings.

import pickle
import time

import streamlit as st
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss


# Data and model loading.
# st.cache_resource (not st.cache_data) is used because the returned FAISS
# index and SentenceTransformer model are heavyweight shared resources:
# cache_data would serialise and copy the return value on every rerun.
@st.cache_resource
def load_data_models():
    """Load the show table, stored embeddings, FAISS index and encoder.

    Reads 'tvshows.csv', 'description_embeddings_mpnet.npy' and
    'faiss_index.index' from the current working directory and instantiates
    the 'paraphrase-multilingual-mpnet-base-v2' sentence-transformer model.

    Returns:
        tuple: ``(data, description_embeddings, index, embedder)``. The
        objects are shared between reruns and sessions, so callers must not
        mutate them in place.
    """
    data = pd.read_csv('tvshows.csv')
    description_embeddings = np.load('description_embeddings_mpnet.npy')
    index = faiss.read_index('faiss_index.index')
    embedder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
    return data, description_embeddings, index, embedder


data, description_embeddings, index, embedder = load_data_models()


def search_series(user_query, top_k=10):
    """Return the shows whose descriptions best match ``user_query``.

    Args:
        user_query: Free-text description to search for.
        top_k: Maximum number of hits requested from the index.

    Returns:
        pandas.DataFrame: A copy of the matching rows with the columns
        'tvshow_title', 'description' and 'cosine_similarity', in the order
        returned by the index. It may hold fewer than ``top_k`` rows when the
        index cannot supply that many hits.
    """
    query_embedding = (
        embedder.encode([user_query], convert_to_tensor=True).cpu().numpy()
    )

    # L2-normalise the query; the floor on the norm avoids a division by
    # zero (and NaNs being sent to FAISS) for a degenerate zero vector.
    norms = np.linalg.norm(query_embedding, axis=1, keepdims=True)
    query_embedding = query_embedding / np.maximum(norms, 1e-12)

    scores, labels = index.search(query_embedding, top_k)

    # FAISS pads missing hits with label -1; without this filter
    # data.iloc[-1] would silently return the last row of the table.
    found = labels[0] >= 0
    results = data.iloc[labels[0][found]].copy()

    # NOTE(review): the score is labelled a cosine similarity, which
    # presumably relies on the index being an inner-product index built from
    # normalised embeddings - confirm against the index-building script.
    results['cosine_similarity'] = scores[0][found]
    return results[['tvshow_title', 'description', 'cosine_similarity']]


st.title('Поиск сериала по описанию')
input_text = st.text_area('Введите описание сериала')

# Only search once the user has typed something; an empty query would just
# produce arbitrary matches on the initial page load.
if input_text and input_text.strip():
    st.write(search_series(input_text))