Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| from sentence_transformers import SentenceTransformer | |
| import faiss | |
| # import pickle | |
| # import time | |
@st.cache_resource
def load_data_models():
    """Load the show table, the stored embeddings, the FAISS index and the encoder.

    Streamlit re-executes the whole script on every user interaction, so the
    loader is wrapped in ``st.cache_resource``: the files are read and the
    model is instantiated once per server process and the same objects are
    handed back on later reruns. Because the objects are shared, callers
    must not mutate them in place.

    Returns:
        A tuple ``(data, description_embeddings, index, embedder)`` where
        ``data`` is the DataFrame read from ``tvshows.csv``,
        ``description_embeddings`` is the array stored in
        ``description_embeddings_mpnet.npy``, ``index`` is the FAISS index
        read from ``faiss_index.index`` and ``embedder`` is the
        ``paraphrase-multilingual-mpnet-base-v2`` SentenceTransformer.
    """
    data = pd.read_csv('tvshows.csv')
    description_embeddings = np.load('description_embeddings_mpnet.npy')
    index = faiss.read_index('faiss_index.index')
    embedder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
    return data, description_embeddings, index, embedder
# Module-level resources used by the search function and the UI below.
_loaded_resources = load_data_models()
data, description_embeddings, index, embedder = _loaded_resources
def search_series(user_query, top_k=10):
    """Return the shows whose descriptions are nearest to a free-text query.

    The query is encoded with the module-level ``embedder``, scaled to unit
    length, and looked up in the module-level FAISS ``index``. The matching
    rows of ``data`` are returned together with the scores FAISS reported.

    Args:
        user_query: Text describing the show to look for.
        top_k: Maximum number of neighbours to request from the index.

    Returns:
        A DataFrame with the columns ``tvshow_title``, ``description`` and
        ``cosine_similarity``, in the order FAISS returned the hits. It holds
        fewer than ``top_k`` rows when the index could not supply that many
        neighbours.
    """
    query_embedding = embedder.encode([user_query], convert_to_tensor=True).cpu().numpy()
    # FAISS searches take C-contiguous float32 matrices.
    query_embedding = np.ascontiguousarray(query_embedding, dtype=np.float32)
    # Normalise the query vector to unit length.
    query_embedding = query_embedding / np.linalg.norm(query_embedding, axis=1, keepdims=True)

    scores, row_ids = index.search(query_embedding, top_k)
    scores, row_ids = scores[0], row_ids[0]

    # FAISS pads the result with label -1 when it finds fewer than top_k
    # neighbours; passing -1 to iloc would silently select the last row of
    # the table, so padded slots are dropped before the lookup.
    found = row_ids >= 0
    results = data.iloc[row_ids[found]].copy()
    # NOTE(review): the scores are cosine similarities only if the index is an
    # inner-product index built over unit-length vectors -- confirm.
    results['cosine_similarity'] = scores[found]
    return results[['tvshow_title', 'description', 'cosine_similarity']]
# Page layout: a title, a free-text input and the table of matching shows.
st.title('Поиск сериала по описанию')
input_text = st.text_area('Введите описание сериала')
# The text area is empty on the first render (and whenever the user clears
# it); searching for an empty string would only display meaningless
# neighbours, so the search runs only once there is some text to embed.
if input_text.strip():
    st.write(search_series(input_text))