File size: 1,325 Bytes
3e7c848
 
 
 
 
5b7a9af
 
2ee99b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import streamlit as st
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
# import pickle
# import time

# Load the data and models
@st.cache_resource
def load_data_models():
    """Load the show table, description embeddings, FAISS index and encoder.

    The function is cached with ``st.cache_resource`` rather than
    ``st.cache_data`` because the returned tuple contains a FAISS index and a
    ``SentenceTransformer`` model. ``cache_data`` serializes the return value
    and hands out a copy on each call, which is meant for plain data;
    ``cache_resource`` keeps the objects themselves and shares them across
    reruns and sessions, which is what Streamlit recommends for ML models.

    Because the cached objects are shared, callers must treat them as
    read-only (copy before modifying).

    Returns:
        tuple: ``(data, description_embeddings, index, embedder)`` where
            ``data`` is the DataFrame read from ``tvshows.csv``,
            ``description_embeddings`` is the array loaded from
            ``description_embeddings_mpnet.npy``, ``index`` is the FAISS
            index read from ``faiss_index.index`` and ``embedder`` is the
            ``paraphrase-multilingual-mpnet-base-v2`` sentence encoder.
    """
    data = pd.read_csv('tvshows.csv')
    description_embeddings = np.load('description_embeddings_mpnet.npy')
    index = faiss.read_index('faiss_index.index')
    embedder = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')

    return data, description_embeddings, index, embedder

# Load everything once at module level; load_data_models() is cached by
# Streamlit, so script reruns reuse the same objects instead of reloading.
# search_series() reads `data`, `index` and `embedder` as module globals.
data, description_embeddings, index, embedder = load_data_models()

def search_series(user_query, top_k=10):
    """Return the shows whose descriptions best match a free-text query.

    The query is encoded with the module-level ``embedder``, L2-normalized and
    looked up in the module-level FAISS ``index``; the matching rows are taken
    from the module-level ``data`` frame by position.

    Args:
        user_query: Free-text description of the show to look for.
        top_k: Maximum number of results to return.

    Returns:
        pandas.DataFrame: Columns ``tvshow_title``, ``description`` and
        ``cosine_similarity``, one row per hit in the order FAISS returned
        them. The frame is empty when the query is blank, and holds fewer
        than ``top_k`` rows when the index has fewer vectors than requested.

    Note:
        The ``cosine_similarity`` column holds the raw scores returned by
        ``index.search``. They are cosine similarities only if the index is an
        inner-product index built over normalized vectors -- presumably the
        case here, but the index construction is not visible in this file.
    """
    result_columns = ['tvshow_title', 'description', 'cosine_similarity']

    # A blank query has no meaning to search for; embedding it would return
    # arbitrary rows, so answer with an empty result instead.
    if not user_query or not user_query.strip():
        return pd.DataFrame(columns=result_columns)

    query_embedding = embedder.encode([user_query], convert_to_tensor=True).cpu().numpy()

    # Normalize to unit length; clamp the norm so a degenerate all-zero
    # embedding cannot produce NaNs through division by zero.
    norms = np.linalg.norm(query_embedding, axis=1, keepdims=True)
    query_embedding = query_embedding / np.maximum(norms, np.finfo(np.float32).eps)

    # FAISS expects a C-contiguous float32 matrix.
    query_embedding = np.ascontiguousarray(query_embedding, dtype=np.float32)

    D, I = index.search(query_embedding, top_k)

    # FAISS pads missing neighbours with label -1 when it finds fewer than
    # top_k results; iloc[-1] would silently return the last row, so drop them.
    found = I[0] >= 0
    results = data.iloc[I[0][found]].copy()
    results['cosine_similarity'] = D[0][found]
    return results[result_columns]

# Page layout: a title, a free-text input and the table of matching shows.
st.title('Поиск сериала по описанию')
input_text = st.text_area('Введите описание сериала')

# The text area starts empty and Streamlit reruns this script on every
# interaction, so only search once the user has actually typed something;
# otherwise the page would show results for an empty query.
if input_text and input_text.strip():
    st.write(search_series(input_text))