Spaces:
Build error
Build error
Adding App
Browse files- app.py +79 -0
- requirements.txt +5 -0
app.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
from sentence_transformers.util import cos_sim
|
| 6 |
+
from sentence_transformers import SentenceTransformer
|
| 7 |
+
from bokeh.plotting import figure, output_notebook, show, save
|
| 8 |
+
from bokeh.io import output_file, show
|
| 9 |
+
from bokeh.models import ColumnDataSource, HoverTool
|
| 10 |
+
from sklearn.manifold import TSNE
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# Streamlit >= 1.18 provides ``st.cache_resource`` for unserialisable global
# resources such as ML models.  On older releases fall back to the legacy
# ``st.cache`` with ``allow_output_mutation=True`` so Streamlit does not try
# to hash the whole model on every call to detect mutations.
_cache_model = getattr(st, "cache_resource", None) or st.cache(allow_output_mutation=True)


@_cache_model
def load_model():
    """Load the Spanish sentence-embedding model once and reuse it across reruns.

    Returns:
        The ``SentenceTransformer`` for
        ``hackathon-pln-es/bertin-roberta-base-finetuning-esnli``, switched to
        evaluation mode via ``model.eval()``.
    """
    model = SentenceTransformer('hackathon-pln-es/bertin-roberta-base-finetuning-esnli')
    model.eval()
    return model
|
| 18 |
+
|
| 19 |
+
@st.cache
def load_plot_data():
    """Read the precomputed corpus embeddings and their metadata from disk.

    Returns:
        A ``(embs, data)`` tuple: the array loaded from
        ``semeval2015-embs.npy`` and the DataFrame loaded from
        ``semeval2015-data.csv``.
    """
    return np.load('semeval2015-embs.npy'), pd.read_csv('semeval2015-data.csv')
|
| 24 |
+
|
| 25 |
+
# ---- Page header -----------------------------------------------------------
st.title("Sentence Embedding for Spanish with Bertin")
st.write("Sentence embedding for spanish trained on NLI. Used for Sentence Textual Similarity. Based on the model hackathon-pln-es/bertin-roberta-base-finetuning-esnli.")
st.write("Introduce two sentence to see their cosine similarity and a graph showing them in the embedding space.")
st.write("Authors: Anibal Pérez, Emilio Tomás Ariza, Lautaro Gesuelli y Mauricio Mazuecos.")

# ---- User input ------------------------------------------------------------
sent1 = st.text_area('Enter sentence 1')
sent2 = st.text_area('Enter sentence 2')

if st.button('Compute similarity'):
    if sent1 and sent2:
        # Embed both sentences in one batch and report their cosine similarity.
        model = load_model()
        encodings = model.encode([sent1, sent2])
        sim = cos_sim(encodings[0], encodings[1]).numpy().tolist()[0][0]
        st.text('Cosine Similarity: {0:.4f}'.format(sim))

        print('Generating visualization...')
        sentembs, data = load_plot_data()
        # t-SNE is re-fitted on every click over the stored embeddings plus the
        # two new ones, so the user sentences are always the last two rows.
        X_embedded = TSNE(
            n_components=2,
            learning_rate='auto',
            init='random',
        ).fit_transform(np.concatenate([sentembs, encodings], axis=0))

        # DataFrame.append was removed in pandas 2.0; pd.concat builds the same
        # frame and leaves the cached ``data`` object untouched.
        user_rows = pd.DataFrame([
            {'sent': sent1, 'color': '#F0E442'},  # sentence 1
            {'sent': sent2, 'color': '#D55E00'},  # sentence 2
        ])
        data = pd.concat([data, user_rows], ignore_index=True)
        # NOTE(review): assumes the CSV has one row per stored embedding, in
        # the same order, so the projected coordinates line up — confirm.
        data['x'] = X_embedded[:, 0]
        data['y'] = X_embedded[:, 1]

        source = ColumnDataSource(data)

        p = figure(title="Embeddings in space")
        p.circle(
            x='x',
            y='y',
            legend_label="Objects",
            color='color',
            fill_alpha=0.5,
            line_color="blue",
            size=14,
            source=source,
        )
        # Show the sentence text when hovering over a point.
        p.add_tools(HoverTool(
            tooltips=[
                ('sent', '@sent'),
            ],
            formatters={
                '@sent': 'printf',
            },
            mode='mouse',
        ))
        st.bokeh_chart(p, use_container_width=True)
    else:
        st.write('Missing a sentences')
|
| 78 |
+
|
| 79 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
sentence-transformers==2.2.0
transformers==4.17.0
torch==1.10.2
# "sklearn" on PyPI is a deprecated placeholder; the real package is scikit-learn.
# app.py passes learning_rate='auto' to TSNE, an option introduced in scikit-learn 1.0.
scikit-learn>=1.0
bokeh==2.4.1
|