Update README.md
Browse files
README.md
CHANGED
|
@@ -30,15 +30,34 @@ pip install -U sentence-transformers
|
|
| 30 |
Then you can use the model like this:
|
| 31 |
|
| 32 |
```python
|
| 33 |
-
from sentence_transformers import SentenceTransformer
|
| 34 |
-
sentences = ["Una ragazza si acconcia i capelli.", "Una ragazza si sta spazzolando i capelli."]
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
print(embeddings)
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
```
|
| 41 |
|
|
|
|
|
|
|
| 42 |
## Usage (HuggingFace Transformers)
|
| 43 |
Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: First, you pass your input through the transformer model, then you have to apply the right pooling operation on top of the contextualized word embeddings.
|
| 44 |
|
|
@@ -55,7 +74,8 @@ def mean_pooling(model_output, attention_mask):
|
|
| 55 |
|
| 56 |
|
| 57 |
# Sentences we want sentence embeddings for
|
| 58 |
-
|
|
|
|
| 59 |
|
| 60 |
# Load model from HuggingFace Hub
|
| 61 |
tokenizer = AutoTokenizer.from_pretrained('nickprock/sentence-bert-base-italian-uncased')
|
|
@@ -73,7 +93,6 @@ sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']
|
|
| 73 |
|
| 74 |
print("Sentence embeddings:")
|
| 75 |
print(sentence_embeddings)
|
| 76 |
-
|
| 77 |
```
|
| 78 |
|
| 79 |
|
|
|
|
| 30 |
Then you can use the model like this:
|
| 31 |
|
| 32 |
```python
|
| 33 |
+
from sentence_transformers import SentenceTransformer, util
|
|
|
|
| 34 |
|
| 35 |
+
query = "Quante persone vivono a Londra?"
|
| 36 |
+
docs = ["A Londra vivono circa 9 milioni di persone", "Londra è conosciuta per il suo quartiere finanziario"]
|
|
|
|
| 37 |
|
| 38 |
+
#Load the model
|
| 39 |
+
model = SentenceTransformer('nickprock/mmarco-bert-base-italian-uncased')
|
| 40 |
+
|
| 41 |
+
#Encode query and documents
|
| 42 |
+
query_emb = model.encode(query)
|
| 43 |
+
doc_emb = model.encode(docs)
|
| 44 |
+
|
| 45 |
+
#Compute dot score between query and all document embeddings
|
| 46 |
+
scores = util.dot_score(query_emb, doc_emb)[0].cpu().tolist()
|
| 47 |
+
|
| 48 |
+
#Combine docs & scores
|
| 49 |
+
doc_score_pairs = list(zip(docs, scores))
|
| 50 |
+
|
| 51 |
+
#Sort by decreasing score
|
| 52 |
+
doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)
|
| 53 |
+
|
| 54 |
+
#Output passages & scores
|
| 55 |
+
for doc, score in doc_score_pairs:
|
| 56 |
+
print(score, doc)
|
| 57 |
```
|
| 58 |
|
| 59 |
+
|
| 60 |
+
|
| 61 |
## Usage (HuggingFace Transformers)
|
| 62 |
Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: First, you pass your input through the transformer model, then you have to apply the right pooling operation on top of the contextualized word embeddings.
|
| 63 |
|
|
|
|
| 74 |
|
| 75 |
|
| 76 |
# Sentences we want sentence embeddings for
|
| 77 |
+
query = "Quante persone vivono a Londra?"
|
| 78 |
+
docs = ["A Londra vivono circa 9 milioni di persone", "Londra è conosciuta per il suo quartiere finanziario"]
|
| 79 |
|
| 80 |
# Load model from HuggingFace Hub
|
| 81 |
tokenizer = AutoTokenizer.from_pretrained('nickprock/sentence-bert-base-italian-uncased')
|
|
|
|
| 93 |
|
| 94 |
print("Sentence embeddings:")
|
| 95 |
print(sentence_embeddings)
|
|
|
|
| 96 |
```
|
| 97 |
|
| 98 |
|