Tom Aarsen
committed on
Commit
·
96fbb87
1
Parent(s):
ad207b7
Update README outputs + dim (768 -> 1024)
Browse files
README.md
CHANGED
|
@@ -2950,12 +2950,12 @@ doc_embeddings = model.encode([
|
|
| 2950 |
"search_document: TSNE is a dimensionality reduction algorithm created by Laurens van Der Maaten",
|
| 2951 |
])
|
| 2952 |
print(query_embeddings.shape, doc_embeddings.shape)
|
| 2953 |
-
# (2, 768) (1, 768)
|
| 2954 |
|
| 2955 |
similarities = model.similarity(query_embeddings, doc_embeddings)
|
| 2956 |
print(similarities)
|
| 2957 |
-
# tensor([[0.
|
| 2958 |
-
# [0.
|
| 2959 |
```
|
| 2960 |
|
| 2961 |
<details><summary>Click to see Sentence Transformers usage with Matryoshka Truncation</summary>
|
|
@@ -2979,8 +2979,8 @@ print(query_embeddings.shape, doc_embeddings.shape)
|
|
| 2979 |
|
| 2980 |
similarities = model.similarity(query_embeddings, doc_embeddings)
|
| 2981 |
print(similarities)
|
| 2982 |
-
# tensor([[0.
|
| 2983 |
-
# [0.
|
| 2984 |
```
|
| 2985 |
|
| 2986 |
Note the small differences compared to the full 1024-dimensional similarities.
|
|
@@ -3023,12 +3023,12 @@ query_embeddings = F.normalize(query_embeddings, p=2, dim=1)
|
|
| 3023 |
doc_embeddings = mean_pooling(documents_outputs, encoded_documents["attention_mask"])
|
| 3024 |
doc_embeddings = F.normalize(doc_embeddings, p=2, dim=1)
|
| 3025 |
print(query_embeddings.shape, doc_embeddings.shape)
|
| 3026 |
-
# torch.Size([2, 768]) torch.Size([1, 768])
|
| 3027 |
|
| 3028 |
similarities = query_embeddings @ doc_embeddings.T
|
| 3029 |
print(similarities)
|
| 3030 |
-
# tensor([[0.
|
| 3031 |
-
# [0.
|
| 3032 |
```
|
| 3033 |
|
| 3034 |
<details><summary>Click to see Transformers usage with Matryoshka Truncation</summary>
|
|
@@ -3076,11 +3076,11 @@ print(query_embeddings.shape, doc_embeddings.shape)
|
|
| 3076 |
|
| 3077 |
similarities = query_embeddings @ doc_embeddings.T
|
| 3078 |
print(similarities)
|
| 3079 |
-
# tensor([[0.
|
| 3080 |
-
# [0.
|
| 3081 |
```
|
| 3082 |
|
| 3083 |
-
Note the small differences compared to the full 768-dimensional similarities.
|
| 3084 |
|
| 3085 |
</details>
|
| 3086 |
|
|
@@ -3116,7 +3116,7 @@ const doc_embeddings = await extractor([
|
|
| 3116 |
|
| 3117 |
// Compute similarity scores
|
| 3118 |
const similarities = await matmul(query_embeddings, doc_embeddings.transpose(1, 0));
|
| 3119 |
-
console.log(similarities.tolist());
|
| 3120 |
```
|
| 3121 |
|
| 3122 |
|
|
|
|
| 2950 |
"search_document: TSNE is a dimensionality reduction algorithm created by Laurens van Der Maaten",
|
| 2951 |
])
|
| 2952 |
print(query_embeddings.shape, doc_embeddings.shape)
|
| 2953 |
+
# (2, 1024) (1, 1024)
|
| 2954 |
|
| 2955 |
similarities = model.similarity(query_embeddings, doc_embeddings)
|
| 2956 |
print(similarities)
|
| 2957 |
+
# tensor([[0.6518],
|
| 2958 |
+
# [0.4237]])
|
| 2959 |
```
|
| 2960 |
|
| 2961 |
<details><summary>Click to see Sentence Transformers usage with Matryoshka Truncation</summary>
|
|
|
|
| 2979 |
|
| 2980 |
similarities = model.similarity(query_embeddings, doc_embeddings)
|
| 2981 |
print(similarities)
|
| 2982 |
+
# tensor([[0.6835],
|
| 2983 |
+
# [0.3982]])
|
| 2984 |
```
|
| 2985 |
|
| 2986 |
Note the small differences compared to the full 1024-dimensional similarities.
|
|
|
|
| 3023 |
doc_embeddings = mean_pooling(documents_outputs, encoded_documents["attention_mask"])
|
| 3024 |
doc_embeddings = F.normalize(doc_embeddings, p=2, dim=1)
|
| 3025 |
print(query_embeddings.shape, doc_embeddings.shape)
|
| 3026 |
+
# torch.Size([2, 1024]) torch.Size([1, 1024])
|
| 3027 |
|
| 3028 |
similarities = query_embeddings @ doc_embeddings.T
|
| 3029 |
print(similarities)
|
| 3030 |
+
# tensor([[0.6518],
|
| 3031 |
+
# [0.4237]])
|
| 3032 |
```
|
| 3033 |
|
| 3034 |
<details><summary>Click to see Transformers usage with Matryoshka Truncation</summary>
|
|
|
|
| 3076 |
|
| 3077 |
similarities = query_embeddings @ doc_embeddings.T
|
| 3078 |
print(similarities)
|
| 3079 |
+
# tensor([[0.6835],
|
| 3080 |
+
# [0.3982]])
|
| 3081 |
```
|
| 3082 |
|
| 3083 |
+
Note the small differences compared to the full 1024-dimensional similarities.
|
| 3084 |
|
| 3085 |
</details>
|
| 3086 |
|
|
|
|
| 3116 |
|
| 3117 |
// Compute similarity scores
|
| 3118 |
const similarities = await matmul(query_embeddings, doc_embeddings.transpose(1, 0));
|
| 3119 |
+
console.log(similarities.tolist());
|
| 3120 |
```
|
| 3121 |
|
| 3122 |
|