Add new SentenceTransformer model

Files changed:
- 1_Pooling/config.json +10 -0
- README.md +547 -0
- config.json +25 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +65 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
    "word_embedding_dimension": 384,
    "pooling_mode_cls_token": false,
    "pooling_mode_mean_tokens": true,
    "pooling_mode_max_tokens": false,
    "pooling_mode_mean_sqrt_len_tokens": false,
    "pooling_mode_weightedmean_tokens": false,
    "pooling_mode_lasttoken": false,
    "include_prompt": true
}
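
This pooling config enables mean pooling only: the sentence embedding is the masked average of the 384-dimensional token embeddings. As a rough illustration of what `pooling_mode_mean_tokens` computes, here is a minimal PyTorch sketch (illustrative code, not shipped in this repo; tensor names are made up):

```python
import torch

def mean_pooling(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Masked mean over the token axis, matching pooling_mode_mean_tokens."""
    # Expand the mask to the embedding dimension: (batch, seq) -> (batch, seq, dim)
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    # Sum only the real (non-padding) tokens, then divide by their count.
    summed = torch.sum(token_embeddings * mask, dim=1)
    counts = torch.clamp(mask.sum(dim=1), min=1e-9)
    return summed / counts  # (batch, 384) for this model
```

The Normalize module that follows (see modules.json below) then L2-normalizes this vector, so dot products equal cosine similarities.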
README.md
ADDED
@@ -0,0 +1,547 @@
---
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:44286
- loss:MultipleNegativesRankingLoss
base_model: sentence-transformers/all-MiniLM-L6-v2
widget:
- source_sentence: Repetition pitch is caused by what phenomenon?
  sentences:
  - It is still possible for two sounds of indefinite pitch to clearly be higher or lower than one another. For instance, a snare drum sounds higher pitched than a bass drum though both have indefinite pitch, because its sound contains higher frequencies. In other words, it is possible and often easy to roughly discern the relative pitches of two sounds of indefinite pitch, but sounds of indefinite pitch do not neatly correspond to any specific pitch. A special type of pitch often occurs in free nature when sound reaches the ear of an observer directly from the source, and also after reflecting off a sound-reflecting surface. This phenomenon is called repetition pitch, because the addition of a true repetition of the original sound to itself is the basic prerequisite.
  - It is still possible for two sounds of indefinite pitch to clearly be higher or lower than one another. For instance, a snare drum sounds higher pitched than a bass drum though both have indefinite pitch, because its sound contains higher frequencies. In other words, it is possible and often easy to roughly discern the relative pitches of two sounds of indefinite pitch, but sounds of indefinite pitch do not neatly correspond to any specific pitch. A special type of pitch often occurs in free nature when sound reaches the ear of an observer directly from the source, and also after reflecting off a sound-reflecting surface. This phenomenon is called repetition pitch, because the addition of a true repetition of the original sound to itself is the basic prerequisite.
  - Several recent movies have been filmed in New Haven, including Mona Lisa Smile (2003), with Julia Roberts, The Life Before Her Eyes (2007), with Uma Thurman, and Indiana Jones and the Kingdom of the Crystal Skull (2008) directed by Steven Spielberg and starring Harrison Ford, Cate Blanchett and Shia LaBeouf. The filming of Crystal Skull involved an extensive chase sequence through the streets of New Haven. Several downtown streets were closed to traffic and received a "makeover" to look like streets of 1957, when the film is set. 500 locals were cast as extras for the film. In Everybody's Fine (2009), Robert De Niro has a close encounter in what is supposed to be the Denver train station; the scene was filmed in New Haven's Union Station.
- source_sentence: Of what discipline is ethology a branch?
  sentences:
  - This time they succeeded, and on 31 December 1600, the Queen granted a Royal Charter to "George, Earl of Cumberland, and 215 Knights, Aldermen, and Burgesses" under the name, Governor and Company of Merchants of London trading with the East Indies. For a period of fifteen years the charter awarded the newly formed company a monopoly on trade with all countries east of the Cape of Good Hope and west of the Straits of Magellan. Sir James Lancaster commanded the first East India Company voyage in 1601 and returned in 1603. and in March 1604 Sir Henry Middleton commanded the second voyage. General William Keeling, a captain during the second voyage, led the third voyage from 1607 to 1610.
  - In philosophy, emotions are studied in sub-fields such as ethics, the philosophy of art (for example, sensory–emotional values, and matters of taste and sentimentality), and the philosophy of music (see also Music and emotion). In history, scholars examine documents and other sources to interpret and analyze past activities; speculation on the emotional state of the authors of historical documents is one of the tools of interpretation. In literature and film-making, the expression of emotion is the cornerstone of genres such as drama, melodrama, and romance. In communication studies, scholars study the role that emotion plays in the dissemination of ideas and messages. Emotion is also studied in non-human animals in ethology, a branch of zoology which focuses on the scientific study of animal behavior. Ethology is a combination of laboratory and field science, with strong ties to ecology and evolution. Ethologists often study one type of behavior (for example, aggression) in a number of unrelated animals.
  - In philosophy, emotions are studied in sub-fields such as ethics, the philosophy of art (for example, sensory–emotional values, and matters of taste and sentimentality), and the philosophy of music (see also Music and emotion). In history, scholars examine documents and other sources to interpret and analyze past activities; speculation on the emotional state of the authors of historical documents is one of the tools of interpretation. In literature and film-making, the expression of emotion is the cornerstone of genres such as drama, melodrama, and romance. In communication studies, scholars study the role that emotion plays in the dissemination of ideas and messages. Emotion is also studied in non-human animals in ethology, a branch of zoology which focuses on the scientific study of animal behavior. Ethology is a combination of laboratory and field science, with strong ties to ecology and evolution. Ethologists often study one type of behavior (for example, aggression) in a number of unrelated animals.
- source_sentence: Which humans does evolutionary anthropology concern itself with the biological and cultural evolution of?
  sentences:
  - Bell began a series of public demonstrations and lectures to introduce the new invention to the scientific community as well as the general public. A short time later, his demonstration of an early telephone prototype at the 1876 Centennial Exposition in Philadelphia brought the telephone to international attention. Influential visitors to the exhibition included Emperor Pedro II of Brazil. Later Bell had the opportunity to demonstrate the invention personally to Sir William Thomson (later, Lord Kelvin), a renowned Scottish scientist, as well as to Queen Victoria, who had requested a private audience at Osborne House, her Isle of Wight home. She called the demonstration "most extraordinary". The enthusiasm surrounding Bell's public displays laid the groundwork for universal acceptance of the revolutionary device.
  - Evolutionary anthropology is the interdisciplinary study of the evolution of human physiology and human behaviour and the relation between hominins and non-hominin primates. Evolutionary anthropology is based in natural science and social science, combining the human development with socioeconomic factors. Evolutionary anthropology is concerned with both biological and cultural evolution of humans, past and present. It is based on a scientific approach, and brings together fields such as archaeology, behavioral ecology, psychology, primatology, and genetics. It is a dynamic and interdisciplinary field, drawing on many lines of evidence to understand the human experience, past and present.
  - Evolutionary anthropology is the interdisciplinary study of the evolution of human physiology and human behaviour and the relation between hominins and non-hominin primates. Evolutionary anthropology is based in natural science and social science, combining the human development with socioeconomic factors. Evolutionary anthropology is concerned with both biological and cultural evolution of humans, past and present. It is based on a scientific approach, and brings together fields such as archaeology, behavioral ecology, psychology, primatology, and genetics. It is a dynamic and interdisciplinary field, drawing on many lines of evidence to understand the human experience, past and present.
- source_sentence: 'Where did florida rank in population growth '
  sentences:
  - The United States Census Bureau estimates that the population of Florida was 20,271,272 on July 1, 2015, a 7.82% increase since the 2010 United States Census. The population of Florida in the 2010 census was 18,801,310. Florida was the seventh fastest-growing state in the U.S. in the 12-month period ending July 1, 2012. In 2010, the center of population of Florida was located between Fort Meade and Frostproof. The center of population has moved less than 5 miles (8 km) to the east and approximately 1 mile (1.6 km) to the north between 1980 and 2010 and has been located in Polk County since the 1960 census. The population exceeded 19.7 million by December 2014, surpassing the population of the state of New York for the first time.
  - 'Treaties can be loosely compared to contracts: both are means of willing parties assuming obligations among themselves, and a party to either that fails to live up to their obligations can be held liable under international law.'
  - The United States Census Bureau estimates that the population of Florida was 20,271,272 on July 1, 2015, a 7.82% increase since the 2010 United States Census. The population of Florida in the 2010 census was 18,801,310. Florida was the seventh fastest-growing state in the U.S. in the 12-month period ending July 1, 2012. In 2010, the center of population of Florida was located between Fort Meade and Frostproof. The center of population has moved less than 5 miles (8 km) to the east and approximately 1 mile (1.6 km) to the north between 1980 and 2010 and has been located in Polk County since the 1960 census. The population exceeded 19.7 million by December 2014, surpassing the population of the state of New York for the first time.
- source_sentence: What is the name of the cup that some Catholics think is the Grail?
  sentences:
  - On 20 September 2013, Queen + Adam Lambert performed at the iHeartRadio Music Festival at the MGM Grand Hotel & Casino in Las Vegas. On 6 March 2014, the band announced on Good Morning America that Queen + Adam Lambert will tour North America in Summer 2014. The band will also tour Australia and New Zealand in August/September 2014. In an interview with Rolling Stone, May and Taylor said that although the tour with Lambert is a limited thing, they are open to him becoming an official member, and cutting new material with him.
  - The quail is a small to medium-sized, cryptically coloured bird. In its natural environment, it is found in bushy places, in rough grassland, among agricultural crops, and in other places with dense cover. It feeds on seeds, insects, and other small invertebrates. Being a largely ground-dwelling, gregarious bird, domestication of the quail was not difficult, although many of its wild instincts are retained in captivity. It was known to the Egyptians long before the arrival of chickens and was depicted in hieroglyphs from 2575 BC. It migrated across Egypt in vast flocks and the birds could sometimes be picked up off the ground by hand. These were the common quail (Coturnix coturnix), but modern domesticated flocks are mostly of Japanese quail (Coturnix japonica) which was probably domesticated as early as the 11th century AD in Japan. They were originally kept as songbirds, and they are thought to have been regularly used in song contests.
  - On 9 July 2006, during Mass at Valencia's Cathedral, Our Lady of the Forsaken Basilica, Pope Benedict XVI used, at the World Day of Families, the Santo Caliz, a 1st-century Middle-Eastern artifact that some Catholics believe is the Holy Grail. It was supposedly brought to that church by Emperor Valerian in the 3rd century, after having been brought by St. Peter to Rome from Jerusalem. The Santo Caliz (Holy Chalice) is a simple, small stone cup. Its base was added in Medieval Times and consists of fine gold, alabaster and gem stones.
pipeline_tag: sentence-similarity
library_name: sentence-transformers
metrics:
- cosine_accuracy
model-index:
- name: SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
  results:
  - task:
      type: triplet
      name: Triplet
    dataset:
      name: gooqa dev
      type: gooqa-dev
    metrics:
    - type: cosine_accuracy
      value: 0.40779998898506165
      name: Cosine Accuracy
---

# SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) <!-- at revision c9745ed1d9f207416be6d2e6f8de32d1f16199bf -->
- **Maximum Sequence Length:** 256 tokens
- **Output Dimensionality:** 384 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
```

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("ayushexel/embed-all-MiniLM-L6-v2-squad-2-epochs")
# Run inference
sentences = [
    'What is the name of the cup that some Catholics think is the Grail?',
    "On 9 July 2006, during Mass at Valencia's Cathedral, Our Lady of the Forsaken Basilica, Pope Benedict XVI used, at the World Day of Families, the Santo Caliz, a 1st-century Middle-Eastern artifact that some Catholics believe is the Holy Grail. It was supposedly brought to that church by Emperor Valerian in the 3rd century, after having been brought by St. Peter to Rome from Jerusalem. The Santo Caliz (Holy Chalice) is a simple, small stone cup. Its base was added in Medieval Times and consists of fine gold, alabaster and gem stones.",
    'The quail is a small to medium-sized, cryptically coloured bird. In its natural environment, it is found in bushy places, in rough grassland, among agricultural crops, and in other places with dense cover. It feeds on seeds, insects, and other small invertebrates. Being a largely ground-dwelling, gregarious bird, domestication of the quail was not difficult, although many of its wild instincts are retained in captivity. It was known to the Egyptians long before the arrival of chickens and was depicted in hieroglyphs from 2575 BC. It migrated across Egypt in vast flocks and the birds could sometimes be picked up off the ground by hand. These were the common quail (Coturnix coturnix), but modern domesticated flocks are mostly of Japanese quail (Coturnix japonica) which was probably domesticated as early as the 11th century AD in Japan. They were originally kept as songbirds, and they are thought to have been regularly used in song contests.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 384]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
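
Since the card lists semantic search among the use cases, here is a minimal retrieval sketch built on the same API; the query and corpus strings are invented for illustration:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("ayushexel/embed-all-MiniLM-L6-v2-squad-2-epochs")

corpus = [
    "The Santo Caliz is a small stone cup kept in Valencia's cathedral.",
    "Quail were domesticated in Japan around the 11th century AD.",
]
query = "Which cup do some Catholics believe is the Holy Grail?"

corpus_embeddings = model.encode(corpus)
query_embedding = model.encode([query])

# model.similarity returns a (num_queries, num_corpus) tensor of cosine scores
scores = model.similarity(query_embedding, corpus_embeddings)
best = scores.argmax().item()
print(corpus[best])
```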

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

## Evaluation

### Metrics

#### Triplet

* Dataset: `gooqa-dev`
* Evaluated with [<code>TripletEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)

| Metric              | Value      |
|:--------------------|:-----------|
| **cosine_accuracy** | **0.4078** |

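To reproduce a triplet accuracy of this kind on your own (anchor, positive, negative) data, something like the following should work; this is a sketch with invented example triplets, not the evaluation script used for this card:

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import TripletEvaluator

model = SentenceTransformer("ayushexel/embed-all-MiniLM-L6-v2-squad-2-epochs")

evaluator = TripletEvaluator(
    anchors=["Of what discipline is ethology a branch?"],
    positives=["Ethology is a branch of zoology focused on animal behavior."],
    negatives=["The East India Company received its charter in 1600."],
    name="gooqa-dev",
)
# Returns metrics including "gooqa-dev_cosine_accuracy": the fraction of
# triplets where the anchor embedding is closer to the positive than to
# the negative under cosine similarity.
results = evaluator(model)
print(results)
```

Note that in the evaluation samples shown further down, `context` and `negative_1` are often identical passages; such triplets are essentially unanswerable, which may help explain the low 0.41 accuracy.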
<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### Unnamed Dataset

* Size: 44,286 training samples
* Columns: <code>question</code>, <code>context</code>, and <code>negative</code>
* Approximate statistics based on the first 1000 samples:

  |         | question | context | negative |
  |:--------|:---------|:--------|:---------|
  | type    | string | string | string |
  | details | <ul><li>min: 6 tokens</li><li>mean: 14.48 tokens</li><li>max: 39 tokens</li></ul> | <ul><li>min: 28 tokens</li><li>mean: 147.46 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 29 tokens</li><li>mean: 147.85 tokens</li><li>max: 256 tokens</li></ul> |

* Samples:

  | question | context | negative |
  |:---------|:--------|:---------|
  | <code>What are the two cycles of the Greek folk song?</code> | <code>Along with the Byzantine (Church) chant and music, the Greek people also cultivated the Greek folk song which is divided into two cycles, the akritic and klephtic. The akritic was created between the 9th and 10th centuries and expressed the life and struggles of the akrites (frontier guards) of the Byzantine empire, the most well known being the stories associated with Digenes Akritas. The klephtic cycle came into being between the late Byzantine period and the start of the Greek War of Independence. The klephtic cycle, together with historical songs, paraloghes (narrative song or ballad), love songs, mantinades, wedding songs, songs of exile and dirges express the life of the Greeks. There is a unity between the Greek people's struggles for freedom, their joys and sorrow and attitudes towards love and death.</code> | <code>The Hellenic languages or Greek language are widely spoken in Greece and in the Greek part of Cyprus. Additionally, other varieties of Greek are spoken in small communities in parts of other European counties.</code> |
  | <code>What material is within a wrestling ring?</code> | <code>Matches are held within a wrestling ring, an elevated square canvas mat with posts on each corner. A cloth apron hangs over the edges of the ring. Three horizontal ropes or cables surround the ring, suspended with turnbuckles which are connected to the posts. For safety, the ropes are padded at the turnbuckles and cushioned mats surround the floor outside the ring. Guardrails or a similar barrier enclose this area from the audience. Wrestlers are generally expected to stay within the confines of the ring, though matches sometimes end up outside the ring, and even in the audience, to add excitement.</code> | <code>Many modern specialty matches have been devised, with unique winning conditions. The most common of these is the ladder match. In the basic ladder match, the wrestlers or teams of wrestlers must climb a ladder to obtain a prize that is hoisted above the ring. The key to winning this match is that the wrestler or team of wrestlers must try to incapacitate each other long enough for one wrestler to climb the ladder and secure that prize for their team. As a result, the ladder can be used as a weapon. The prizes include – but are not limited to any given championship belt (the traditional prize), a document granting the winner the right to a future title shot, or any document that matters to the wrestlers involved in the match (such as one granting the winner a cash prize). Another common specialty match is known as the battle royal. In a battle royal, all the wrestlers enter the ring to the point that there are 20-30 wrestlers in the ring at one time. When the match begins, the simple ob...</code> |
  | <code>What is the Hebrew Bible?</code> | <code>The Hebrew Bible, a religious interpretation of the traditions and early national history of the Jews, established the first of the Abrahamic religions, which are now practiced by 54% of the world. Judaism guides its adherents in both practice and belief, and has been called not only a religion, but also a "way of life," which has made drawing a clear distinction between Judaism, Jewish culture, and Jewish identity rather difficult. Throughout history, in eras and places as diverse as the ancient Hellenic world, in Europe before and after The Age of Enlightenment (see Haskalah), in Islamic Spain and Portugal, in North Africa and the Middle East, India, China, or the contemporary United States and Israel, cultural phenomena have developed that are in some sense characteristically Jewish without being at all specifically religious. Some factors in this come from within Judaism, others from the interaction of Jews or specific communities of Jews with their surroundings, others from the in...</code> | <code>Israel's diverse culture stems from the diversity of its population: Jews from diaspora communities around the world have brought their cultural and religious traditions back with them, creating a melting pot of Jewish customs and beliefs. Israel is the only country in the world where life revolves around the Hebrew calendar. Work and school holidays are determined by the Jewish holidays, and the official day of rest is Saturday, the Jewish Sabbath. Israel's substantial Arab minority has also left its imprint on Israeli culture in such spheres as architecture, music, and cuisine.</code> |

* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```
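
For reference, a loss with these parameters can be constructed as below; this is a minimal sketch of how such a setup might look, not the exact training script behind this checkpoint:

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.losses import MultipleNegativesRankingLoss
from sentence_transformers.util import cos_sim

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# scale=20.0 and cos_sim match the parameters above. Besides the explicit
# "negative" column, every other in-batch example also acts as a negative.
loss = MultipleNegativesRankingLoss(model=model, scale=20.0, similarity_fct=cos_sim)
```

With in-batch negatives, a larger batch (here 128, per the hyperparameters below) supplies more negatives per step, which is presumably why the `no_duplicates` batch sampler is used: duplicate passages within a batch would otherwise be scored as false negatives.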

### Evaluation Dataset

#### Unnamed Dataset

* Size: 5,000 evaluation samples
* Columns: <code>question</code>, <code>context</code>, and <code>negative_1</code>
* Approximate statistics based on the first 1000 samples:

  |         | question | context | negative_1 |
  |:--------|:---------|:--------|:-----------|
  | type    | string | string | string |
  | details | <ul><li>min: 6 tokens</li><li>mean: 14.47 tokens</li><li>max: 36 tokens</li></ul> | <ul><li>min: 28 tokens</li><li>mean: 147.36 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 28 tokens</li><li>mean: 146.82 tokens</li><li>max: 256 tokens</li></ul> |

* Samples:

  | question | context | negative_1 |
  |:---------|:--------|:-----------|
  | <code>What state supported DST because it wanted to sell more potatoes?</code> | <code>The history of time in the United States includes DST during both world wars, but no standardization of peacetime DST until 1966. In May 1965, for two weeks, St. Paul, Minnesota and Minneapolis, Minnesota were on different times, when the capital city decided to join most of the nation by starting Daylight Saving Time while Minneapolis opted to follow the later date set by state law. In the mid-1980s, Clorox (parent of Kingsford Charcoal) and 7-Eleven provided the primary funding for the Daylight Saving Time Coalition behind the 1987 extension to US DST, and both Idaho senators voted for it based on the premise that during DST fast-food restaurants sell more French fries, which are made from Idaho potatoes.</code> | <code>The history of time in the United States includes DST during both world wars, but no standardization of peacetime DST until 1966. In May 1965, for two weeks, St. Paul, Minnesota and Minneapolis, Minnesota were on different times, when the capital city decided to join most of the nation by starting Daylight Saving Time while Minneapolis opted to follow the later date set by state law. In the mid-1980s, Clorox (parent of Kingsford Charcoal) and 7-Eleven provided the primary funding for the Daylight Saving Time Coalition behind the 1987 extension to US DST, and both Idaho senators voted for it based on the premise that during DST fast-food restaurants sell more French fries, which are made from Idaho potatoes.</code> |
  | <code>Who dealt with the design faults of the palace?</code> | <code>Buckingham Palace finally became the principal royal residence in 1837, on the accession of Queen Victoria, who was the first monarch to reside there; her predecessor William IV had died before its completion. While the state rooms were a riot of gilt and colour, the necessities of the new palace were somewhat less luxurious. For one thing, it was reported the chimneys smoked so much that the fires had to be allowed to die down, and consequently the court shivered in icy magnificence. Ventilation was so bad that the interior smelled, and when a decision was taken to install gas lamps, there was a serious worry about the build-up of gas on the lower floors. It was also said that staff were lax and lazy and the palace was dirty. Following the queen's marriage in 1840, her husband, Prince Albert, concerned himself with a reorganisation of the household offices and staff, and with the design faults of the palace. The problems were all rectified by the close of 1840. However, the builders w...</code> | <code>Buckingham Palace finally became the principal royal residence in 1837, on the accession of Queen Victoria, who was the first monarch to reside there; her predecessor William IV had died before its completion. While the state rooms were a riot of gilt and colour, the necessities of the new palace were somewhat less luxurious. For one thing, it was reported the chimneys smoked so much that the fires had to be allowed to die down, and consequently the court shivered in icy magnificence. Ventilation was so bad that the interior smelled, and when a decision was taken to install gas lamps, there was a serious worry about the build-up of gas on the lower floors. It was also said that staff were lax and lazy and the palace was dirty. Following the queen's marriage in 1840, her husband, Prince Albert, concerned himself with a reorganisation of the household offices and staff, and with the design faults of the palace. The problems were all rectified by the close of 1840. However, the builders w...</code> |
  | <code>On what date did IndyMac fail?</code> | <code>The first visible institution to run into trouble in the United States was the Southern California–based IndyMac, a spin-off of Countrywide Financial. Before its failure, IndyMac Bank was the largest savings and loan association in the Los Angeles market and the seventh largest mortgage originator in the United States. The failure of IndyMac Bank on July 11, 2008, was the fourth largest bank failure in United States history up until the crisis precipitated even larger failures, and the second largest failure of a regulated thrift. IndyMac Bank's parent corporation was IndyMac Bancorp until the FDIC seized IndyMac Bank. IndyMac Bancorp filed for Chapter 7 bankruptcy in July 2008.</code> | <code>The first visible institution to run into trouble in the United States was the Southern California–based IndyMac, a spin-off of Countrywide Financial. Before its failure, IndyMac Bank was the largest savings and loan association in the Los Angeles market and the seventh largest mortgage originator in the United States. The failure of IndyMac Bank on July 11, 2008, was the fourth largest bank failure in United States history up until the crisis precipitated even larger failures, and the second largest failure of a regulated thrift. IndyMac Bank's parent corporation was IndyMac Bancorp until the FDIC seized IndyMac Bank. IndyMac Bancorp filed for Chapter 7 bankruptcy in July 2008.</code> |

* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```

### Training Hyperparameters
#### Non-Default Hyperparameters

- `eval_strategy`: steps
- `per_device_train_batch_size`: 128
- `per_device_eval_batch_size`: 128
- `num_train_epochs`: 2
- `warmup_ratio`: 0.1
- `fp16`: True
- `batch_sampler`: no_duplicates

A trainer-configuration sketch matching these values appears after the full list below.

#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: steps
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 128
- `per_device_eval_batch_size`: 128
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 5e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 2
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.1
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: True
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `tp_size`: 0
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: None
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `include_for_metrics`: []
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`:
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `use_liger_kernel`: False
- `eval_use_gather_object`: False
- `average_tokens_across_devices`: False
- `prompts`: None
- `batch_sampler`: no_duplicates
- `multi_dataset_batch_sampler`: proportional

</details>
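
The non-default values above roughly correspond to a trainer configuration like this (a sketch, assuming the standard `SentenceTransformerTrainer` API from Sentence Transformers 3+ was used; `output_dir` is a placeholder):

```python
from sentence_transformers import SentenceTransformerTrainingArguments
from sentence_transformers.training_args import BatchSamplers

args = SentenceTransformerTrainingArguments(
    output_dir="outputs",  # placeholder path
    num_train_epochs=2,
    per_device_train_batch_size=128,
    per_device_eval_batch_size=128,
    warmup_ratio=0.1,
    fp16=True,
    eval_strategy="steps",
    # Avoids duplicate texts within a batch, which matters for in-batch negatives
    batch_sampler=BatchSamplers.NO_DUPLICATES,
)
```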

### Training Logs
| Epoch  | Step | Training Loss | Validation Loss | gooqa-dev_cosine_accuracy |
|:------:|:----:|:-------------:|:---------------:|:-------------------------:|
| -1     | -1   | -             | -               | 0.3266                    |
| 0.2890 | 100  | 0.4285        | 0.7828          | 0.3894                    |
| 0.5780 | 200  | 0.3895        | 0.7691          | 0.4006                    |
| 0.8671 | 300  | 0.3744        | 0.7545          | 0.3992                    |
| 1.1561 | 400  | 0.3157        | 0.7396          | 0.4070                    |
| 1.4451 | 500  | 0.2715        | 0.7422          | 0.4074                    |
| 1.7341 | 600  | 0.2672        | 0.7405          | 0.4080                    |
| -1     | -1   | -             | -               | 0.4078                    |


### Framework Versions
- Python: 3.11.0
- Sentence Transformers: 4.0.1
- Transformers: 4.50.3
- PyTorch: 2.6.0+cu124
- Accelerate: 1.5.2
- Datasets: 3.5.0
- Tokenizers: 0.21.1

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### MultipleNegativesRankingLoss
```bibtex
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
config.json
ADDED
@@ -0,0 +1,25 @@
{
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 384,
  "initializer_range": 0.02,
  "intermediate_size": 1536,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.50.3",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
{
  "__version__": {
    "sentence_transformers": "4.0.1",
    "transformers": "4.50.3",
    "pytorch": "2.6.0+cu124"
  },
  "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
}
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:18d7314cf336de88475ee3cc21e5a9bcda25098fcfb4def584d504ea8ddd2dd1
size 90864192
modules.json
ADDED
@@ -0,0 +1,20 @@
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  },
  {
    "idx": 2,
    "name": "2",
    "path": "2_Normalize",
    "type": "sentence_transformers.models.Normalize"
  }
]
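
modules.json tells Sentence Transformers which modules to chain, in order: Transformer, then Pooling, then Normalize. Roughly the same pipeline can be assembled by hand with the public `models` API; this is a sketch, not necessarily how this checkpoint was built:

```python
from sentence_transformers import SentenceTransformer, models

# Load the base encoder and cap inputs at 256 tokens, per sentence_bert_config.json
transformer = models.Transformer("sentence-transformers/all-MiniLM-L6-v2", max_seq_length=256)
# Mean pooling over the 384-dim token embeddings, per 1_Pooling/config.json
pooling = models.Pooling(transformer.get_word_embedding_dimension(), pooling_mode="mean")
# L2-normalize so cosine similarity reduces to a dot product
normalize = models.Normalize()

model = SentenceTransformer(modules=[transformer, pooling, normalize])
```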
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 256,
  "do_lower_case": false
}
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
{
  "cls_token": {
    "content": "[CLS]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "[MASK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "[PAD]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "[SEP]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "[UNK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
The diff for this file is too large to render. See the raw file.
tokenizer_config.json
ADDED
@@ -0,0 +1,65 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": false,
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": true,
  "extra_special_tokens": {},
  "mask_token": "[MASK]",
  "max_length": 128,
  "model_max_length": 256,
  "never_split": null,
  "pad_to_multiple_of": null,
  "pad_token": "[PAD]",
  "pad_token_type_id": 0,
  "padding_side": "right",
  "sep_token": "[SEP]",
  "stride": 0,
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "[UNK]"
}
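
With `do_lower_case` true and `model_max_length` 256, tokenization behaves roughly as follows (a sketch with an invented input string; the exact output tokens are indicative only):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("ayushexel/embed-all-MiniLM-L6-v2-squad-2-epochs")

# Lowercasing is applied by the tokenizer itself (do_lower_case: true);
# truncation=True caps inputs at model_max_length (256) tokens.
enc = tokenizer("Repetition Pitch Is Caused By What Phenomenon?", truncation=True)
print(tokenizer.convert_ids_to_tokens(enc["input_ids"]))
# e.g. ['[CLS]', 'repetition', 'pitch', 'is', 'caused', 'by', 'what', 'phenomenon', '?', '[SEP]']
```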
vocab.txt
ADDED
The diff for this file is too large to render. See the raw file.