Upload ModernBERT model

e35b241 verified 6 months ago

27.8 kB

	---
	tags:
	- sentence-transformers
	- sentence-similarity
	- feature-extraction
	- generated_from_trainer
	- dataset_size:7059200
	- loss:MultipleNegativesRankingLoss
	base_model: Shuu12121/CodeModernBERT-Owl-3.0
	widget:
	- source_sentence: 'The maximum value of the slider. (default 0) <P>

	@return Returns the value of the attribute, or 0, if it hasn''t been set by the
	JSF file.'
	sentences:
	- "@Override\n public UpdateSmsChannelResult updateSmsChannel(UpdateSmsChannelRequest\
	\ request) {\n request = beforeClientExecution(request);\n return\
	\ executeUpdateSmsChannel(request);\n }"
	- "async function isValidOrigin(origin, sourceOrigin) {\n // This will fetch\
	\ the caches from https://cdn.ampproject.org/caches.json the first time it's\n\
	\ // called. Subsequent calls will receive a cached version.\n const officialCacheList\
	\ = await caches.list();\n // Calculate the cache specific origin\n const\
	\ cacheSubdomain = `https://${await createCacheSubdomain(sourceOrigin)}.`;\n \
	\ // Check all caches listed on ampproject.org\n for (const cache of officialCacheList)\
	\ {\n const cachedOrigin = cacheSubdomain + cache.cacheDomain;\n if\
	\ (origin === cachedOrigin) {\n return true;\n }\n }\n return\
	\ false;\n }"
	- "public java.lang.Object getMin() {\n\t\treturn (java.lang.Object) getStateHelper().eval(PropertyKeys.min,\
	\ 0);\n\t}"
	- source_sentence: 'The Method from the Date.getMinutes is deprecated. This is a helper-Method.


	@param date

	The Date-object to get the minutes.

	@return The minutes from the Date-object.'
	sentences:
	- "public static int getMinutes(final Date date)\n\t{\n\t\tfinal Calendar calendar\
	\ = Calendar.getInstance();\n\t\tcalendar.setTime(date);\n\t\treturn calendar.get(Calendar.MINUTE);\n\
	\t}"
	- "func (opts BeeOptions) Bind(name string, dst interface{}) error {\n\tv := opts.Value(name)\n\
	\tif v == nil {\n\t\treturn errors.New(\"Option with name \" + name + \" not found\"\
	)\n\t}\n\n\treturn ConvertValue(v, dst)\n}"
	- "public function createFor(Customer $customer, array $options = [], array $filters\
	\ = [])\n {\n $this->parentId = $customer->id;\n\n return parent::rest_create($options,\
	\ $filters);\n }"
	- source_sentence: "Return a list of all dates from 11/12/2015 to the present.\n\n\
	\ Args:\n boo: if true, list contains Numbers (20151230); if false, list contains\
	\ Strings (\"2015-12-30\")\n Returns:\n list of either Numbers or Strings"
	sentences:
	- "def all_days(boo):\n \n earliest = datetime.strptime(('2015-11-12').replace('-',\
	\ ' '), '%Y %m %d')\n latest = datetime.strptime(datetime.today().date().isoformat().replace('-',\
	\ ' '), '%Y %m %d')\n num_days = (latest - earliest).days + 1\n all_days = [latest\
	\ - timedelta(days=x) for x in range(num_days)]\n all_days.reverse()\n\n output\
	\ = []\n\n if boo:\n # Return as Integer, yyyymmdd\n for d in all_days:\n\
	\ output.append(int(str(d).replace('-', '')[:8]))\n else:\n # Return\
	\ as String, yyyy-mm-dd\n for d in all_days:\n output.append(str(d)[:10])\n\
	\ return output"
	- "public void setColSize3(Integer newColSize3) {\n\t\tInteger oldColSize3 = colSize3;\n\
	\t\tcolSize3 = newColSize3;\n\t\tif (eNotificationRequired())\n\t\t\teNotify(new\
	\ ENotificationImpl(this, Notification.SET, AfplibPackage.COLOR_SPECIFICATION__COL_SIZE3,\
	\ oldColSize3, colSize3));\n\t}"
	- "public function deleteCompanyBusinessUnitStoreAddress(CompanyBusinessUnitStoreAddressTransfer\
	\ $companyBusinessUnitStoreAddressTransfer): void\n {\n $this->getFactory()\n\
	\ ->createFosCompanyBusinessUnitStoreAddressQuery()\n ->findOneByIdCompanyBusinessUnitStoreAddress($companyBusinessUnitStoreAddressTransfer->getIdCompanyBusinessUnitStoreAddress())\n\
	\ ->delete();\n }"
	- source_sentence: 'Returns array of basket oxarticle objects


	@return array'
	sentences:
	- "public function visit(NodeVisitorInterface $visitor)\n {\n foreach\
	\ ($this->children as $child)\n {\n $child->visit($visitor);\n\
	\ }\n }"
	- "func GetColDefaultValue(ctx sessionctx.Context, col *model.ColumnInfo) (types.Datum,\
	\ error) {\n\treturn getColDefaultValue(ctx, col, col.GetDefaultValue())\n}"
	- "public function getBasketArticles()\n {\n $aBasketArticles = [];\n\
	\ /** @var \\oxBasketItem $oBasketItem */\n foreach ($this->_aBasketContents\
	\ as $sItemKey => $oBasketItem) {\n try {\n $oProduct\
	\ = $oBasketItem->getArticle(true);\n\n if (\\OxidEsales\\Eshop\\\
	Core\\Registry::getConfig()->getConfigParam('bl_perfLoadSelectLists')) {\n \
	\ // marking chosen select list\n $aSelList\
	\ = $oBasketItem->getSelList();\n if (is_array($aSelList) &&\
	\ ($aSelectlist = $oProduct->getSelectLists($sItemKey))) {\n \
	\ reset($aSelList);\n foreach ($aSelList as $conkey\
	\ => $iSel) {\n $aSelectlist[$conkey][$iSel]->selected\
	\ = 1;\n }\n $oProduct->setSelectlist($aSelectlist);\n\
	\ }\n }\n } catch (\\OxidEsales\\\
	Eshop\\Core\\Exception\\NoArticleException $oEx) {\n \\OxidEsales\\\
	Eshop\\Core\\Registry::getUtilsView()->addErrorToDisplay($oEx);\n \
	\ $this->removeItem($sItemKey);\n $this->calculateBasket(true);\n\
	\ continue;\n } catch (\\OxidEsales\\Eshop\\Core\\Exception\\\
	ArticleInputException $oEx) {\n \\OxidEsales\\Eshop\\Core\\Registry::getUtilsView()->addErrorToDisplay($oEx);\n\
	\ $this->removeItem($sItemKey);\n $this->calculateBasket(true);\n\
	\ continue;\n }\n\n $aBasketArticles[$sItemKey]\
	\ = $oProduct;\n }\n\n return $aBasketArticles;\n }"
	- source_sentence: get test root
	sentences:
	- "@CheckReturnValue\n @SchedulerSupport(SchedulerSupport.NONE)\n public final\
	\ Maybe<T> doOnDispose(Action onDispose) {\n return RxJavaPlugins.onAssembly(new\
	\ MaybePeek<T>(this,\n Functions.emptyConsumer(), // onSubscribe\n\
	\ Functions.emptyConsumer(), // onSuccess\n Functions.emptyConsumer(),\
	\ // onError\n Functions.EMPTY_ACTION, // onComplete\n \
	\ Functions.EMPTY_ACTION, // (onSuccess | onError | onComplete) after\n\
	\ ObjectHelper.requireNonNull(onDispose, \"onDispose is null\"\
	)\n ));\n }"
	- "protected Object parseKeyElement(Element keyEle, BeanDefinition bd, String defaultKeyTypeName)\
	\ {\n NodeList nl = keyEle.getChildNodes();\n Element subElement = null;\n\
	\ for (int i = 0; i < nl.getLength(); i++) {\n Node node = nl.item(i);\n\
	\ if (node instanceof Element) {\n // Child element is what we're\
	\ looking for.\n if (subElement != null)\n error(\"<key> element\
	\ must not contain more than one value sub-element\", keyEle);\n else subElement\
	\ = (Element) node;\n }\n }\n return parsePropertySubElement(subElement,\
	\ bd, defaultKeyTypeName);\n }"
	- "function getRootPath(){\n var rootPath = path.resolve('.');\n while(rootPath){\n\
	\ if(fs.existsSync(rootPath + '/config.json')){\n break;\n \
	\ }\n rootPath = rootPath.substring(0, rootPath.lastIndexOf(path.sep));\n\
	\ }\n return rootPath;\n}"
	pipeline_tag: sentence-similarity
	library_name: sentence-transformers
	---

	# SentenceTransformer based on Shuu12121/CodeModernBERT-Owl-3.0

	This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Shuu12121/CodeModernBERT-Owl-3.0](https://huggingface.co/Shuu12121/CodeModernBERT-Owl-3.0). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

	## Model Details

	### Model Description
	- Model Type: Sentence Transformer
	- Base model: [Shuu12121/CodeModernBERT-Owl-3.0](https://huggingface.co/Shuu12121/CodeModernBERT-Owl-3.0) <!-- at revision a6beebbd776ae122f34f875dfa731557a1f70d8f -->
	- Maximum Sequence Length: 1024 tokens
	- Output Dimensionality: 768 dimensions
	- Similarity Function: Cosine Similarity
	<!-- - Training Dataset: Unknown -->
	<!-- - Language: Unknown -->
	<!-- - License: Unknown -->

	### Model Sources

	- Documentation: [Sentence Transformers Documentation](https://sbert.net)
	- Repository: [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
	- Hugging Face: [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

	### Full Model Architecture

	```
	SentenceTransformer(
	(0): Transformer({'max_seq_length': 1024, 'do_lower_case': False}) with Transformer model: ModernBertModel
	(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
	)
	```

	## Usage

	### Direct Usage (Sentence Transformers)

	First install the Sentence Transformers library:

	```bash
	pip install -U sentence-transformers
	```

	Then you can load this model and run inference.
	```python
	from sentence_transformers import SentenceTransformer

	# Download from the 🤗 Hub (replace the placeholder below with this model's actual Hub repository ID)
	model = SentenceTransformer("sentence_transformers_model_id")
	# Run inference
	sentences = [
	'get test root',
	"function getRootPath(){\n var rootPath = path.resolve('.');\n while(rootPath){\n if(fs.existsSync(rootPath + '/config.json')){\n break;\n }\n rootPath = rootPath.substring(0, rootPath.lastIndexOf(path.sep));\n }\n return rootPath;\n}",
	'protected Object parseKeyElement(Element keyEle, BeanDefinition bd, String defaultKeyTypeName) {\n NodeList nl = keyEle.getChildNodes();\n Element subElement = null;\n for (int i = 0; i < nl.getLength(); i++) {\n Node node = nl.item(i);\n if (node instanceof Element) {\n // Child element is what we\'re looking for.\n if (subElement != null)\n error("<key> element must not contain more than one value sub-element", keyEle);\n else subElement = (Element) node;\n }\n }\n return parsePropertySubElement(subElement, bd, defaultKeyTypeName);\n }',
	]
	embeddings = model.encode(sentences)
	print(embeddings.shape)
	# [3, 768]

	# Get the similarity scores for the embeddings
	similarities = model.similarity(embeddings, embeddings)
	print(similarities.shape)
	# [3, 3]
	```

	<!--
	### Direct Usage (Transformers)

	<details><summary>Click to see the direct usage in Transformers</summary>

	</details>
	-->

	<!--
	### Downstream Usage (Sentence Transformers)

	You can finetune this model on your own dataset.

	<details><summary>Click to expand</summary>

	</details>
	-->

	<!--
	### Out-of-Scope Use

	List how the model may foreseeably be misused and address what users ought not to do with the model.
	-->

	<!--
	## Bias, Risks and Limitations

	What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.
	-->

	<!--
	### Recommendations

	What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.
	-->

	## Training Details

	### Training Dataset

	#### Unnamed Dataset

	* Size: 7,059,200 training samples
	* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
	* Approximate statistics based on the first 1000 samples:
	| | sentence_0 | sentence_1 | label |
	|:--------|:-----------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:--------------------------------------------------------------|
	| type | string | string | float |
	| details | <ul><li>min: 3 tokens</li><li>mean: 51.42 tokens</li><li>max: 974 tokens</li></ul> | <ul><li>min: 29 tokens</li><li>mean: 162.71 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 1.0</li><li>mean: 1.0</li><li>max: 1.0</li></ul> |
	* Samples:
	| sentence_0 | sentence_1 | label |
	|:---------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------|
	| <code>// SetDefaultVersionId sets the DefaultVersionId field's value.</code> | <code>func (s Policy) SetDefaultVersionId(v string) Policy {<br> s.DefaultVersionId = &v<br> return s<br>}</code> | <code>1.0</code> |
	| <code>// SetNextPageToken sets the NextPageToken field's value.</code> | <code>func (s ListBudgetsForResourceOutput) SetNextPageToken(v string) ListBudgetsForResourceOutput {<br> s.NextPageToken = &v<br> return s<br>}</code> | <code>1.0</code> |
	| <code>// SetHealthyThresholdCount sets the HealthyThresholdCount field's value.</code> | <code>func (s TargetGroup) SetHealthyThresholdCount(v int64) TargetGroup {<br> s.HealthyThresholdCount = &v<br> return s<br>}</code> | <code>1.0</code> |
	* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
	```json
	{
	"scale": 20.0,
	"similarity_fct": "cos_sim"
	}
	```

	### Training Hyperparameters
	#### Non-Default Hyperparameters

	- `per_device_train_batch_size`: 200
	- `per_device_eval_batch_size`: 200
	- `fp16`: True
	- `multi_dataset_batch_sampler`: round_robin

	#### All Hyperparameters
	<details><summary>Click to expand</summary>

	- `overwrite_output_dir`: False
	- `do_predict`: False
	- `eval_strategy`: no
	- `prediction_loss_only`: True
	- `per_device_train_batch_size`: 200
	- `per_device_eval_batch_size`: 200
	- `per_gpu_train_batch_size`: None
	- `per_gpu_eval_batch_size`: None
	- `gradient_accumulation_steps`: 1
	- `eval_accumulation_steps`: None
	- `torch_empty_cache_steps`: None
	- `learning_rate`: 5e-05
	- `weight_decay`: 0.0
	- `adam_beta1`: 0.9
	- `adam_beta2`: 0.999
	- `adam_epsilon`: 1e-08
	- `max_grad_norm`: 1
	- `num_train_epochs`: 3
	- `max_steps`: -1
	- `lr_scheduler_type`: linear
	- `lr_scheduler_kwargs`: {}
	- `warmup_ratio`: 0.0
	- `warmup_steps`: 0
	- `log_level`: passive
	- `log_level_replica`: warning
	- `log_on_each_node`: True
	- `logging_nan_inf_filter`: True
	- `save_safetensors`: True
	- `save_on_each_node`: False
	- `save_only_model`: False
	- `restore_callback_states_from_checkpoint`: False
	- `no_cuda`: False
	- `use_cpu`: False
	- `use_mps_device`: False
	- `seed`: 42
	- `data_seed`: None
	- `jit_mode_eval`: False
	- `use_ipex`: False
	- `bf16`: False
	- `fp16`: True
	- `fp16_opt_level`: O1
	- `half_precision_backend`: auto
	- `bf16_full_eval`: False
	- `fp16_full_eval`: False
	- `tf32`: None
	- `local_rank`: 0
	- `ddp_backend`: None
	- `tpu_num_cores`: None
	- `tpu_metrics_debug`: False
	- `debug`: []
	- `dataloader_drop_last`: False
	- `dataloader_num_workers`: 0
	- `dataloader_prefetch_factor`: None
	- `past_index`: -1
	- `disable_tqdm`: False
	- `remove_unused_columns`: True
	- `label_names`: None
	- `load_best_model_at_end`: False
	- `ignore_data_skip`: False
	- `fsdp`: []
	- `fsdp_min_num_params`: 0
	- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
	- `fsdp_transformer_layer_cls_to_wrap`: None
	- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
	- `deepspeed`: None
	- `label_smoothing_factor`: 0.0
	- `optim`: adamw_torch
	- `optim_args`: None
	- `adafactor`: False
	- `group_by_length`: False
	- `length_column_name`: length
	- `ddp_find_unused_parameters`: None
	- `ddp_bucket_cap_mb`: None
	- `ddp_broadcast_buffers`: False
	- `dataloader_pin_memory`: True
	- `dataloader_persistent_workers`: False
	- `skip_memory_metrics`: True
	- `use_legacy_prediction_loop`: False
	- `push_to_hub`: False
	- `resume_from_checkpoint`: None
	- `hub_model_id`: None
	- `hub_strategy`: every_save
	- `hub_private_repo`: None
	- `hub_always_push`: False
	- `gradient_checkpointing`: False
	- `gradient_checkpointing_kwargs`: None
	- `include_inputs_for_metrics`: False
	- `include_for_metrics`: []
	- `eval_do_concat_batches`: True
	- `fp16_backend`: auto
	- `push_to_hub_model_id`: None
	- `push_to_hub_organization`: None
	- `mp_parameters`:
	- `auto_find_batch_size`: False
	- `full_determinism`: False
	- `torchdynamo`: None
	- `ray_scope`: last
	- `ddp_timeout`: 1800
	- `torch_compile`: False
	- `torch_compile_backend`: None
	- `torch_compile_mode`: None
	- `include_tokens_per_second`: False
	- `include_num_input_tokens_seen`: False
	- `neftune_noise_alpha`: None
	- `optim_target_modules`: None
	- `batch_eval_metrics`: False
	- `eval_on_start`: False
	- `use_liger_kernel`: False
	- `eval_use_gather_object`: False
	- `average_tokens_across_devices`: False
	- `prompts`: None
	- `batch_sampler`: batch_sampler
	- `multi_dataset_batch_sampler`: round_robin

	</details>

	### Training Logs
	<details><summary>Click to expand</summary>

	| Epoch | Step | Training Loss |
	|:------:|:------:|:-------------:|
	| 0.0142 | 500 | 1.1661 |
	| 0.0283 | 1000 | 0.1176 |
	| 0.0425 | 1500 | 0.1096 |
	| 0.0567 | 2000 | 0.1013 |
	| 0.0708 | 2500 | 0.0967 |
	| 0.0850 | 3000 | 0.0912 |
	| 0.0992 | 3500 | 0.0886 |
	| 0.1133 | 4000 | 0.0799 |
	| 0.1275 | 4500 | 0.0776 |
	| 0.1417 | 5000 | 0.0757 |
	| 0.1558 | 5500 | 0.0751 |
	| 0.1700 | 6000 | 0.0714 |
	| 0.1842 | 6500 | 0.0703 |
	| 0.1983 | 7000 | 0.0667 |
	| 0.2125 | 7500 | 0.0674 |
	| 0.2267 | 8000 | 0.0625 |
	| 0.2408 | 8500 | 0.0598 |
	| 0.2550 | 9000 | 0.0597 |
	| 0.2692 | 9500 | 0.0585 |
	| 0.2833 | 10000 | 0.0568 |
	| 0.2975 | 10500 | 0.055 |
	| 0.3117 | 11000 | 0.0554 |
	| 0.3258 | 11500 | 0.0529 |
	| 0.3400 | 12000 | 0.0516 |
	| 0.3541 | 12500 | 0.0506 |
	| 0.3683 | 13000 | 0.05 |
	| 0.3825 | 13500 | 0.0484 |
	| 0.3966 | 14000 | 0.0472 |
	| 0.4108 | 14500 | 0.0468 |
	| 0.4250 | 15000 | 0.045 |
	| 0.4391 | 15500 | 0.046 |
	| 0.4533 | 16000 | 0.0452 |
	| 0.4675 | 16500 | 0.0428 |
	| 0.4816 | 17000 | 0.0424 |
	| 0.4958 | 17500 | 0.04 |
	| 0.5100 | 18000 | 0.0402 |
	| 0.5241 | 18500 | 0.0391 |
	| 0.5383 | 19000 | 0.0389 |
	| 0.5525 | 19500 | 0.0385 |
	| 0.5666 | 20000 | 0.0357 |
	| 0.5808 | 20500 | 0.0362 |
	| 0.5950 | 21000 | 0.0369 |
	| 0.6091 | 21500 | 0.0372 |
	| 0.6233 | 22000 | 0.0351 |
	| 0.6375 | 22500 | 0.034 |
	| 0.6516 | 23000 | 0.0364 |
	| 0.6658 | 23500 | 0.033 |
	| 0.6800 | 24000 | 0.0336 |
	| 0.6941 | 24500 | 0.0302 |
	| 0.7083 | 25000 | 0.0309 |
	| 0.7225 | 25500 | 0.0306 |
	| 0.7366 | 26000 | 0.0316 |
	| 0.7508 | 26500 | 0.0306 |
	| 0.7650 | 27000 | 0.0307 |
	| 0.7791 | 27500 | 0.0303 |
	| 0.7933 | 28000 | 0.028 |
	| 0.8075 | 28500 | 0.0289 |
	| 0.8216 | 29000 | 0.0297 |
	| 0.8358 | 29500 | 0.0281 |
	| 0.8500 | 30000 | 0.029 |
	| 0.8641 | 30500 | 0.027 |
	| 0.8783 | 31000 | 0.0282 |
	| 0.8925 | 31500 | 0.0264 |
	| 0.9066 | 32000 | 0.027 |
	| 0.9208 | 32500 | 0.0259 |
	| 0.9350 | 33000 | 0.0272 |
	| 0.9491 | 33500 | 0.0275 |
	| 0.9633 | 34000 | 0.0244 |
	| 0.9774 | 34500 | 0.0254 |
	| 0.9916 | 35000 | 0.0261 |
	| 1.0058 | 35500 | 0.0189 |
	| 1.0199 | 36000 | 0.0118 |
	| 1.0341 | 36500 | 0.012 |
	| 1.0483 | 37000 | 0.0118 |
	| 1.0624 | 37500 | 0.0109 |
	| 1.0766 | 38000 | 0.0123 |
	| 1.0908 | 38500 | 0.0122 |
	| 1.1049 | 39000 | 0.0122 |
	| 1.1191 | 39500 | 0.0123 |
	| 1.1333 | 40000 | 0.0117 |
	| 1.1474 | 40500 | 0.0115 |
	| 1.1616 | 41000 | 0.0122 |
	| 1.1758 | 41500 | 0.0117 |
	| 1.1899 | 42000 | 0.0119 |
	| 1.2041 | 42500 | 0.0112 |
	| 1.2183 | 43000 | 0.0122 |
	| 1.2324 | 43500 | 0.0116 |
	| 1.2466 | 44000 | 0.0107 |
	| 1.2608 | 44500 | 0.0126 |
	| 1.2749 | 45000 | 0.0114 |
	| 1.2891 | 45500 | 0.011 |
	| 1.3033 | 46000 | 0.0116 |
	| 1.3174 | 46500 | 0.0114 |
	| 1.3316 | 47000 | 0.0111 |
	| 1.3458 | 47500 | 0.0112 |
	| 1.3599 | 48000 | 0.0112 |
	| 1.3741 | 48500 | 0.0115 |
	| 1.3883 | 49000 | 0.0104 |
	| 1.4024 | 49500 | 0.0109 |
	| 1.4166 | 50000 | 0.0113 |
	| 1.4308 | 50500 | 0.0115 |
	| 1.4449 | 51000 | 0.0103 |
	| 1.4591 | 51500 | 0.0114 |
	| 1.4733 | 52000 | 0.0104 |
	| 1.4874 | 52500 | 0.0106 |
	| 1.5016 | 53000 | 0.0103 |
	| 1.5158 | 53500 | 0.0102 |
	| 1.5299 | 54000 | 0.0101 |
	| 1.5441 | 54500 | 0.0104 |
	| 1.5583 | 55000 | 0.011 |
	| 1.5724 | 55500 | 0.0107 |
	| 1.5866 | 56000 | 0.0097 |
	| 1.6007 | 56500 | 0.0099 |
	| 1.6149 | 57000 | 0.0102 |
	| 1.6291 | 57500 | 0.0098 |
	| 1.6432 | 58000 | 0.01 |
	| 1.6574 | 58500 | 0.0096 |
	| 1.6716 | 59000 | 0.0099 |
	| 1.6857 | 59500 | 0.0103 |
	| 1.6999 | 60000 | 0.0098 |
	| 1.7141 | 60500 | 0.0097 |
	| 1.7282 | 61000 | 0.0094 |
	| 1.7424 | 61500 | 0.0093 |
	| 1.7566 | 62000 | 0.0102 |
	| 1.7707 | 62500 | 0.0099 |
	| 1.7849 | 63000 | 0.0098 |
	| 1.7991 | 63500 | 0.009 |
	| 1.8132 | 64000 | 0.0097 |
	| 1.8274 | 64500 | 0.009 |
	| 1.8416 | 65000 | 0.0093 |
	| 1.8557 | 65500 | 0.0092 |
	| 1.8699 | 66000 | 0.0095 |
	| 1.8841 | 66500 | 0.0093 |
	| 1.8982 | 67000 | 0.0094 |
	| 1.9124 | 67500 | 0.0089 |
	| 1.9266 | 68000 | 0.0091 |
	| 1.9407 | 68500 | 0.0089 |
	| 1.9549 | 69000 | 0.0084 |
	| 1.9691 | 69500 | 0.0087 |
	| 1.9832 | 70000 | 0.0094 |
	| 1.9974 | 70500 | 0.0085 |
	| 2.0116 | 71000 | 0.0049 |
	| 2.0257 | 71500 | 0.0041 |
	| 2.0399 | 72000 | 0.0039 |
	| 2.0541 | 72500 | 0.0038 |
	| 2.0682 | 73000 | 0.004 |
	| 2.0824 | 73500 | 0.0039 |
	| 2.0966 | 74000 | 0.0038 |
	| 2.1107 | 74500 | 0.0041 |
	| 2.1249 | 75000 | 0.0037 |
	| 2.1391 | 75500 | 0.0038 |
	| 2.1532 | 76000 | 0.0041 |
	| 2.1674 | 76500 | 0.0036 |
	| 2.1816 | 77000 | 0.0039 |
	| 2.1957 | 77500 | 0.0039 |
	| 2.2099 | 78000 | 0.0038 |
	| 2.2240 | 78500 | 0.0038 |
	| 2.2382 | 79000 | 0.0037 |
	| 2.2524 | 79500 | 0.0037 |
	| 2.2665 | 80000 | 0.0036 |
	| 2.2807 | 80500 | 0.0038 |
	| 2.2949 | 81000 | 0.0037 |
	| 2.3090 | 81500 | 0.0036 |
	| 2.3232 | 82000 | 0.0036 |
	| 2.3374 | 82500 | 0.0038 |
	| 2.3515 | 83000 | 0.0037 |
	| 2.3657 | 83500 | 0.0037 |
	| 2.3799 | 84000 | 0.0038 |
	| 2.3940 | 84500 | 0.0037 |
	| 2.4082 | 85000 | 0.0036 |
	| 2.4224 | 85500 | 0.0034 |
	| 2.4365 | 86000 | 0.0035 |
	| 2.4507 | 86500 | 0.0033 |
	| 2.4649 | 87000 | 0.0036 |
	| 2.4790 | 87500 | 0.0035 |
	| 2.4932 | 88000 | 0.0034 |
	| 2.5074 | 88500 | 0.0034 |
	| 2.5215 | 89000 | 0.0034 |
	| 2.5357 | 89500 | 0.0031 |
	| 2.5499 | 90000 | 0.0033 |
	| 2.5640 | 90500 | 0.0033 |
	| 2.5782 | 91000 | 0.0035 |
	| 2.5924 | 91500 | 0.0033 |
	| 2.6065 | 92000 | 0.0032 |
	| 2.6207 | 92500 | 0.0034 |
	| 2.6349 | 93000 | 0.0031 |
	| 2.6490 | 93500 | 0.0032 |
	| 2.6632 | 94000 | 0.0032 |
	| 2.6774 | 94500 | 0.0033 |
	| 2.6915 | 95000 | 0.0032 |
	| 2.7057 | 95500 | 0.003 |
	| 2.7199 | 96000 | 0.0032 |
	| 2.7340 | 96500 | 0.0032 |
	| 2.7482 | 97000 | 0.003 |
	| 2.7624 | 97500 | 0.0032 |
	| 2.7765 | 98000 | 0.0033 |
	| 2.7907 | 98500 | 0.003 |
	| 2.8049 | 99000 | 0.003 |
	| 2.8190 | 99500 | 0.0031 |
	| 2.8332 | 100000 | 0.0031 |
	| 2.8473 | 100500 | 0.003 |
	| 2.8615 | 101000 | 0.003 |
	| 2.8757 | 101500 | 0.003 |
	| 2.8898 | 102000 | 0.003 |
	| 2.9040 | 102500 | 0.003 |
	| 2.9182 | 103000 | 0.003 |
	| 2.9323 | 103500 | 0.003 |
	| 2.9465 | 104000 | 0.0033 |
	| 2.9607 | 104500 | 0.0029 |
	| 2.9748 | 105000 | 0.003 |
	| 2.9890 | 105500 | 0.0028 |

	</details>

	### Framework Versions
	- Python: 3.11.13
	- Sentence Transformers: 4.1.0
	- Transformers: 4.52.4
	- PyTorch: 2.6.0+cu124
	- Accelerate: 1.7.0
	- Datasets: 3.6.0
	- Tokenizers: 0.21.1

	## Citation

	### BibTeX

	#### Sentence Transformers
	```bibtex
	@inproceedings{reimers-2019-sentence-bert,
	title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
	author = "Reimers, Nils and Gurevych, Iryna",
	booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
	month = "11",
	year = "2019",
	publisher = "Association for Computational Linguistics",
	url = "https://arxiv.org/abs/1908.10084",
	}
	```

	#### MultipleNegativesRankingLoss
	```bibtex
	@misc{henderson2017efficient,
	title={Efficient Natural Language Response Suggestion for Smart Reply},
	author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
	year={2017},
	eprint={1705.00652},
	archivePrefix={arXiv},
	primaryClass={cs.CL}
	}
	```

	<!--
	## Glossary

	Clearly define terms in order to be accessible across audiences.
	-->

	<!--
	## Model Card Authors

	Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.
	-->

	<!--
	## Model Card Contact

	Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.
	-->