Model save
Browse files- .gitattributes +1 -0
- README.md +58 -0
- adapter_config.json +37 -0
- adapter_model.safetensors +3 -0
- added_tokens.json +3 -0
- chat_template.json +3 -0
- preprocessor_config.json +36 -0
- processor_config.json +4 -0
- runs/May21_15-45-43_selene/events.out.tfevents.1747835160.selene.959022.0 +3 -0
- runs/May21_15-47-50_selene/events.out.tfevents.1747835271.selene.961082.0 +3 -0
- runs/May21_15-48-48_selene/events.out.tfevents.1747835329.selene.961714.0 +3 -0
- runs/May21_16-29-29_selene/events.out.tfevents.1747837770.selene.987629.0 +3 -0
- runs/May21_16-44-48_selene/events.out.tfevents.1747838689.selene.997684.0 +3 -0
- runs/May21_16-45-53_selene/events.out.tfevents.1747838754.selene.998545.0 +3 -0
- runs/May21_16-48-52_selene/events.out.tfevents.1747838933.selene.1001095.0 +3 -0
- runs/May21_16-54-03_selene/events.out.tfevents.1747839244.selene.1004158.0 +3 -0
- runs/May21_20-43-10_selene/events.out.tfevents.1747852991.selene.1076346.0 +3 -0
- runs/May22_06-51-50_selene/events.out.tfevents.1747889512.selene.1338899.0 +3 -0
- runs/May22_06-55-21_selene/events.out.tfevents.1747889723.selene.1339736.0 +3 -0
- runs/May22_06-57-26_selene/events.out.tfevents.1747889847.selene.1340100.0 +3 -0
- runs/May22_06-59-01_selene/events.out.tfevents.1747889942.selene.1340361.0 +3 -0
- runs/May22_07-07-57_selene/events.out.tfevents.1747890479.selene.1343384.0 +3 -0
- runs/May22_07-10-46_selene/events.out.tfevents.1747890648.selene.1345309.0 +3 -0
- runs/May22_07-19-21_selene/events.out.tfevents.1747891163.selene.1348633.0 +3 -0
- runs/May22_07-19-50_selene/events.out.tfevents.1747891191.selene.1348838.0 +3 -0
- runs/May22_07-28-44_selene/events.out.tfevents.1747891726.selene.1350964.0 +3 -0
- runs/May22_07-37-08_selene/events.out.tfevents.1747892229.selene.1352437.0 +3 -0
- runs/May22_07-40-04_selene/events.out.tfevents.1747892406.selene.1353544.0 +3 -0
- runs/May22_07-41-41_selene/events.out.tfevents.1747892503.selene.1353827.0 +3 -0
- runs/May22_07-44-15_selene/events.out.tfevents.1747892658.selene.1354457.0 +3 -0
- runs/May22_07-51-56_selene/events.out.tfevents.1747893118.selene.1357192.0 +3 -0
- runs/May22_07-55-20_selene/events.out.tfevents.1747893321.selene.1358132.0 +3 -0
- runs/May22_08-07-34_selene/events.out.tfevents.1747894055.selene.1360602.0 +3 -0
- runs/May22_08-14-01_selene/events.out.tfevents.1747894443.selene.1361770.0 +3 -0
- runs/May22_08-25-14_selene/events.out.tfevents.1747895117.selene.1366431.0 +3 -0
- runs/May23_00-49-19_selene/events.out.tfevents.1747954163.selene.1807332.0 +3 -0
- special_tokens_map.json +33 -0
- tokenizer.json +3 -0
- tokenizer.model +3 -0
- tokenizer_config.json +0 -0
- training_args.bin +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
base_model: google/gemma-3-4b-it
|
| 3 |
+
library_name: transformers
|
| 4 |
+
model_name: gemma3_fine_tuning2025-05-23
|
| 5 |
+
tags:
|
| 6 |
+
- generated_from_trainer
|
| 7 |
+
- trl
|
| 8 |
+
- sft
|
| 9 |
+
license: gemma
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# Model Card for gemma3_fine_tuning2025-05-23
|
| 13 |
+
|
| 14 |
+
This model is a fine-tuned version of [google/gemma-3-4b-it](https://huggingface.co/google/gemma-3-4b-it).
|
| 15 |
+
It has been trained using [TRL](https://github.com/huggingface/trl).
|
| 16 |
+
|
| 17 |
+
## Quick start
|
| 18 |
+
|
| 19 |
+
```python
|
| 20 |
+
from transformers import pipeline
|
| 21 |
+
|
| 22 |
+
question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
|
| 23 |
+
generator = pipeline("text-generation", model="alexanderyj/gemma3_fine_tuning2025-05-23", device="cuda")
|
| 24 |
+
output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
|
| 25 |
+
print(output["generated_text"])
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
## Training procedure
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
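This model was trained with supervised fine-tuning (SFT) using LoRA adapters (r=16, alpha=16, dropout=0.05) on top of the base model.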
|
| 34 |
+
|
| 35 |
+
### Framework versions
|
| 36 |
+
|
| 37 |
+
- TRL: 0.17.0
|
| 38 |
+
- Transformers: 4.51.3
|
| 39 |
+
- PyTorch: 2.7.0
|
| 40 |
+
- Datasets: 3.6.0
|
| 41 |
+
- Tokenizers: 0.21.1
|
| 42 |
+
|
| 43 |
+
## Citations
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
Cite TRL as:
|
| 48 |
+
|
| 49 |
+
```bibtex
|
| 50 |
+
@misc{vonwerra2022trl,
|
| 51 |
+
title = {{TRL: Transformer Reinforcement Learning}},
|
| 52 |
+
author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
|
| 53 |
+
year = 2020,
|
| 54 |
+
journal = {GitHub repository},
|
| 55 |
+
publisher = {GitHub},
|
| 56 |
+
howpublished = {\url{https://github.com/huggingface/trl}}
|
| 57 |
+
}
|
| 58 |
+
```
|
adapter_config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha_pattern": {},
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "google/gemma-3-4b-it",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"eva_config": null,
|
| 7 |
+
"exclude_modules": null,
|
| 8 |
+
"fan_in_fan_out": false,
|
| 9 |
+
"inference_mode": true,
|
| 10 |
+
"init_lora_weights": true,
|
| 11 |
+
"layer_replication": null,
|
| 12 |
+
"layers_pattern": null,
|
| 13 |
+
"layers_to_transform": null,
|
| 14 |
+
"loftq_config": {},
|
| 15 |
+
"lora_alpha": 16,
|
| 16 |
+
"lora_bias": false,
|
| 17 |
+
"lora_dropout": 0.05,
|
| 18 |
+
"megatron_config": null,
|
| 19 |
+
"megatron_core": "megatron.core",
|
| 20 |
+
"modules_to_save": [],
|
| 21 |
+
"peft_type": "LORA",
|
| 22 |
+
"r": 16,
|
| 23 |
+
"rank_pattern": {},
|
| 24 |
+
"revision": null,
|
| 25 |
+
"target_modules": [
|
| 26 |
+
"gate_proj",
|
| 27 |
+
"down_proj",
|
| 28 |
+
"up_proj",
|
| 29 |
+
"q_proj",
|
| 30 |
+
"o_proj",
|
| 31 |
+
"v_proj",
|
| 32 |
+
"k_proj"
|
| 33 |
+
],
|
| 34 |
+
"task_type": "CAUSAL_LM",
|
| 35 |
+
"use_dora": false,
|
| 36 |
+
"use_rslora": false
|
| 37 |
+
}
|
adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4be52b9c21061efefacc45d8d6998446e843fd8f8ec05e14a94461aa0486ab1c
|
| 3 |
+
size 119273568
|
added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"<image_soft_token>": 262144
|
| 3 |
+
}
|
chat_template.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n"
|
| 3 |
+
}
|
preprocessor_config.json
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"crop_size": null,
|
| 3 |
+
"data_format": "channels_first",
|
| 4 |
+
"default_to_square": true,
|
| 5 |
+
"device": null,
|
| 6 |
+
"do_center_crop": null,
|
| 7 |
+
"do_convert_rgb": null,
|
| 8 |
+
"do_normalize": true,
|
| 9 |
+
"do_pan_and_scan": null,
|
| 10 |
+
"do_rescale": true,
|
| 11 |
+
"do_resize": true,
|
| 12 |
+
"image_mean": [
|
| 13 |
+
0.5,
|
| 14 |
+
0.5,
|
| 15 |
+
0.5
|
| 16 |
+
],
|
| 17 |
+
"image_processor_type": "Gemma3ImageProcessorFast",
|
| 18 |
+
"image_seq_length": 256,
|
| 19 |
+
"image_std": [
|
| 20 |
+
0.5,
|
| 21 |
+
0.5,
|
| 22 |
+
0.5
|
| 23 |
+
],
|
| 24 |
+
"input_data_format": null,
|
| 25 |
+
"pan_and_scan_max_num_crops": null,
|
| 26 |
+
"pan_and_scan_min_crop_size": null,
|
| 27 |
+
"pan_and_scan_min_ratio_to_activate": null,
|
| 28 |
+
"processor_class": "Gemma3Processor",
|
| 29 |
+
"resample": 2,
|
| 30 |
+
"rescale_factor": 0.00392156862745098,
|
| 31 |
+
"return_tensors": null,
|
| 32 |
+
"size": {
|
| 33 |
+
"height": 896,
|
| 34 |
+
"width": 896
|
| 35 |
+
}
|
| 36 |
+
}
|
processor_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"image_seq_length": 256,
|
| 3 |
+
"processor_class": "Gemma3Processor"
|
| 4 |
+
}
|
runs/May21_15-45-43_selene/events.out.tfevents.1747835160.selene.959022.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d60c3c65b704855fd1b75c246449cd2cb8d1a0577b6878d2e65ef581d704a07b
|
| 3 |
+
size 5844
|
runs/May21_15-47-50_selene/events.out.tfevents.1747835271.selene.961082.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb175d4a7a6d714fab1eaaf7328512736211822f70910e01d2745ac54c412e27
|
| 3 |
+
size 5844
|
runs/May21_15-48-48_selene/events.out.tfevents.1747835329.selene.961714.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1806c200a6e53c10a58f264cb8bae6e5f7f7f69b5812a3bd0f64fd4b714cfef5
|
| 3 |
+
size 11028
|
runs/May21_16-29-29_selene/events.out.tfevents.1747837770.selene.987629.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7fc84c3f7c6792fd6f85f3a67310c452b1455ab1eaa8787a0030084068decfbf
|
| 3 |
+
size 6168
|
runs/May21_16-44-48_selene/events.out.tfevents.1747838689.selene.997684.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1cf81ee3791d45bc4d403aea151ae08692f75d252934d73c88efa245931930bc
|
| 3 |
+
size 5844
|
runs/May21_16-45-53_selene/events.out.tfevents.1747838754.selene.998545.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05294b1a3e15a111b26dbfdb5568231800dd985d930b2c71c3db0f5c8222a45b
|
| 3 |
+
size 5844
|
runs/May21_16-48-52_selene/events.out.tfevents.1747838933.selene.1001095.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:82f4a515098233573efa07e229779b4b218c6a71824e33330bd316199fac7d6e
|
| 3 |
+
size 6493
|
runs/May21_16-54-03_selene/events.out.tfevents.1747839244.selene.1004158.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d17df8146d08436a12c41b286e30416d73d0fa8adcc29205db82373e2b6875a
|
| 3 |
+
size 25495
|
runs/May21_20-43-10_selene/events.out.tfevents.1747852991.selene.1076346.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:776fb8208bf1025a8af8cb64ea56627593f9bc3960a430a176cb944ebb7c78b7
|
| 3 |
+
size 143635
|
runs/May22_06-51-50_selene/events.out.tfevents.1747889512.selene.1338899.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4d9076af15237dbb81dc4e72195d0c24f90bee07077065890fc75b0face9412
|
| 3 |
+
size 5845
|
runs/May22_06-55-21_selene/events.out.tfevents.1747889723.selene.1339736.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec905e91e8c25a0c98f8af2a71ccb409c7ac2c1b6c3f3954d0ba1859fc84bef4
|
| 3 |
+
size 5845
|
runs/May22_06-57-26_selene/events.out.tfevents.1747889847.selene.1340100.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:321eb0a9bd6dfb7ae8a36a49adac9534641818a06c2d4b24ab1d703b3c03d69e
|
| 3 |
+
size 5845
|
runs/May22_06-59-01_selene/events.out.tfevents.1747889942.selene.1340361.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3dcc921c664befcc4e50e602bf787e7f05eb575239847f21f58f09a4da1fd65b
|
| 3 |
+
size 5845
|
runs/May22_07-07-57_selene/events.out.tfevents.1747890479.selene.1343384.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3f315e07f15d7514aa1bdab4f3777b73c61112e8b21b9373b7914c96cfe537e
|
| 3 |
+
size 5845
|
runs/May22_07-10-46_selene/events.out.tfevents.1747890648.selene.1345309.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8cfae4063fc30256fd7b6b4dcfe6dd476b60b07ffcbd642cfaed279e47af942b
|
| 3 |
+
size 5845
|
runs/May22_07-19-21_selene/events.out.tfevents.1747891163.selene.1348633.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae2a7d17b5446f1202c0563fa94127c1e711fb665727aa8699ce0694c0a77fb9
|
| 3 |
+
size 5845
|
runs/May22_07-19-50_selene/events.out.tfevents.1747891191.selene.1348838.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2cbf11091fd0b6580364ccd01a3a5b10b6c1dbe663a18164638423d5b906cb4d
|
| 3 |
+
size 5845
|
runs/May22_07-28-44_selene/events.out.tfevents.1747891726.selene.1350964.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c01e0aec1a04a20162a86ae3228e629514ba66a8abcf2bae2427c2338c557ff
|
| 3 |
+
size 5845
|
runs/May22_07-37-08_selene/events.out.tfevents.1747892229.selene.1352437.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed35940f4a197a37670bb1255a1b3332378b3bb721ea872ad325af84b5b844ae
|
| 3 |
+
size 5845
|
runs/May22_07-40-04_selene/events.out.tfevents.1747892406.selene.1353544.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:beee64d23cc0fc62acadb50740d4341ca30a247e60b36d8eba513b038171ab01
|
| 3 |
+
size 5845
|
runs/May22_07-41-41_selene/events.out.tfevents.1747892503.selene.1353827.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ea9bf417f2d8e018c397e7e5e497b760f4484d5a530287888c0aa32b9dd233e
|
| 3 |
+
size 5845
|
runs/May22_07-44-15_selene/events.out.tfevents.1747892658.selene.1354457.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9acea17cc07e56dbc3dd89c1539a78c77fba7511f57f12e45966d8236af1bc12
|
| 3 |
+
size 5845
|
runs/May22_07-51-56_selene/events.out.tfevents.1747893118.selene.1357192.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae4d738c156a75c124d962c4c3e106714e2aab4fd8fe1228348318055049f541
|
| 3 |
+
size 5846
|
runs/May22_07-55-20_selene/events.out.tfevents.1747893321.selene.1358132.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae41cc85862eca8eaa241e7d5e2318134db65ea787b4aec9a5e53588ffacb16f
|
| 3 |
+
size 5846
|
runs/May22_08-07-34_selene/events.out.tfevents.1747894055.selene.1360602.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5248c673f9a016bc41c19b791abfe9d12f892261f966ee500b3d8af2c9a3a43
|
| 3 |
+
size 5849
|
runs/May22_08-14-01_selene/events.out.tfevents.1747894443.selene.1361770.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39aab14d150899e9fd60c0a7a0e104ef98eacbda218cbe6832fafa387c11c622
|
| 3 |
+
size 10795
|
runs/May22_08-25-14_selene/events.out.tfevents.1747895117.selene.1366431.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4940183077d9521ce2da8b913b11e6280f6d4203765ed6d16cb2536f38eb2787
|
| 3 |
+
size 253699
|
runs/May23_00-49-19_selene/events.out.tfevents.1747954163.selene.1807332.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9032697d1292bd37602741d28427665f35985e40df76c739d83a1cf91a7175d1
|
| 3 |
+
size 6199
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"boi_token": "<start_of_image>",
|
| 3 |
+
"bos_token": {
|
| 4 |
+
"content": "<bos>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false
|
| 9 |
+
},
|
| 10 |
+
"eoi_token": "<end_of_image>",
|
| 11 |
+
"eos_token": {
|
| 12 |
+
"content": "<eos>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false
|
| 17 |
+
},
|
| 18 |
+
"image_token": "<image_soft_token>",
|
| 19 |
+
"pad_token": {
|
| 20 |
+
"content": "<pad>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false
|
| 25 |
+
},
|
| 26 |
+
"unk_token": {
|
| 27 |
+
"content": "<unk>",
|
| 28 |
+
"lstrip": false,
|
| 29 |
+
"normalized": false,
|
| 30 |
+
"rstrip": false,
|
| 31 |
+
"single_word": false
|
| 32 |
+
}
|
| 33 |
+
}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
|
| 3 |
+
size 33384568
|
tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
|
| 3 |
+
size 4689074
|
tokenizer_config.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f28b3a4f3113f65ddf1b52b1414cbbb56125328ec4b07ba255d4bd34f0ebf9d
|
| 3 |
+
size 6225
|