abarbosa commited on
Commit
999cd63
·
verified ·
1 Parent(s): f11f308

Pushing fine-tuned model to Hugging Face Hub

Browse files
README.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ language:
4
+ - pt
5
+ - en
6
+ tags:
7
+ - aes
8
+ datasets:
9
+ - kamel-usp/aes_enem_dataset
10
+ base_model: TucanoBR/Tucano-2b4-Instruct
11
+ metrics:
12
+ - accuracy
13
+ - qwk
14
+ library_name: peft
15
+ model-index:
16
+ - name: Tucano-2b4-Instruct-tucano_classification_lora-C3-full_context-r16
17
+ results:
18
+ - task:
19
+ type: text-classification
20
+ name: Automated Essay Score
21
+ dataset:
22
+ name: Automated Essay Score ENEM Dataset
23
+ type: kamel-usp/aes_enem_dataset
24
+ config: JBCS2025
25
+ split: test
26
+ metrics:
27
+ - name: Macro F1
28
+ type: f1
29
+ value: 0.2078841553417824
30
+ - name: QWK
31
+ type: qwk
32
+ value: 0.235159599809433
33
+ - name: Weighted Macro F1
34
+ type: f1
35
+ value: 0.2625404475957165
36
+ ---
37
+ # Model ID: Tucano-2b4-Instruct-tucano_classification_lora-C3-full_context-r16
38
+ ## Results
39
+ | | test_data |
40
+ |:-----------------|------------:|
41
+ | eval_accuracy | 0.26087 |
42
+ | eval_RMSE | 63.6111 |
43
+ | eval_QWK | 0.23516 |
44
+ | eval_Macro_F1 | 0.207884 |
45
+ | eval_Weighted_F1 | 0.26254 |
46
+ | eval_Micro_F1 | 0.26087 |
47
+ | eval_HDIV | 0.130435 |
48
+
adapter_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "TucanoBR/Tucano-2b4-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 32,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.1,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": [
22
+ "classifier",
23
+ "score"
24
+ ],
25
+ "peft_type": "LORA",
26
+ "qalora_group_size": 16,
27
+ "r": 16,
28
+ "rank_pattern": {},
29
+ "revision": null,
30
+ "target_modules": [
31
+ "v_proj",
32
+ "gate_proj",
33
+ "o_proj",
34
+ "k_proj",
35
+ "up_proj",
36
+ "q_proj",
37
+ "down_proj"
38
+ ],
39
+ "task_type": "SEQ_CLS",
40
+ "trainable_token_indices": null,
41
+ "use_dora": false,
42
+ "use_qalora": false,
43
+ "use_rslora": false
44
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32a5c7b54bcbd6e5e5e6b873320b633474d8460c7f0dfe865b56ab279753a168
3
+ size 84647656
chat_template.jinja ADDED
@@ -0,0 +1 @@
 
 
1
+ {% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '<instruction>' + message['content'].strip() + '</instruction>'}}{% elif message['role'] == 'assistant' %}{{ message['content'].strip() + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}
emissions.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ timestamp,project_name,run_id,experiment_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
2
+ 2025-07-13T01:02:32,jbcs2025,1d4e3743-fe9a-40b5-880b-87cdb812cd3c,Tucano-2b4-Instruct-tucano_classification_lora-C3-full_context-r16,1662.6968799540773,0.0008182697334020824,4.921340403457559e-07,66.0,573.9388327057027,70.0,0.030032784493672124,0.2824001650866421,0.03178012286824588,0.3442130724485601,Canada,CAN,quebec,,,Linux-5.15.0-130-generic-x86_64-with-glibc2.35,3.12.11,3.0.2,192,INTEL(R) XEON(R) PLATINUM 8558,1,1 x NVIDIA H200,-71.2,46.8,2015.3516235351562,machine,N,1.0
evaluation_results.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ eval_loss,eval_model_preparation_time,eval_accuracy,eval_RMSE,eval_QWK,eval_HDIV,eval_Macro_F1,eval_Micro_F1,eval_Weighted_F1,eval_TP_0,eval_TN_0,eval_FP_0,eval_FN_0,eval_TP_1,eval_TN_1,eval_FP_1,eval_FN_1,eval_TP_2,eval_TN_2,eval_FP_2,eval_FN_2,eval_TP_3,eval_TN_3,eval_FP_3,eval_FN_3,eval_TP_4,eval_TN_4,eval_FP_4,eval_FN_4,eval_TP_5,eval_TN_5,eval_FP_5,eval_FN_5,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,reference,timestamp,id
2
+ 2.344093084335327,0.0094,0.05303030303030303,110.26139740442146,-0.137110072832211,0.4696969696969697,0.023333333333333334,0.05303030303030303,0.008484848484848486,0,97,34,1,0,102,6,24,0,126,0,6,0,79,0,53,0,92,0,40,7,39,85,1,8.0142,16.471,4.118,-1,validation_before_training,2025-07-13 00:35:03,Tucano-2b4-Instruct-tucano_classification_lora-C3-full_context-r16
3
+ 1.5726227760314941,0.0094,0.36363636363636365,49.60449637488582,0.48971663619744055,0.06060606060606055,0.27840956519709686,0.36363636363636365,0.3581512903699144,0,131,0,1,5,99,9,19,3,113,13,3,14,67,12,39,21,59,33,19,5,107,17,3,7.7605,17.009,4.252,15.0,validation_after_training,2025-07-13 00:35:03,Tucano-2b4-Instruct-tucano_classification_lora-C3-full_context-r16
4
+ 1.9798592329025269,0.0094,0.2608695652173913,63.611137480744695,0.23515959980943302,0.13043478260869568,0.20788415534178248,0.2608695652173913,0.26254044759571654,0,137,0,1,4,83,26,25,7,103,17,11,9,72,21,36,14,78,22,24,2,115,16,5,8.2728,16.681,4.231,15.0,test_results,2025-07-13 00:35:03,Tucano-2b4-Instruct-tucano_classification_lora-C3-full_context-r16
run_experiment.log ADDED
@@ -0,0 +1,801 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2025-07-13 00:34:45,787][__main__][INFO] - cache_dir: /tmp/
2
+ dataset:
3
+ name: kamel-usp/aes_enem_dataset
4
+ split: JBCS2025
5
+ training_params:
6
+ seed: 42
7
+ num_train_epochs: 20
8
+ logging_steps: 100
9
+ metric_for_best_model: QWK
10
+ bf16: true
11
+ bootstrap:
12
+ enabled: true
13
+ n_bootstrap: 10000
14
+ bootstrap_seed: 42
15
+ metrics:
16
+ - QWK
17
+ - Macro_F1
18
+ - Weighted_F1
19
+ post_training_results:
20
+ model_path: /workspace/jbcs2025/outputs/2025-03-24/20-42-59
21
+ experiments:
22
+ model:
23
+ name: TucanoBR/Tucano-2b4-Instruct
24
+ type: tucano_classification_lora
25
+ num_labels: 6
26
+ output_dir: ./results/
27
+ logging_dir: ./logs/
28
+ best_model_dir: ./results/best_model
29
+ lora_r: 16
30
+ lora_dropout: 0.1
31
+ lora_alpha: 32
32
+ lora_target_modules: all-linear
33
+ checkpoint_path: ''
34
+ tokenizer:
35
+ name: TucanoBR/Tucano-2b4-Instruct
36
+ dataset:
37
+ grade_index: 2
38
+ use_full_context: true
39
+ training_params:
40
+ weight_decay: 0.01
41
+ warmup_ratio: 0.1
42
+ learning_rate: 5.0e-05
43
+ train_batch_size: 8
44
+ eval_batch_size: 4
45
+ gradient_accumulation_steps: 2
46
+ gradient_checkpointing: true
47
+
48
+ [2025-07-13 00:34:50,649][__main__][INFO] - GPU 0: NVIDIA H200 | TDP ≈ 700 W
49
+ [2025-07-13 00:34:50,649][__main__][INFO] - Starting the Fine Tuning training process.
50
+ [2025-07-13 00:34:55,571][transformers.tokenization_utils_base][INFO] - loading file tokenizer.model from cache at None
51
+ [2025-07-13 00:34:55,571][transformers.tokenization_utils_base][INFO] - loading file tokenizer.json from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/tokenizer.json
52
+ [2025-07-13 00:34:55,571][transformers.tokenization_utils_base][INFO] - loading file added_tokens.json from cache at None
53
+ [2025-07-13 00:34:55,571][transformers.tokenization_utils_base][INFO] - loading file special_tokens_map.json from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/special_tokens_map.json
54
+ [2025-07-13 00:34:55,571][transformers.tokenization_utils_base][INFO] - loading file tokenizer_config.json from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/tokenizer_config.json
55
+ [2025-07-13 00:34:55,571][transformers.tokenization_utils_base][INFO] - loading file chat_template.jinja from cache at None
56
+ [2025-07-13 00:34:55,613][transformers.tokenization_utils_base][INFO] - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
57
+ [2025-07-13 00:34:55,620][__main__][INFO] - Tokenizer function parameters- Padding:longest; Truncation: False; Use Full Context: True
58
+ [2025-07-13 00:34:56,792][__main__][INFO] -
59
+ Token statistics for 'train' split:
60
+ [2025-07-13 00:34:56,792][__main__][INFO] - Total examples: 500
61
+ [2025-07-13 00:34:56,792][__main__][INFO] - Min tokens: 2937
62
+ [2025-07-13 00:34:56,792][__main__][INFO] - Max tokens: 2937
63
+ [2025-07-13 00:34:56,792][__main__][INFO] - Avg tokens: 2937.00
64
+ [2025-07-13 00:34:56,792][__main__][INFO] - Std tokens: 0.00
65
+ [2025-07-13 00:34:57,058][__main__][INFO] -
66
+ Token statistics for 'validation' split:
67
+ [2025-07-13 00:34:57,058][__main__][INFO] - Total examples: 132
68
+ [2025-07-13 00:34:57,058][__main__][INFO] - Min tokens: 3139
69
+ [2025-07-13 00:34:57,058][__main__][INFO] - Max tokens: 3139
70
+ [2025-07-13 00:34:57,058][__main__][INFO] - Avg tokens: 3139.00
71
+ [2025-07-13 00:34:57,058][__main__][INFO] - Std tokens: 0.00
72
+ [2025-07-13 00:34:57,340][__main__][INFO] -
73
+ Token statistics for 'test' split:
74
+ [2025-07-13 00:34:57,340][__main__][INFO] - Total examples: 138
75
+ [2025-07-13 00:34:57,340][__main__][INFO] - Min tokens: 3162
76
+ [2025-07-13 00:34:57,340][__main__][INFO] - Max tokens: 3162
77
+ [2025-07-13 00:34:57,340][__main__][INFO] - Avg tokens: 3162.00
78
+ [2025-07-13 00:34:57,340][__main__][INFO] - Std tokens: 0.00
79
+ [2025-07-13 00:34:57,340][__main__][INFO] - If token statistics are the same (max, avg, min) keep in mind that this is due to batched tokenization and padding.
80
+ [2025-07-13 00:34:57,340][__main__][INFO] - Model max length: 4096. If it is the same as stats, then there is a high chance that sequences are being truncated.
81
+ [2025-07-13 00:34:57,508][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
82
+ [2025-07-13 00:34:57,509][transformers.configuration_utils][INFO] - Model config LlamaConfig {
83
+ "architectures": [
84
+ "LlamaForCausalLM"
85
+ ],
86
+ "attention_bias": false,
87
+ "attention_dropout": 0.0,
88
+ "bos_token_id": 1,
89
+ "eos_token_id": 2,
90
+ "head_dim": 160,
91
+ "hidden_act": "silu",
92
+ "hidden_size": 2560,
93
+ "id2label": {
94
+ "0": "LABEL_0",
95
+ "1": "LABEL_1",
96
+ "2": "LABEL_2",
97
+ "3": "LABEL_3",
98
+ "4": "LABEL_4",
99
+ "5": "LABEL_5"
100
+ },
101
+ "initializer_range": 0.02,
102
+ "intermediate_size": 10240,
103
+ "label2id": {
104
+ "LABEL_0": 0,
105
+ "LABEL_1": 1,
106
+ "LABEL_2": 2,
107
+ "LABEL_3": 3,
108
+ "LABEL_4": 4,
109
+ "LABEL_5": 5
110
+ },
111
+ "max_position_embeddings": 4096,
112
+ "mlp_bias": false,
113
+ "model_type": "llama",
114
+ "num_attention_heads": 16,
115
+ "num_hidden_layers": 24,
116
+ "num_key_value_heads": 4,
117
+ "pad_token_id": 3,
118
+ "pretraining_tp": 1,
119
+ "rms_norm_eps": 1e-05,
120
+ "rope_scaling": null,
121
+ "rope_theta": 10000.0,
122
+ "tie_word_embeddings": false,
123
+ "torch_dtype": "float32",
124
+ "transformers_version": "4.53.2",
125
+ "use_cache": false,
126
+ "vocab_size": 32002
127
+ }
128
+
129
+ [2025-07-13 00:34:57,656][transformers.modeling_utils][INFO] - loading weights file model.safetensors from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/model.safetensors.index.json
130
+ [2025-07-13 00:34:57,656][transformers.modeling_utils][INFO] - Will use torch_dtype=torch.float32 as defined in model's config object
131
+ [2025-07-13 00:34:57,656][transformers.modeling_utils][INFO] - Instantiating LlamaForSequenceClassification model under default dtype torch.float32.
132
+ [2025-07-13 00:34:57,657][transformers.modeling_utils][WARNING] - Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in LlamaForSequenceClassification is torch.float32. You should run training or inference using Automatic Mixed-Precision via the `with torch.autocast(device_type='torch_device'):` decorator, or load the model with the `torch_dtype` argument. Example: `model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", torch_dtype=torch.float16)`
133
+ [2025-07-13 00:35:01,964][transformers.modeling_utils][INFO] - Some weights of the model checkpoint at TucanoBR/Tucano-2b4-Instruct were not used when initializing LlamaForSequenceClassification: ['lm_head.weight']
134
+ - This IS expected if you are initializing LlamaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
135
+ - This IS NOT expected if you are initializing LlamaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
136
+ [2025-07-13 00:35:01,965][transformers.modeling_utils][WARNING] - Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at TucanoBR/Tucano-2b4-Instruct and are newly initialized: ['score.weight']
137
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
138
+ [2025-07-13 00:35:03,281][__main__][INFO] - Initialized new PEFT model for ce loss
139
+ [2025-07-13 00:35:03,283][__main__][INFO] - None
140
+ [2025-07-13 00:35:03,284][transformers.training_args][INFO] - PyTorch: setting up devices
141
+ [2025-07-13 00:35:03,316][__main__][INFO] - Total steps: 620. Number of warmup steps: 62
142
+ [2025-07-13 00:35:03,331][transformers.trainer][INFO] - You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.
143
+ [2025-07-13 00:35:03,350][transformers.trainer][INFO] - Using auto half precision backend
144
+ [2025-07-13 00:35:03,350][transformers.trainer][WARNING] - No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
145
+ [2025-07-13 00:35:03,351][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
146
+ [2025-07-13 00:35:03,364][transformers.trainer][INFO] -
147
+ ***** Running Evaluation *****
148
+ [2025-07-13 00:35:03,365][transformers.trainer][INFO] - Num examples = 132
149
+ [2025-07-13 00:35:03,365][transformers.trainer][INFO] - Batch size = 4
150
+ [2025-07-13 00:35:03,553][transformers.modeling_flash_attention_utils][WARNING] - The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.bfloat16.
151
+ [2025-07-13 00:35:11,564][transformers.trainer][INFO] - The following columns in the Training set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
152
+ [2025-07-13 00:35:11,605][transformers.trainer][INFO] - ***** Running training *****
153
+ [2025-07-13 00:35:11,605][transformers.trainer][INFO] - Num examples = 500
154
+ [2025-07-13 00:35:11,605][transformers.trainer][INFO] - Num Epochs = 20
155
+ [2025-07-13 00:35:11,605][transformers.trainer][INFO] - Instantaneous batch size per device = 8
156
+ [2025-07-13 00:35:11,606][transformers.trainer][INFO] - Total train batch size (w. parallel, distributed & accumulation) = 16
157
+ [2025-07-13 00:35:11,606][transformers.trainer][INFO] - Gradient Accumulation steps = 2
158
+ [2025-07-13 00:35:11,606][transformers.trainer][INFO] - Total optimization steps = 640
159
+ [2025-07-13 00:35:11,608][transformers.trainer][INFO] - Number of trainable parameters = 21,150,720
160
+ [2025-07-13 00:36:51,446][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
161
+ [2025-07-13 00:36:51,449][transformers.trainer][INFO] -
162
+ ***** Running Evaluation *****
163
+ [2025-07-13 00:36:51,449][transformers.trainer][INFO] - Num examples = 132
164
+ [2025-07-13 00:36:51,449][transformers.trainer][INFO] - Batch size = 4
165
+ [2025-07-13 00:36:59,227][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-32
166
+ [2025-07-13 00:36:59,586][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
167
+ [2025-07-13 00:36:59,586][transformers.configuration_utils][INFO] - Model config LlamaConfig {
168
+ "architectures": [
169
+ "LlamaForCausalLM"
170
+ ],
171
+ "attention_bias": false,
172
+ "attention_dropout": 0.0,
173
+ "bos_token_id": 1,
174
+ "eos_token_id": 2,
175
+ "head_dim": 160,
176
+ "hidden_act": "silu",
177
+ "hidden_size": 2560,
178
+ "initializer_range": 0.02,
179
+ "intermediate_size": 10240,
180
+ "max_position_embeddings": 4096,
181
+ "mlp_bias": false,
182
+ "model_type": "llama",
183
+ "num_attention_heads": 16,
184
+ "num_hidden_layers": 24,
185
+ "num_key_value_heads": 4,
186
+ "pad_token_id": 3,
187
+ "pretraining_tp": 1,
188
+ "rms_norm_eps": 1e-05,
189
+ "rope_scaling": null,
190
+ "rope_theta": 10000.0,
191
+ "tie_word_embeddings": false,
192
+ "torch_dtype": "float32",
193
+ "transformers_version": "4.53.2",
194
+ "use_cache": false,
195
+ "vocab_size": 32002
196
+ }
197
+
198
+ [2025-07-13 00:38:39,653][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
199
+ [2025-07-13 00:38:39,657][transformers.trainer][INFO] -
200
+ ***** Running Evaluation *****
201
+ [2025-07-13 00:38:39,657][transformers.trainer][INFO] - Num examples = 132
202
+ [2025-07-13 00:38:39,657][transformers.trainer][INFO] - Batch size = 4
203
+ [2025-07-13 00:38:47,444][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-64
204
+ [2025-07-13 00:38:47,804][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
205
+ [2025-07-13 00:38:47,805][transformers.configuration_utils][INFO] - Model config LlamaConfig {
206
+ "architectures": [
207
+ "LlamaForCausalLM"
208
+ ],
209
+ "attention_bias": false,
210
+ "attention_dropout": 0.0,
211
+ "bos_token_id": 1,
212
+ "eos_token_id": 2,
213
+ "head_dim": 160,
214
+ "hidden_act": "silu",
215
+ "hidden_size": 2560,
216
+ "initializer_range": 0.02,
217
+ "intermediate_size": 10240,
218
+ "max_position_embeddings": 4096,
219
+ "mlp_bias": false,
220
+ "model_type": "llama",
221
+ "num_attention_heads": 16,
222
+ "num_hidden_layers": 24,
223
+ "num_key_value_heads": 4,
224
+ "pad_token_id": 3,
225
+ "pretraining_tp": 1,
226
+ "rms_norm_eps": 1e-05,
227
+ "rope_scaling": null,
228
+ "rope_theta": 10000.0,
229
+ "tie_word_embeddings": false,
230
+ "torch_dtype": "float32",
231
+ "transformers_version": "4.53.2",
232
+ "use_cache": false,
233
+ "vocab_size": 32002
234
+ }
235
+
236
+ [2025-07-13 00:38:48,179][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-32] due to args.save_total_limit
237
+ [2025-07-13 00:40:27,857][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
238
+ [2025-07-13 00:40:27,860][transformers.trainer][INFO] -
239
+ ***** Running Evaluation *****
240
+ [2025-07-13 00:40:27,860][transformers.trainer][INFO] - Num examples = 132
241
+ [2025-07-13 00:40:27,860][transformers.trainer][INFO] - Batch size = 4
242
+ [2025-07-13 00:40:35,640][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-96
243
+ [2025-07-13 00:40:35,994][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
244
+ [2025-07-13 00:40:35,994][transformers.configuration_utils][INFO] - Model config LlamaConfig {
245
+ "architectures": [
246
+ "LlamaForCausalLM"
247
+ ],
248
+ "attention_bias": false,
249
+ "attention_dropout": 0.0,
250
+ "bos_token_id": 1,
251
+ "eos_token_id": 2,
252
+ "head_dim": 160,
253
+ "hidden_act": "silu",
254
+ "hidden_size": 2560,
255
+ "initializer_range": 0.02,
256
+ "intermediate_size": 10240,
257
+ "max_position_embeddings": 4096,
258
+ "mlp_bias": false,
259
+ "model_type": "llama",
260
+ "num_attention_heads": 16,
261
+ "num_hidden_layers": 24,
262
+ "num_key_value_heads": 4,
263
+ "pad_token_id": 3,
264
+ "pretraining_tp": 1,
265
+ "rms_norm_eps": 1e-05,
266
+ "rope_scaling": null,
267
+ "rope_theta": 10000.0,
268
+ "tie_word_embeddings": false,
269
+ "torch_dtype": "float32",
270
+ "transformers_version": "4.53.2",
271
+ "use_cache": false,
272
+ "vocab_size": 32002
273
+ }
274
+
275
+ [2025-07-13 00:40:36,381][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-64] due to args.save_total_limit
276
+ [2025-07-13 00:42:16,069][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
277
+ [2025-07-13 00:42:16,073][transformers.trainer][INFO] -
278
+ ***** Running Evaluation *****
279
+ [2025-07-13 00:42:16,073][transformers.trainer][INFO] - Num examples = 132
280
+ [2025-07-13 00:42:16,073][transformers.trainer][INFO] - Batch size = 4
281
+ [2025-07-13 00:42:23,848][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-128
282
+ [2025-07-13 00:42:24,268][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
283
+ [2025-07-13 00:42:24,268][transformers.configuration_utils][INFO] - Model config LlamaConfig {
284
+ "architectures": [
285
+ "LlamaForCausalLM"
286
+ ],
287
+ "attention_bias": false,
288
+ "attention_dropout": 0.0,
289
+ "bos_token_id": 1,
290
+ "eos_token_id": 2,
291
+ "head_dim": 160,
292
+ "hidden_act": "silu",
293
+ "hidden_size": 2560,
294
+ "initializer_range": 0.02,
295
+ "intermediate_size": 10240,
296
+ "max_position_embeddings": 4096,
297
+ "mlp_bias": false,
298
+ "model_type": "llama",
299
+ "num_attention_heads": 16,
300
+ "num_hidden_layers": 24,
301
+ "num_key_value_heads": 4,
302
+ "pad_token_id": 3,
303
+ "pretraining_tp": 1,
304
+ "rms_norm_eps": 1e-05,
305
+ "rope_scaling": null,
306
+ "rope_theta": 10000.0,
307
+ "tie_word_embeddings": false,
308
+ "torch_dtype": "float32",
309
+ "transformers_version": "4.53.2",
310
+ "use_cache": false,
311
+ "vocab_size": 32002
312
+ }
313
+
314
+ [2025-07-13 00:42:24,682][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-96] due to args.save_total_limit
315
+ [2025-07-13 00:44:04,382][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
316
+ [2025-07-13 00:44:04,385][transformers.trainer][INFO] -
317
+ ***** Running Evaluation *****
318
+ [2025-07-13 00:44:04,385][transformers.trainer][INFO] - Num examples = 132
319
+ [2025-07-13 00:44:04,385][transformers.trainer][INFO] - Batch size = 4
320
+ [2025-07-13 00:44:12,163][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-160
321
+ [2025-07-13 00:44:12,518][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
322
+ [2025-07-13 00:44:12,518][transformers.configuration_utils][INFO] - Model config LlamaConfig {
323
+ "architectures": [
324
+ "LlamaForCausalLM"
325
+ ],
326
+ "attention_bias": false,
327
+ "attention_dropout": 0.0,
328
+ "bos_token_id": 1,
329
+ "eos_token_id": 2,
330
+ "head_dim": 160,
331
+ "hidden_act": "silu",
332
+ "hidden_size": 2560,
333
+ "initializer_range": 0.02,
334
+ "intermediate_size": 10240,
335
+ "max_position_embeddings": 4096,
336
+ "mlp_bias": false,
337
+ "model_type": "llama",
338
+ "num_attention_heads": 16,
339
+ "num_hidden_layers": 24,
340
+ "num_key_value_heads": 4,
341
+ "pad_token_id": 3,
342
+ "pretraining_tp": 1,
343
+ "rms_norm_eps": 1e-05,
344
+ "rope_scaling": null,
345
+ "rope_theta": 10000.0,
346
+ "tie_word_embeddings": false,
347
+ "torch_dtype": "float32",
348
+ "transformers_version": "4.53.2",
349
+ "use_cache": false,
350
+ "vocab_size": 32002
351
+ }
352
+
353
+ [2025-07-13 00:44:12,885][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-128] due to args.save_total_limit
354
+ [2025-07-13 00:45:52,651][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
355
+ [2025-07-13 00:45:52,655][transformers.trainer][INFO] -
356
+ ***** Running Evaluation *****
357
+ [2025-07-13 00:45:52,655][transformers.trainer][INFO] - Num examples = 132
358
+ [2025-07-13 00:45:52,655][transformers.trainer][INFO] - Batch size = 4
359
+ [2025-07-13 00:46:00,434][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-192
360
+ [2025-07-13 00:46:00,796][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
361
+ [2025-07-13 00:46:00,797][transformers.configuration_utils][INFO] - Model config LlamaConfig {
362
+ "architectures": [
363
+ "LlamaForCausalLM"
364
+ ],
365
+ "attention_bias": false,
366
+ "attention_dropout": 0.0,
367
+ "bos_token_id": 1,
368
+ "eos_token_id": 2,
369
+ "head_dim": 160,
370
+ "hidden_act": "silu",
371
+ "hidden_size": 2560,
372
+ "initializer_range": 0.02,
373
+ "intermediate_size": 10240,
374
+ "max_position_embeddings": 4096,
375
+ "mlp_bias": false,
376
+ "model_type": "llama",
377
+ "num_attention_heads": 16,
378
+ "num_hidden_layers": 24,
379
+ "num_key_value_heads": 4,
380
+ "pad_token_id": 3,
381
+ "pretraining_tp": 1,
382
+ "rms_norm_eps": 1e-05,
383
+ "rope_scaling": null,
384
+ "rope_theta": 10000.0,
385
+ "tie_word_embeddings": false,
386
+ "torch_dtype": "float32",
387
+ "transformers_version": "4.53.2",
388
+ "use_cache": false,
389
+ "vocab_size": 32002
390
+ }
391
+
392
+ [2025-07-13 00:47:40,827][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
393
+ [2025-07-13 00:47:40,830][transformers.trainer][INFO] -
394
+ ***** Running Evaluation *****
395
+ [2025-07-13 00:47:40,830][transformers.trainer][INFO] - Num examples = 132
396
+ [2025-07-13 00:47:40,831][transformers.trainer][INFO] - Batch size = 4
397
+ [2025-07-13 00:47:48,604][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-224
398
+ [2025-07-13 00:47:49,246][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
399
+ [2025-07-13 00:47:49,246][transformers.configuration_utils][INFO] - Model config LlamaConfig {
400
+ "architectures": [
401
+ "LlamaForCausalLM"
402
+ ],
403
+ "attention_bias": false,
404
+ "attention_dropout": 0.0,
405
+ "bos_token_id": 1,
406
+ "eos_token_id": 2,
407
+ "head_dim": 160,
408
+ "hidden_act": "silu",
409
+ "hidden_size": 2560,
410
+ "initializer_range": 0.02,
411
+ "intermediate_size": 10240,
412
+ "max_position_embeddings": 4096,
413
+ "mlp_bias": false,
414
+ "model_type": "llama",
415
+ "num_attention_heads": 16,
416
+ "num_hidden_layers": 24,
417
+ "num_key_value_heads": 4,
418
+ "pad_token_id": 3,
419
+ "pretraining_tp": 1,
420
+ "rms_norm_eps": 1e-05,
421
+ "rope_scaling": null,
422
+ "rope_theta": 10000.0,
423
+ "tie_word_embeddings": false,
424
+ "torch_dtype": "float32",
425
+ "transformers_version": "4.53.2",
426
+ "use_cache": false,
427
+ "vocab_size": 32002
428
+ }
429
+
430
+ [2025-07-13 00:47:49,680][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-192] due to args.save_total_limit
431
+ [2025-07-13 00:49:29,358][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
432
+ [2025-07-13 00:49:29,361][transformers.trainer][INFO] -
433
+ ***** Running Evaluation *****
434
+ [2025-07-13 00:49:29,361][transformers.trainer][INFO] - Num examples = 132
435
+ [2025-07-13 00:49:29,361][transformers.trainer][INFO] - Batch size = 4
436
+ [2025-07-13 00:49:37,148][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-256
437
+ [2025-07-13 00:49:37,504][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
438
+ [2025-07-13 00:49:37,504][transformers.configuration_utils][INFO] - Model config LlamaConfig {
439
+ "architectures": [
440
+ "LlamaForCausalLM"
441
+ ],
442
+ "attention_bias": false,
443
+ "attention_dropout": 0.0,
444
+ "bos_token_id": 1,
445
+ "eos_token_id": 2,
446
+ "head_dim": 160,
447
+ "hidden_act": "silu",
448
+ "hidden_size": 2560,
449
+ "initializer_range": 0.02,
450
+ "intermediate_size": 10240,
451
+ "max_position_embeddings": 4096,
452
+ "mlp_bias": false,
453
+ "model_type": "llama",
454
+ "num_attention_heads": 16,
455
+ "num_hidden_layers": 24,
456
+ "num_key_value_heads": 4,
457
+ "pad_token_id": 3,
458
+ "pretraining_tp": 1,
459
+ "rms_norm_eps": 1e-05,
460
+ "rope_scaling": null,
461
+ "rope_theta": 10000.0,
462
+ "tie_word_embeddings": false,
463
+ "torch_dtype": "float32",
464
+ "transformers_version": "4.53.2",
465
+ "use_cache": false,
466
+ "vocab_size": 32002
467
+ }
468
+
469
+ [2025-07-13 00:49:37,883][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-160] due to args.save_total_limit
470
+ [2025-07-13 00:49:37,902][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-224] due to args.save_total_limit
471
+ [2025-07-13 00:51:17,577][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
472
+ [2025-07-13 00:51:17,581][transformers.trainer][INFO] -
473
+ ***** Running Evaluation *****
474
+ [2025-07-13 00:51:17,581][transformers.trainer][INFO] - Num examples = 132
475
+ [2025-07-13 00:51:17,581][transformers.trainer][INFO] - Batch size = 4
476
+ [2025-07-13 00:51:25,362][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-288
477
+ [2025-07-13 00:51:25,724][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
478
+ [2025-07-13 00:51:25,724][transformers.configuration_utils][INFO] - Model config LlamaConfig {
479
+ "architectures": [
480
+ "LlamaForCausalLM"
481
+ ],
482
+ "attention_bias": false,
483
+ "attention_dropout": 0.0,
484
+ "bos_token_id": 1,
485
+ "eos_token_id": 2,
486
+ "head_dim": 160,
487
+ "hidden_act": "silu",
488
+ "hidden_size": 2560,
489
+ "initializer_range": 0.02,
490
+ "intermediate_size": 10240,
491
+ "max_position_embeddings": 4096,
492
+ "mlp_bias": false,
493
+ "model_type": "llama",
494
+ "num_attention_heads": 16,
495
+ "num_hidden_layers": 24,
496
+ "num_key_value_heads": 4,
497
+ "pad_token_id": 3,
498
+ "pretraining_tp": 1,
499
+ "rms_norm_eps": 1e-05,
500
+ "rope_scaling": null,
501
+ "rope_theta": 10000.0,
502
+ "tie_word_embeddings": false,
503
+ "torch_dtype": "float32",
504
+ "transformers_version": "4.53.2",
505
+ "use_cache": false,
506
+ "vocab_size": 32002
507
+ }
508
+
509
+ [2025-07-13 00:53:05,750][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
510
+ [2025-07-13 00:53:05,753][transformers.trainer][INFO] -
511
+ ***** Running Evaluation *****
512
+ [2025-07-13 00:53:05,754][transformers.trainer][INFO] - Num examples = 132
513
+ [2025-07-13 00:53:05,754][transformers.trainer][INFO] - Batch size = 4
514
+ [2025-07-13 00:53:13,533][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-320
515
+ [2025-07-13 00:53:13,901][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
516
+ [2025-07-13 00:53:13,902][transformers.configuration_utils][INFO] - Model config LlamaConfig {
517
+ "architectures": [
518
+ "LlamaForCausalLM"
519
+ ],
520
+ "attention_bias": false,
521
+ "attention_dropout": 0.0,
522
+ "bos_token_id": 1,
523
+ "eos_token_id": 2,
524
+ "head_dim": 160,
525
+ "hidden_act": "silu",
526
+ "hidden_size": 2560,
527
+ "initializer_range": 0.02,
528
+ "intermediate_size": 10240,
529
+ "max_position_embeddings": 4096,
530
+ "mlp_bias": false,
531
+ "model_type": "llama",
532
+ "num_attention_heads": 16,
533
+ "num_hidden_layers": 24,
534
+ "num_key_value_heads": 4,
535
+ "pad_token_id": 3,
536
+ "pretraining_tp": 1,
537
+ "rms_norm_eps": 1e-05,
538
+ "rope_scaling": null,
539
+ "rope_theta": 10000.0,
540
+ "tie_word_embeddings": false,
541
+ "torch_dtype": "float32",
542
+ "transformers_version": "4.53.2",
543
+ "use_cache": false,
544
+ "vocab_size": 32002
545
+ }
546
+
547
+ [2025-07-13 00:53:14,293][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-256] due to args.save_total_limit
548
+ [2025-07-13 00:53:14,310][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-288] due to args.save_total_limit
549
+ [2025-07-13 00:54:53,990][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
550
+ [2025-07-13 00:54:53,993][transformers.trainer][INFO] -
551
+ ***** Running Evaluation *****
552
+ [2025-07-13 00:54:53,993][transformers.trainer][INFO] - Num examples = 132
553
+ [2025-07-13 00:54:53,993][transformers.trainer][INFO] - Batch size = 4
554
+ [2025-07-13 00:55:01,772][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-352
555
+ [2025-07-13 00:55:02,153][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
556
+ [2025-07-13 00:55:02,154][transformers.configuration_utils][INFO] - Model config LlamaConfig {
557
+ "architectures": [
558
+ "LlamaForCausalLM"
559
+ ],
560
+ "attention_bias": false,
561
+ "attention_dropout": 0.0,
562
+ "bos_token_id": 1,
563
+ "eos_token_id": 2,
564
+ "head_dim": 160,
565
+ "hidden_act": "silu",
566
+ "hidden_size": 2560,
567
+ "initializer_range": 0.02,
568
+ "intermediate_size": 10240,
569
+ "max_position_embeddings": 4096,
570
+ "mlp_bias": false,
571
+ "model_type": "llama",
572
+ "num_attention_heads": 16,
573
+ "num_hidden_layers": 24,
574
+ "num_key_value_heads": 4,
575
+ "pad_token_id": 3,
576
+ "pretraining_tp": 1,
577
+ "rms_norm_eps": 1e-05,
578
+ "rope_scaling": null,
579
+ "rope_theta": 10000.0,
580
+ "tie_word_embeddings": false,
581
+ "torch_dtype": "float32",
582
+ "transformers_version": "4.53.2",
583
+ "use_cache": false,
584
+ "vocab_size": 32002
585
+ }
586
+
587
+ [2025-07-13 00:56:42,258][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
588
+ [2025-07-13 00:56:42,262][transformers.trainer][INFO] -
589
+ ***** Running Evaluation *****
590
+ [2025-07-13 00:56:42,262][transformers.trainer][INFO] - Num examples = 132
591
+ [2025-07-13 00:56:42,262][transformers.trainer][INFO] - Batch size = 4
592
+ [2025-07-13 00:56:50,042][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-384
593
+ [2025-07-13 00:56:50,395][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
594
+ [2025-07-13 00:56:50,395][transformers.configuration_utils][INFO] - Model config LlamaConfig {
595
+ "architectures": [
596
+ "LlamaForCausalLM"
597
+ ],
598
+ "attention_bias": false,
599
+ "attention_dropout": 0.0,
600
+ "bos_token_id": 1,
601
+ "eos_token_id": 2,
602
+ "head_dim": 160,
603
+ "hidden_act": "silu",
604
+ "hidden_size": 2560,
605
+ "initializer_range": 0.02,
606
+ "intermediate_size": 10240,
607
+ "max_position_embeddings": 4096,
608
+ "mlp_bias": false,
609
+ "model_type": "llama",
610
+ "num_attention_heads": 16,
611
+ "num_hidden_layers": 24,
612
+ "num_key_value_heads": 4,
613
+ "pad_token_id": 3,
614
+ "pretraining_tp": 1,
615
+ "rms_norm_eps": 1e-05,
616
+ "rope_scaling": null,
617
+ "rope_theta": 10000.0,
618
+ "tie_word_embeddings": false,
619
+ "torch_dtype": "float32",
620
+ "transformers_version": "4.53.2",
621
+ "use_cache": false,
622
+ "vocab_size": 32002
623
+ }
624
+
625
+ [2025-07-13 00:56:50,787][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-352] due to args.save_total_limit
626
+ [2025-07-13 00:58:30,455][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
627
+ [2025-07-13 00:58:30,458][transformers.trainer][INFO] -
628
+ ***** Running Evaluation *****
629
+ [2025-07-13 00:58:30,458][transformers.trainer][INFO] - Num examples = 132
630
+ [2025-07-13 00:58:30,458][transformers.trainer][INFO] - Batch size = 4
631
+ [2025-07-13 00:58:38,242][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-416
632
+ [2025-07-13 00:58:38,595][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
633
+ [2025-07-13 00:58:38,596][transformers.configuration_utils][INFO] - Model config LlamaConfig {
634
+ "architectures": [
635
+ "LlamaForCausalLM"
636
+ ],
637
+ "attention_bias": false,
638
+ "attention_dropout": 0.0,
639
+ "bos_token_id": 1,
640
+ "eos_token_id": 2,
641
+ "head_dim": 160,
642
+ "hidden_act": "silu",
643
+ "hidden_size": 2560,
644
+ "initializer_range": 0.02,
645
+ "intermediate_size": 10240,
646
+ "max_position_embeddings": 4096,
647
+ "mlp_bias": false,
648
+ "model_type": "llama",
649
+ "num_attention_heads": 16,
650
+ "num_hidden_layers": 24,
651
+ "num_key_value_heads": 4,
652
+ "pad_token_id": 3,
653
+ "pretraining_tp": 1,
654
+ "rms_norm_eps": 1e-05,
655
+ "rope_scaling": null,
656
+ "rope_theta": 10000.0,
657
+ "tie_word_embeddings": false,
658
+ "torch_dtype": "float32",
659
+ "transformers_version": "4.53.2",
660
+ "use_cache": false,
661
+ "vocab_size": 32002
662
+ }
663
+
664
+ [2025-07-13 00:58:38,982][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-384] due to args.save_total_limit
665
+ [2025-07-13 01:00:18,662][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
666
+ [2025-07-13 01:00:18,666][transformers.trainer][INFO] -
667
+ ***** Running Evaluation *****
668
+ [2025-07-13 01:00:18,666][transformers.trainer][INFO] - Num examples = 132
669
+ [2025-07-13 01:00:18,666][transformers.trainer][INFO] - Batch size = 4
670
+ [2025-07-13 01:00:26,450][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-448
671
+ [2025-07-13 01:00:26,813][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
672
+ [2025-07-13 01:00:26,813][transformers.configuration_utils][INFO] - Model config LlamaConfig {
673
+ "architectures": [
674
+ "LlamaForCausalLM"
675
+ ],
676
+ "attention_bias": false,
677
+ "attention_dropout": 0.0,
678
+ "bos_token_id": 1,
679
+ "eos_token_id": 2,
680
+ "head_dim": 160,
681
+ "hidden_act": "silu",
682
+ "hidden_size": 2560,
683
+ "initializer_range": 0.02,
684
+ "intermediate_size": 10240,
685
+ "max_position_embeddings": 4096,
686
+ "mlp_bias": false,
687
+ "model_type": "llama",
688
+ "num_attention_heads": 16,
689
+ "num_hidden_layers": 24,
690
+ "num_key_value_heads": 4,
691
+ "pad_token_id": 3,
692
+ "pretraining_tp": 1,
693
+ "rms_norm_eps": 1e-05,
694
+ "rope_scaling": null,
695
+ "rope_theta": 10000.0,
696
+ "tie_word_embeddings": false,
697
+ "torch_dtype": "float32",
698
+ "transformers_version": "4.53.2",
699
+ "use_cache": false,
700
+ "vocab_size": 32002
701
+ }
702
+
703
+ [2025-07-13 01:00:27,186][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-416] due to args.save_total_limit
704
+ [2025-07-13 01:02:07,034][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
705
+ [2025-07-13 01:02:07,037][transformers.trainer][INFO] -
706
+ ***** Running Evaluation *****
707
+ [2025-07-13 01:02:07,037][transformers.trainer][INFO] - Num examples = 132
708
+ [2025-07-13 01:02:07,037][transformers.trainer][INFO] - Batch size = 4
709
+ [2025-07-13 01:02:14,815][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-480
710
+ [2025-07-13 01:02:15,171][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
711
+ [2025-07-13 01:02:15,171][transformers.configuration_utils][INFO] - Model config LlamaConfig {
712
+ "architectures": [
713
+ "LlamaForCausalLM"
714
+ ],
715
+ "attention_bias": false,
716
+ "attention_dropout": 0.0,
717
+ "bos_token_id": 1,
718
+ "eos_token_id": 2,
719
+ "head_dim": 160,
720
+ "hidden_act": "silu",
721
+ "hidden_size": 2560,
722
+ "initializer_range": 0.02,
723
+ "intermediate_size": 10240,
724
+ "max_position_embeddings": 4096,
725
+ "mlp_bias": false,
726
+ "model_type": "llama",
727
+ "num_attention_heads": 16,
728
+ "num_hidden_layers": 24,
729
+ "num_key_value_heads": 4,
730
+ "pad_token_id": 3,
731
+ "pretraining_tp": 1,
732
+ "rms_norm_eps": 1e-05,
733
+ "rope_scaling": null,
734
+ "rope_theta": 10000.0,
735
+ "tie_word_embeddings": false,
736
+ "torch_dtype": "float32",
737
+ "transformers_version": "4.53.2",
738
+ "use_cache": false,
739
+ "vocab_size": 32002
740
+ }
741
+
742
+ [2025-07-13 01:02:15,580][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-448] due to args.save_total_limit
743
+ [2025-07-13 01:02:15,597][transformers.trainer][INFO] -
744
+
745
+ Training completed. Do not forget to share your model on huggingface.co/models =)
746
+
747
+
748
+ [2025-07-13 01:02:15,597][transformers.trainer][INFO] - Loading best model from /workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-320 (score: 0.48971663619744055).
749
+ [2025-07-13 01:02:15,695][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-13/00-34-45/results/checkpoint-480] due to args.save_total_limit
750
+ [2025-07-13 01:02:15,715][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
751
+ [2025-07-13 01:02:15,718][transformers.trainer][INFO] -
752
+ ***** Running Evaluation *****
753
+ [2025-07-13 01:02:15,718][transformers.trainer][INFO] - Num examples = 132
754
+ [2025-07-13 01:02:15,718][transformers.trainer][INFO] - Batch size = 4
755
+ [2025-07-13 01:02:23,490][__main__][INFO] - Training completed successfully.
756
+ [2025-07-13 01:02:23,490][__main__][INFO] - Running on Test
757
+ [2025-07-13 01:02:23,490][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text. If id_prompt, id, grades, prompt, reference, essay_year, essay_text, supporting_text are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
758
+ [2025-07-13 01:02:23,494][transformers.trainer][INFO] -
759
+ ***** Running Evaluation *****
760
+ [2025-07-13 01:02:23,494][transformers.trainer][INFO] - Num examples = 138
761
+ [2025-07-13 01:02:23,494][transformers.trainer][INFO] - Batch size = 4
762
+ [2025-07-13 01:02:31,776][__main__][INFO] - Test metrics: {'eval_loss': 1.9798592329025269, 'eval_model_preparation_time': 0.0094, 'eval_accuracy': 0.2608695652173913, 'eval_RMSE': 63.611137480744695, 'eval_QWK': 0.23515959980943302, 'eval_HDIV': 0.13043478260869568, 'eval_Macro_F1': 0.20788415534178248, 'eval_Micro_F1': 0.2608695652173913, 'eval_Weighted_F1': 0.26254044759571654, 'eval_TP_0': 0, 'eval_TN_0': 137, 'eval_FP_0': 0, 'eval_FN_0': 1, 'eval_TP_1': 4, 'eval_TN_1': 83, 'eval_FP_1': 26, 'eval_FN_1': 25, 'eval_TP_2': 7, 'eval_TN_2': 103, 'eval_FP_2': 17, 'eval_FN_2': 11, 'eval_TP_3': 9, 'eval_TN_3': 72, 'eval_FP_3': 21, 'eval_FN_3': 36, 'eval_TP_4': 14, 'eval_TN_4': 78, 'eval_FP_4': 22, 'eval_FN_4': 24, 'eval_TP_5': 2, 'eval_TN_5': 115, 'eval_FP_5': 16, 'eval_FN_5': 5, 'eval_runtime': 8.2728, 'eval_samples_per_second': 16.681, 'eval_steps_per_second': 4.231, 'epoch': 15.0}
763
+ [2025-07-13 01:02:31,777][transformers.trainer][INFO] - Saving model checkpoint to ./results/best_model
764
+ [2025-07-13 01:02:32,138][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
765
+ [2025-07-13 01:02:32,138][transformers.configuration_utils][INFO] - Model config LlamaConfig {
766
+ "architectures": [
767
+ "LlamaForCausalLM"
768
+ ],
769
+ "attention_bias": false,
770
+ "attention_dropout": 0.0,
771
+ "bos_token_id": 1,
772
+ "eos_token_id": 2,
773
+ "head_dim": 160,
774
+ "hidden_act": "silu",
775
+ "hidden_size": 2560,
776
+ "initializer_range": 0.02,
777
+ "intermediate_size": 10240,
778
+ "max_position_embeddings": 4096,
779
+ "mlp_bias": false,
780
+ "model_type": "llama",
781
+ "num_attention_heads": 16,
782
+ "num_hidden_layers": 24,
783
+ "num_key_value_heads": 4,
784
+ "pad_token_id": 3,
785
+ "pretraining_tp": 1,
786
+ "rms_norm_eps": 1e-05,
787
+ "rope_scaling": null,
788
+ "rope_theta": 10000.0,
789
+ "tie_word_embeddings": false,
790
+ "torch_dtype": "float32",
791
+ "transformers_version": "4.53.2",
792
+ "use_cache": false,
793
+ "vocab_size": 32002
794
+ }
795
+
796
+ [2025-07-13 01:02:32,310][transformers.tokenization_utils_base][INFO] - chat template saved in ./results/best_model/chat_template.jinja
797
+ [2025-07-13 01:02:32,311][transformers.tokenization_utils_base][INFO] - tokenizer config file saved in ./results/best_model/tokenizer_config.json
798
+ [2025-07-13 01:02:32,311][transformers.tokenization_utils_base][INFO] - Special tokens file saved in ./results/best_model/special_tokens_map.json
799
+ [2025-07-13 01:02:32,324][__main__][INFO] - Model and tokenizer saved to ./results/best_model
800
+ [2025-07-13 01:02:32,328][__main__][INFO] - Fine Tuning Finished.
801
+ [2025-07-13 01:02:32,841][__main__][INFO] - Total emissions: 0.0008 kg CO2eq
special_tokens_map.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<instruction>",
4
+ "</instruction>"
5
+ ],
6
+ "bos_token": {
7
+ "content": "<s>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "eos_token": {
14
+ "content": "</s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "pad_token": "<|finetune_right_pad_id|>",
21
+ "unk_token": {
22
+ "content": "<unk>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ }
28
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "3": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "32000": {
39
+ "content": "<instruction>",
40
+ "lstrip": false,
41
+ "normalized": true,
42
+ "rstrip": false,
43
+ "single_word": false,
44
+ "special": true
45
+ },
46
+ "32001": {
47
+ "content": "</instruction>",
48
+ "lstrip": false,
49
+ "normalized": true,
50
+ "rstrip": false,
51
+ "single_word": false,
52
+ "special": true
53
+ }
54
+ },
55
+ "additional_special_tokens": [
56
+ "<instruction>",
57
+ "</instruction>"
58
+ ],
59
+ "bos_token": "<s>",
60
+ "bos_token_id": 1,
61
+ "clean_up_tokenization_spaces": false,
62
+ "eos_token": "</s>",
63
+ "eos_token_id": 2,
64
+ "extra_special_tokens": {},
65
+ "legacy": false,
66
+ "model_max_length": 4096,
67
+ "pad_token": "<|finetune_right_pad_id|>",
68
+ "pad_token_id": 0,
69
+ "padding_side": "right",
70
+ "sp_model_kwargs": {},
71
+ "tokenizer_class": "LlamaTokenizerFast",
72
+ "unk_token": "<unk>",
73
+ "unk_token_id": 0,
74
+ "use_default_system_prompt": false
75
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2110e6dedd642e3a2033876fd0d92470fa0c46a0a8af273627b63150c62b7c20
3
+ size 5777