abarbosa commited on
Commit
41ccce4
·
verified ·
1 Parent(s): 0bae5f1

Pushing fine-tuned model to Hugging Face Hub

Browse files
README.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ language:
4
+ - pt
5
+ - en
6
+ tags:
7
+ - aes
8
+ datasets:
9
+ - kamel-usp/aes_enem_dataset
10
+ base_model: TucanoBR/Tucano-2b4-Instruct
11
+ metrics:
12
+ - accuracy
13
+ - qwk
14
+ library_name: peft
15
+ model-index:
16
+ - name: Tucano-2b4-Instruct-tucano_classification_lora-C4-essay_only-r8
17
+ results:
18
+ - task:
19
+ type: text-classification
20
+ name: Automated Essay Score
21
+ dataset:
22
+ name: Automated Essay Score ENEM Dataset
23
+ type: kamel-usp/aes_enem_dataset
24
+ config: JBCS2025
25
+ split: test
26
+ metrics:
27
+ - name: Macro F1
28
+ type: f1
29
+ value: 0.3167126754083276
30
+ - name: QWK
31
+ type: qwk
32
+ value: 0.4956601187756966
33
+ - name: Weighted Macro F1
34
+ type: f1
35
+ value: 0.562617897400506
36
+ ---
37
+ # Model ID: Tucano-2b4-Instruct-tucano_classification_lora-C4-essay_only-r8
38
+ ## Results
39
+ | | test_data |
40
+ |:-----------------|------------:|
41
+ | eval_accuracy | 0.536232 |
42
+ | eval_RMSE | 33.3623 |
43
+ | eval_QWK | 0.49566 |
44
+ | eval_Macro_F1 | 0.316713 |
45
+ | eval_Weighted_F1 | 0.562618 |
46
+ | eval_Micro_F1 | 0.536232 |
47
+ | eval_HDIV | 0.00724638 |
48
+
adapter_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "TucanoBR/Tucano-2b4-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": [
22
+ "classifier",
23
+ "score"
24
+ ],
25
+ "peft_type": "LORA",
26
+ "qalora_group_size": 16,
27
+ "r": 8,
28
+ "rank_pattern": {},
29
+ "revision": null,
30
+ "target_modules": [
31
+ "k_proj",
32
+ "v_proj",
33
+ "o_proj",
34
+ "q_proj",
35
+ "up_proj",
36
+ "gate_proj",
37
+ "down_proj"
38
+ ],
39
+ "task_type": "SEQ_CLS",
40
+ "trainable_token_indices": null,
41
+ "use_dora": false,
42
+ "use_qalora": false,
43
+ "use_rslora": false
44
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:549ced4928e349a09ca6cb9bc3d5000ebe162798818841578567b4516973de55
3
+ size 42376520
chat_template.jinja ADDED
@@ -0,0 +1 @@
 
 
1
+ {% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '<instruction>' + message['content'].strip() + '</instruction>'}}{% elif message['role'] == 'assistant' %}{{ message['content'].strip() + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}
emissions.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ timestamp,project_name,run_id,experiment_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
2
+ 2025-07-12T18:38:49,jbcs2025,8e806995-0178-4511-8c91-474c62487379,Tucano-2b4-Instruct-tucano_classification_lora-C4-essay_only-r8,1027.268165109679,0.10069611183008358,9.802319905370812e-05,66.0,488.3762161485263,70.0,0.018567458944359284,0.1692618776316408,0.019623839063655273,0.20745317563965532,Japan,JPN,,,,Linux-5.15.0-130-generic-x86_64-with-glibc2.35,3.12.11,3.0.2,192,INTEL(R) XEON(R) PLATINUM 8558,1,1 x NVIDIA H200,139.69,35.69,2015.3516235351562,machine,N,1.0
evaluation_results.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ eval_loss,eval_model_preparation_time,eval_accuracy,eval_RMSE,eval_QWK,eval_HDIV,eval_Macro_F1,eval_Micro_F1,eval_Weighted_F1,eval_TP_0,eval_TN_0,eval_FP_0,eval_FN_0,eval_TP_1,eval_TN_1,eval_FP_1,eval_FN_1,eval_TP_2,eval_TN_2,eval_FP_2,eval_FN_2,eval_TP_3,eval_TN_3,eval_FP_3,eval_FN_3,eval_TP_4,eval_TN_4,eval_FP_4,eval_FN_4,eval_TP_5,eval_TN_5,eval_FP_5,eval_FN_5,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,reference,timestamp,id
2
+ 2.542485475540161,0.0093,0.07575757575757576,81.57539736063475,-0.10997426785933095,0.12121212121212122,0.025445292620865142,0.07575757575757576,0.017349063150589872,0,121,10,1,0,126,6,0,0,128,0,4,0,68,0,64,0,84,0,48,10,11,106,5,5.2658,25.068,6.267,-1,validation_before_training,2025-07-12 18:21:54,Tucano-2b4-Instruct-tucano_classification_lora-C4-essay_only-r8
3
+ 1.322453260421753,0.0093,0.45454545454545453,32.47376563543955,0.5142131979695432,0.0,0.23511618803601295,0.45454545454545453,0.46657349625215533,0,131,0,1,0,130,2,0,0,116,12,4,24,54,14,40,30,48,36,18,6,109,8,9,5.0205,26.292,6.573,13.0,validation_after_training,2025-07-12 18:21:54,Tucano-2b4-Instruct-tucano_classification_lora-C4-essay_only-r8
4
+ 1.2252135276794434,0.0093,0.5362318840579711,33.36230624913196,0.4956601187756966,0.007246376811594235,0.3167126754083276,0.5362318840579711,0.562617897400506,0,137,0,1,1,134,3,0,6,99,30,3,38,50,12,38,29,75,17,17,0,131,2,5,5.3534,25.778,6.538,13.0,test_results,2025-07-12 18:21:54,Tucano-2b4-Instruct-tucano_classification_lora-C4-essay_only-r8
run_experiment.log ADDED
@@ -0,0 +1,723 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [2025-07-12 18:21:38,754][__main__][INFO] - cache_dir: /tmp/
2
+ dataset:
3
+ name: kamel-usp/aes_enem_dataset
4
+ split: JBCS2025
5
+ training_params:
6
+ seed: 42
7
+ num_train_epochs: 20
8
+ logging_steps: 100
9
+ metric_for_best_model: QWK
10
+ bf16: true
11
+ bootstrap:
12
+ enabled: true
13
+ n_bootstrap: 10000
14
+ bootstrap_seed: 42
15
+ metrics:
16
+ - QWK
17
+ - Macro_F1
18
+ - Weighted_F1
19
+ post_training_results:
20
+ model_path: /workspace/jbcs2025/outputs/2025-03-24/20-42-59
21
+ experiments:
22
+ model:
23
+ name: TucanoBR/Tucano-2b4-Instruct
24
+ type: tucano_classification_lora
25
+ num_labels: 6
26
+ output_dir: ./results/
27
+ logging_dir: ./logs/
28
+ best_model_dir: ./results/best_model
29
+ lora_r: 8
30
+ lora_dropout: 0.05
31
+ lora_alpha: 16
32
+ lora_target_modules: all-linear
33
+ checkpoint_path: ''
34
+ tokenizer:
35
+ name: TucanoBR/Tucano-2b4-Instruct
36
+ dataset:
37
+ grade_index: 3
38
+ use_full_context: false
39
+ training_params:
40
+ weight_decay: 0.01
41
+ warmup_ratio: 0.1
42
+ learning_rate: 5.0e-05
43
+ train_batch_size: 8
44
+ eval_batch_size: 4
45
+ gradient_accumulation_steps: 2
46
+ gradient_checkpointing: true
47
+
48
+ [2025-07-12 18:21:42,746][__main__][INFO] - GPU 0: NVIDIA H200 | TDP ≈ 700 W
49
+ [2025-07-12 18:21:42,746][__main__][INFO] - Starting the Fine Tuning training process.
50
+ [2025-07-12 18:21:47,504][transformers.tokenization_utils_base][INFO] - loading file tokenizer.model from cache at None
51
+ [2025-07-12 18:21:47,504][transformers.tokenization_utils_base][INFO] - loading file tokenizer.json from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/tokenizer.json
52
+ [2025-07-12 18:21:47,504][transformers.tokenization_utils_base][INFO] - loading file added_tokens.json from cache at None
53
+ [2025-07-12 18:21:47,504][transformers.tokenization_utils_base][INFO] - loading file special_tokens_map.json from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/special_tokens_map.json
54
+ [2025-07-12 18:21:47,504][transformers.tokenization_utils_base][INFO] - loading file tokenizer_config.json from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/tokenizer_config.json
55
+ [2025-07-12 18:21:47,504][transformers.tokenization_utils_base][INFO] - loading file chat_template.jinja from cache at None
56
+ [2025-07-12 18:21:47,546][transformers.tokenization_utils_base][INFO] - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
57
+ [2025-07-12 18:21:47,553][__main__][INFO] - Tokenizer function parameters- Padding:longest; Truncation: False; Use Full Context: False
58
+ [2025-07-12 18:21:48,395][__main__][INFO] -
59
+ Token statistics for 'train' split:
60
+ [2025-07-12 18:21:48,395][__main__][INFO] - Total examples: 500
61
+ [2025-07-12 18:21:48,396][__main__][INFO] - Min tokens: 2121
62
+ [2025-07-12 18:21:48,396][__main__][INFO] - Max tokens: 2121
63
+ [2025-07-12 18:21:48,396][__main__][INFO] - Avg tokens: 2121.00
64
+ [2025-07-12 18:21:48,396][__main__][INFO] - Std tokens: 0.00
65
+ [2025-07-12 18:21:48,558][__main__][INFO] -
66
+ Token statistics for 'validation' split:
67
+ [2025-07-12 18:21:48,558][__main__][INFO] - Total examples: 132
68
+ [2025-07-12 18:21:48,558][__main__][INFO] - Min tokens: 1863
69
+ [2025-07-12 18:21:48,558][__main__][INFO] - Max tokens: 1863
70
+ [2025-07-12 18:21:48,558][__main__][INFO] - Avg tokens: 1863.00
71
+ [2025-07-12 18:21:48,558][__main__][INFO] - Std tokens: 0.00
72
+ [2025-07-12 18:21:48,733][__main__][INFO] -
73
+ Token statistics for 'test' split:
74
+ [2025-07-12 18:21:48,733][__main__][INFO] - Total examples: 138
75
+ [2025-07-12 18:21:48,733][__main__][INFO] - Min tokens: 1916
76
+ [2025-07-12 18:21:48,733][__main__][INFO] - Max tokens: 1916
77
+ [2025-07-12 18:21:48,733][__main__][INFO] - Avg tokens: 1916.00
78
+ [2025-07-12 18:21:48,733][__main__][INFO] - Std tokens: 0.00
79
+ [2025-07-12 18:21:48,733][__main__][INFO] - If token statistics are the same (max, avg, min) keep in mind that this is due to batched tokenization and padding.
80
+ [2025-07-12 18:21:48,733][__main__][INFO] - Model max length: 4096. If it is the same as stats, then there is a high chance that sequences are being truncated.
81
+ [2025-07-12 18:21:48,919][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
82
+ [2025-07-12 18:21:48,920][transformers.configuration_utils][INFO] - Model config LlamaConfig {
83
+ "architectures": [
84
+ "LlamaForCausalLM"
85
+ ],
86
+ "attention_bias": false,
87
+ "attention_dropout": 0.0,
88
+ "bos_token_id": 1,
89
+ "eos_token_id": 2,
90
+ "head_dim": 160,
91
+ "hidden_act": "silu",
92
+ "hidden_size": 2560,
93
+ "id2label": {
94
+ "0": "LABEL_0",
95
+ "1": "LABEL_1",
96
+ "2": "LABEL_2",
97
+ "3": "LABEL_3",
98
+ "4": "LABEL_4",
99
+ "5": "LABEL_5"
100
+ },
101
+ "initializer_range": 0.02,
102
+ "intermediate_size": 10240,
103
+ "label2id": {
104
+ "LABEL_0": 0,
105
+ "LABEL_1": 1,
106
+ "LABEL_2": 2,
107
+ "LABEL_3": 3,
108
+ "LABEL_4": 4,
109
+ "LABEL_5": 5
110
+ },
111
+ "max_position_embeddings": 4096,
112
+ "mlp_bias": false,
113
+ "model_type": "llama",
114
+ "num_attention_heads": 16,
115
+ "num_hidden_layers": 24,
116
+ "num_key_value_heads": 4,
117
+ "pad_token_id": 3,
118
+ "pretraining_tp": 1,
119
+ "rms_norm_eps": 1e-05,
120
+ "rope_scaling": null,
121
+ "rope_theta": 10000.0,
122
+ "tie_word_embeddings": false,
123
+ "torch_dtype": "float32",
124
+ "transformers_version": "4.53.2",
125
+ "use_cache": false,
126
+ "vocab_size": 32002
127
+ }
128
+
129
+ [2025-07-12 18:21:49,063][transformers.modeling_utils][INFO] - loading weights file model.safetensors from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/model.safetensors.index.json
130
+ [2025-07-12 18:21:49,063][transformers.modeling_utils][INFO] - Will use torch_dtype=torch.float32 as defined in model's config object
131
+ [2025-07-12 18:21:49,063][transformers.modeling_utils][INFO] - Instantiating LlamaForSequenceClassification model under default dtype torch.float32.
132
+ [2025-07-12 18:21:49,065][transformers.modeling_utils][WARNING] - Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in LlamaForSequenceClassification is torch.float32. You should run training or inference using Automatic Mixed-Precision via the `with torch.autocast(device_type='torch_device'):` decorator, or load the model with the `torch_dtype` argument. Example: `model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", torch_dtype=torch.float16)`
133
+ [2025-07-12 18:21:53,359][transformers.modeling_utils][INFO] - Some weights of the model checkpoint at TucanoBR/Tucano-2b4-Instruct were not used when initializing LlamaForSequenceClassification: ['lm_head.weight']
134
+ - This IS expected if you are initializing LlamaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
135
+ - This IS NOT expected if you are initializing LlamaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
136
+ [2025-07-12 18:21:53,359][transformers.modeling_utils][WARNING] - Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at TucanoBR/Tucano-2b4-Instruct and are newly initialized: ['score.weight']
137
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
138
+ [2025-07-12 18:21:54,226][__main__][INFO] - Initialized new PEFT model for ce loss
139
+ [2025-07-12 18:21:54,228][__main__][INFO] - None
140
+ [2025-07-12 18:21:54,229][transformers.training_args][INFO] - PyTorch: setting up devices
141
+ [2025-07-12 18:21:54,264][__main__][INFO] - Total steps: 620. Number of warmup steps: 62
142
+ [2025-07-12 18:21:54,279][transformers.trainer][INFO] - You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.
143
+ [2025-07-12 18:21:54,298][transformers.trainer][INFO] - Using auto half precision backend
144
+ [2025-07-12 18:21:54,299][transformers.trainer][WARNING] - No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
145
+ [2025-07-12 18:21:54,300][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt. If prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
146
+ [2025-07-12 18:21:54,313][transformers.trainer][INFO] -
147
+ ***** Running Evaluation *****
148
+ [2025-07-12 18:21:54,313][transformers.trainer][INFO] - Num examples = 132
149
+ [2025-07-12 18:21:54,313][transformers.trainer][INFO] - Batch size = 4
150
+ [2025-07-12 18:21:54,497][transformers.modeling_flash_attention_utils][WARNING] - The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.bfloat16.
151
+ [2025-07-12 18:21:59,763][transformers.trainer][INFO] - The following columns in the Training set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt. If prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
152
+ [2025-07-12 18:21:59,805][transformers.trainer][INFO] - ***** Running training *****
153
+ [2025-07-12 18:21:59,805][transformers.trainer][INFO] - Num examples = 500
154
+ [2025-07-12 18:21:59,805][transformers.trainer][INFO] - Num Epochs = 20
155
+ [2025-07-12 18:21:59,805][transformers.trainer][INFO] - Instantaneous batch size per device = 8
156
+ [2025-07-12 18:21:59,805][transformers.trainer][INFO] - Total train batch size (w. parallel, distributed & accumulation) = 16
157
+ [2025-07-12 18:21:59,805][transformers.trainer][INFO] - Gradient Accumulation steps = 2
158
+ [2025-07-12 18:21:59,805][transformers.trainer][INFO] - Total optimization steps = 640
159
+ [2025-07-12 18:21:59,807][transformers.trainer][INFO] - Number of trainable parameters = 10,583,040
160
+ [2025-07-12 18:23:11,108][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt. If prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
161
+ [2025-07-12 18:23:11,111][transformers.trainer][INFO] -
162
+ ***** Running Evaluation *****
163
+ [2025-07-12 18:23:11,111][transformers.trainer][INFO] - Num examples = 132
164
+ [2025-07-12 18:23:11,111][transformers.trainer][INFO] - Batch size = 4
165
+ [2025-07-12 18:23:16,127][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-32
166
+ [2025-07-12 18:23:16,495][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
167
+ [2025-07-12 18:23:16,496][transformers.configuration_utils][INFO] - Model config LlamaConfig {
168
+ "architectures": [
169
+ "LlamaForCausalLM"
170
+ ],
171
+ "attention_bias": false,
172
+ "attention_dropout": 0.0,
173
+ "bos_token_id": 1,
174
+ "eos_token_id": 2,
175
+ "head_dim": 160,
176
+ "hidden_act": "silu",
177
+ "hidden_size": 2560,
178
+ "initializer_range": 0.02,
179
+ "intermediate_size": 10240,
180
+ "max_position_embeddings": 4096,
181
+ "mlp_bias": false,
182
+ "model_type": "llama",
183
+ "num_attention_heads": 16,
184
+ "num_hidden_layers": 24,
185
+ "num_key_value_heads": 4,
186
+ "pad_token_id": 3,
187
+ "pretraining_tp": 1,
188
+ "rms_norm_eps": 1e-05,
189
+ "rope_scaling": null,
190
+ "rope_theta": 10000.0,
191
+ "tie_word_embeddings": false,
192
+ "torch_dtype": "float32",
193
+ "transformers_version": "4.53.2",
194
+ "use_cache": false,
195
+ "vocab_size": 32002
196
+ }
197
+
198
+ [2025-07-12 18:24:27,849][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt. If prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
199
+ [2025-07-12 18:24:27,852][transformers.trainer][INFO] -
200
+ ***** Running Evaluation *****
201
+ [2025-07-12 18:24:27,852][transformers.trainer][INFO] - Num examples = 132
202
+ [2025-07-12 18:24:27,852][transformers.trainer][INFO] - Batch size = 4
203
+ [2025-07-12 18:24:32,869][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-64
204
+ [2025-07-12 18:24:33,225][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
205
+ [2025-07-12 18:24:33,226][transformers.configuration_utils][INFO] - Model config LlamaConfig {
206
+ "architectures": [
207
+ "LlamaForCausalLM"
208
+ ],
209
+ "attention_bias": false,
210
+ "attention_dropout": 0.0,
211
+ "bos_token_id": 1,
212
+ "eos_token_id": 2,
213
+ "head_dim": 160,
214
+ "hidden_act": "silu",
215
+ "hidden_size": 2560,
216
+ "initializer_range": 0.02,
217
+ "intermediate_size": 10240,
218
+ "max_position_embeddings": 4096,
219
+ "mlp_bias": false,
220
+ "model_type": "llama",
221
+ "num_attention_heads": 16,
222
+ "num_hidden_layers": 24,
223
+ "num_key_value_heads": 4,
224
+ "pad_token_id": 3,
225
+ "pretraining_tp": 1,
226
+ "rms_norm_eps": 1e-05,
227
+ "rope_scaling": null,
228
+ "rope_theta": 10000.0,
229
+ "tie_word_embeddings": false,
230
+ "torch_dtype": "float32",
231
+ "transformers_version": "4.53.2",
232
+ "use_cache": false,
233
+ "vocab_size": 32002
234
+ }
235
+
236
+ [2025-07-12 18:24:33,389][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-32] due to args.save_total_limit
237
+ [2025-07-12 18:25:44,618][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt. If prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
238
+ [2025-07-12 18:25:44,621][transformers.trainer][INFO] -
239
+ ***** Running Evaluation *****
240
+ [2025-07-12 18:25:44,621][transformers.trainer][INFO] - Num examples = 132
241
+ [2025-07-12 18:25:44,621][transformers.trainer][INFO] - Batch size = 4
242
+ [2025-07-12 18:25:49,615][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-96
243
+ [2025-07-12 18:25:49,976][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
244
+ [2025-07-12 18:25:49,976][transformers.configuration_utils][INFO] - Model config LlamaConfig {
245
+ "architectures": [
246
+ "LlamaForCausalLM"
247
+ ],
248
+ "attention_bias": false,
249
+ "attention_dropout": 0.0,
250
+ "bos_token_id": 1,
251
+ "eos_token_id": 2,
252
+ "head_dim": 160,
253
+ "hidden_act": "silu",
254
+ "hidden_size": 2560,
255
+ "initializer_range": 0.02,
256
+ "intermediate_size": 10240,
257
+ "max_position_embeddings": 4096,
258
+ "mlp_bias": false,
259
+ "model_type": "llama",
260
+ "num_attention_heads": 16,
261
+ "num_hidden_layers": 24,
262
+ "num_key_value_heads": 4,
263
+ "pad_token_id": 3,
264
+ "pretraining_tp": 1,
265
+ "rms_norm_eps": 1e-05,
266
+ "rope_scaling": null,
267
+ "rope_theta": 10000.0,
268
+ "tie_word_embeddings": false,
269
+ "torch_dtype": "float32",
270
+ "transformers_version": "4.53.2",
271
+ "use_cache": false,
272
+ "vocab_size": 32002
273
+ }
274
+
275
+ [2025-07-12 18:25:50,187][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-64] due to args.save_total_limit
276
+ [2025-07-12 18:27:01,291][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt. If prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
277
+ [2025-07-12 18:27:01,294][transformers.trainer][INFO] -
278
+ ***** Running Evaluation *****
279
+ [2025-07-12 18:27:01,294][transformers.trainer][INFO] - Num examples = 132
280
+ [2025-07-12 18:27:01,294][transformers.trainer][INFO] - Batch size = 4
281
+ [2025-07-12 18:27:06,303][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-128
282
+ [2025-07-12 18:27:06,667][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
283
+ [2025-07-12 18:27:06,668][transformers.configuration_utils][INFO] - Model config LlamaConfig {
284
+ "architectures": [
285
+ "LlamaForCausalLM"
286
+ ],
287
+ "attention_bias": false,
288
+ "attention_dropout": 0.0,
289
+ "bos_token_id": 1,
290
+ "eos_token_id": 2,
291
+ "head_dim": 160,
292
+ "hidden_act": "silu",
293
+ "hidden_size": 2560,
294
+ "initializer_range": 0.02,
295
+ "intermediate_size": 10240,
296
+ "max_position_embeddings": 4096,
297
+ "mlp_bias": false,
298
+ "model_type": "llama",
299
+ "num_attention_heads": 16,
300
+ "num_hidden_layers": 24,
301
+ "num_key_value_heads": 4,
302
+ "pad_token_id": 3,
303
+ "pretraining_tp": 1,
304
+ "rms_norm_eps": 1e-05,
305
+ "rope_scaling": null,
306
+ "rope_theta": 10000.0,
307
+ "tie_word_embeddings": false,
308
+ "torch_dtype": "float32",
309
+ "transformers_version": "4.53.2",
310
+ "use_cache": false,
311
+ "vocab_size": 32002
312
+ }
313
+
314
+ [2025-07-12 18:28:18,001][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt. If prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
315
+ [2025-07-12 18:28:18,004][transformers.trainer][INFO] -
316
+ ***** Running Evaluation *****
317
+ [2025-07-12 18:28:18,004][transformers.trainer][INFO] - Num examples = 132
318
+ [2025-07-12 18:28:18,005][transformers.trainer][INFO] - Batch size = 4
319
+ [2025-07-12 18:28:23,007][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-160
320
+ [2025-07-12 18:28:23,386][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
321
+ [2025-07-12 18:28:23,387][transformers.configuration_utils][INFO] - Model config LlamaConfig {
322
+ "architectures": [
323
+ "LlamaForCausalLM"
324
+ ],
325
+ "attention_bias": false,
326
+ "attention_dropout": 0.0,
327
+ "bos_token_id": 1,
328
+ "eos_token_id": 2,
329
+ "head_dim": 160,
330
+ "hidden_act": "silu",
331
+ "hidden_size": 2560,
332
+ "initializer_range": 0.02,
333
+ "intermediate_size": 10240,
334
+ "max_position_embeddings": 4096,
335
+ "mlp_bias": false,
336
+ "model_type": "llama",
337
+ "num_attention_heads": 16,
338
+ "num_hidden_layers": 24,
339
+ "num_key_value_heads": 4,
340
+ "pad_token_id": 3,
341
+ "pretraining_tp": 1,
342
+ "rms_norm_eps": 1e-05,
343
+ "rope_scaling": null,
344
+ "rope_theta": 10000.0,
345
+ "tie_word_embeddings": false,
346
+ "torch_dtype": "float32",
347
+ "transformers_version": "4.53.2",
348
+ "use_cache": false,
349
+ "vocab_size": 32002
350
+ }
351
+
352
+ [2025-07-12 18:28:23,588][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-96] due to args.save_total_limit
353
+ [2025-07-12 18:28:23,596][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-128] due to args.save_total_limit
354
+ [2025-07-12 18:29:34,708][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt. If prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
355
+ [2025-07-12 18:29:34,711][transformers.trainer][INFO] -
356
+ ***** Running Evaluation *****
357
+ [2025-07-12 18:29:34,711][transformers.trainer][INFO] - Num examples = 132
358
+ [2025-07-12 18:29:34,711][transformers.trainer][INFO] - Batch size = 4
359
+ [2025-07-12 18:29:39,708][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-192
360
+ [2025-07-12 18:29:40,065][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
361
+ [2025-07-12 18:29:40,066][transformers.configuration_utils][INFO] - Model config LlamaConfig {
362
+ "architectures": [
363
+ "LlamaForCausalLM"
364
+ ],
365
+ "attention_bias": false,
366
+ "attention_dropout": 0.0,
367
+ "bos_token_id": 1,
368
+ "eos_token_id": 2,
369
+ "head_dim": 160,
370
+ "hidden_act": "silu",
371
+ "hidden_size": 2560,
372
+ "initializer_range": 0.02,
373
+ "intermediate_size": 10240,
374
+ "max_position_embeddings": 4096,
375
+ "mlp_bias": false,
376
+ "model_type": "llama",
377
+ "num_attention_heads": 16,
378
+ "num_hidden_layers": 24,
379
+ "num_key_value_heads": 4,
380
+ "pad_token_id": 3,
381
+ "pretraining_tp": 1,
382
+ "rms_norm_eps": 1e-05,
383
+ "rope_scaling": null,
384
+ "rope_theta": 10000.0,
385
+ "tie_word_embeddings": false,
386
+ "torch_dtype": "float32",
387
+ "transformers_version": "4.53.2",
388
+ "use_cache": false,
389
+ "vocab_size": 32002
390
+ }
391
+
392
+ [2025-07-12 18:29:40,285][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-160] due to args.save_total_limit
393
+ [2025-07-12 18:30:51,376][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt. If prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
394
+ [2025-07-12 18:30:51,379][transformers.trainer][INFO] -
395
+ ***** Running Evaluation *****
396
+ [2025-07-12 18:30:51,379][transformers.trainer][INFO] - Num examples = 132
397
+ [2025-07-12 18:30:51,379][transformers.trainer][INFO] - Batch size = 4
398
+ [2025-07-12 18:30:56,377][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-224
399
+ [2025-07-12 18:30:56,747][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
400
+ [2025-07-12 18:30:56,748][transformers.configuration_utils][INFO] - Model config LlamaConfig {
401
+ "architectures": [
402
+ "LlamaForCausalLM"
403
+ ],
404
+ "attention_bias": false,
405
+ "attention_dropout": 0.0,
406
+ "bos_token_id": 1,
407
+ "eos_token_id": 2,
408
+ "head_dim": 160,
409
+ "hidden_act": "silu",
410
+ "hidden_size": 2560,
411
+ "initializer_range": 0.02,
412
+ "intermediate_size": 10240,
413
+ "max_position_embeddings": 4096,
414
+ "mlp_bias": false,
415
+ "model_type": "llama",
416
+ "num_attention_heads": 16,
417
+ "num_hidden_layers": 24,
418
+ "num_key_value_heads": 4,
419
+ "pad_token_id": 3,
420
+ "pretraining_tp": 1,
421
+ "rms_norm_eps": 1e-05,
422
+ "rope_scaling": null,
423
+ "rope_theta": 10000.0,
424
+ "tie_word_embeddings": false,
425
+ "torch_dtype": "float32",
426
+ "transformers_version": "4.53.2",
427
+ "use_cache": false,
428
+ "vocab_size": 32002
429
+ }
430
+
431
+ [2025-07-12 18:32:08,365][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt. If prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
432
+ [2025-07-12 18:32:08,369][transformers.trainer][INFO] -
433
+ ***** Running Evaluation *****
434
+ [2025-07-12 18:32:08,369][transformers.trainer][INFO] - Num examples = 132
435
+ [2025-07-12 18:32:08,369][transformers.trainer][INFO] - Batch size = 4
436
+ [2025-07-12 18:32:13,381][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-256
437
+ [2025-07-12 18:32:13,794][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
438
+ [2025-07-12 18:32:13,795][transformers.configuration_utils][INFO] - Model config LlamaConfig {
439
+ "architectures": [
440
+ "LlamaForCausalLM"
441
+ ],
442
+ "attention_bias": false,
443
+ "attention_dropout": 0.0,
444
+ "bos_token_id": 1,
445
+ "eos_token_id": 2,
446
+ "head_dim": 160,
447
+ "hidden_act": "silu",
448
+ "hidden_size": 2560,
449
+ "initializer_range": 0.02,
450
+ "intermediate_size": 10240,
451
+ "max_position_embeddings": 4096,
452
+ "mlp_bias": false,
453
+ "model_type": "llama",
454
+ "num_attention_heads": 16,
455
+ "num_hidden_layers": 24,
456
+ "num_key_value_heads": 4,
457
+ "pad_token_id": 3,
458
+ "pretraining_tp": 1,
459
+ "rms_norm_eps": 1e-05,
460
+ "rope_scaling": null,
461
+ "rope_theta": 10000.0,
462
+ "tie_word_embeddings": false,
463
+ "torch_dtype": "float32",
464
+ "transformers_version": "4.53.2",
465
+ "use_cache": false,
466
+ "vocab_size": 32002
467
+ }
468
+
469
+ [2025-07-12 18:32:13,992][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-192] due to args.save_total_limit
470
+ [2025-07-12 18:32:14,002][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-224] due to args.save_total_limit
471
+ [2025-07-12 18:33:25,165][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt. If prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
472
+ [2025-07-12 18:33:25,168][transformers.trainer][INFO] -
473
+ ***** Running Evaluation *****
474
+ [2025-07-12 18:33:25,169][transformers.trainer][INFO] - Num examples = 132
475
+ [2025-07-12 18:33:25,169][transformers.trainer][INFO] - Batch size = 4
476
+ [2025-07-12 18:33:30,172][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-288
477
+ [2025-07-12 18:33:30,567][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
478
+ [2025-07-12 18:33:30,567][transformers.configuration_utils][INFO] - Model config LlamaConfig {
479
+ "architectures": [
480
+ "LlamaForCausalLM"
481
+ ],
482
+ "attention_bias": false,
483
+ "attention_dropout": 0.0,
484
+ "bos_token_id": 1,
485
+ "eos_token_id": 2,
486
+ "head_dim": 160,
487
+ "hidden_act": "silu",
488
+ "hidden_size": 2560,
489
+ "initializer_range": 0.02,
490
+ "intermediate_size": 10240,
491
+ "max_position_embeddings": 4096,
492
+ "mlp_bias": false,
493
+ "model_type": "llama",
494
+ "num_attention_heads": 16,
495
+ "num_hidden_layers": 24,
496
+ "num_key_value_heads": 4,
497
+ "pad_token_id": 3,
498
+ "pretraining_tp": 1,
499
+ "rms_norm_eps": 1e-05,
500
+ "rope_scaling": null,
501
+ "rope_theta": 10000.0,
502
+ "tie_word_embeddings": false,
503
+ "torch_dtype": "float32",
504
+ "transformers_version": "4.53.2",
505
+ "use_cache": false,
506
+ "vocab_size": 32002
507
+ }
508
+
509
+ [2025-07-12 18:34:41,929][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt. If prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
510
+ [2025-07-12 18:34:41,932][transformers.trainer][INFO] -
511
+ ***** Running Evaluation *****
512
+ [2025-07-12 18:34:41,932][transformers.trainer][INFO] - Num examples = 132
513
+ [2025-07-12 18:34:41,932][transformers.trainer][INFO] - Batch size = 4
514
+ [2025-07-12 18:34:46,943][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-320
515
+ [2025-07-12 18:34:47,303][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
516
+ [2025-07-12 18:34:47,303][transformers.configuration_utils][INFO] - Model config LlamaConfig {
517
+ "architectures": [
518
+ "LlamaForCausalLM"
519
+ ],
520
+ "attention_bias": false,
521
+ "attention_dropout": 0.0,
522
+ "bos_token_id": 1,
523
+ "eos_token_id": 2,
524
+ "head_dim": 160,
525
+ "hidden_act": "silu",
526
+ "hidden_size": 2560,
527
+ "initializer_range": 0.02,
528
+ "intermediate_size": 10240,
529
+ "max_position_embeddings": 4096,
530
+ "mlp_bias": false,
531
+ "model_type": "llama",
532
+ "num_attention_heads": 16,
533
+ "num_hidden_layers": 24,
534
+ "num_key_value_heads": 4,
535
+ "pad_token_id": 3,
536
+ "pretraining_tp": 1,
537
+ "rms_norm_eps": 1e-05,
538
+ "rope_scaling": null,
539
+ "rope_theta": 10000.0,
540
+ "tie_word_embeddings": false,
541
+ "torch_dtype": "float32",
542
+ "transformers_version": "4.53.2",
543
+ "use_cache": false,
544
+ "vocab_size": 32002
545
+ }
546
+
547
+ [2025-07-12 18:34:47,483][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-288] due to args.save_total_limit
548
+ [2025-07-12 18:35:58,663][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt. If prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
549
+ [2025-07-12 18:35:58,667][transformers.trainer][INFO] -
550
+ ***** Running Evaluation *****
551
+ [2025-07-12 18:35:58,667][transformers.trainer][INFO] - Num examples = 132
552
+ [2025-07-12 18:35:58,667][transformers.trainer][INFO] - Batch size = 4
553
+ [2025-07-12 18:36:03,689][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-352
554
+ [2025-07-12 18:36:04,045][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
555
+ [2025-07-12 18:36:04,045][transformers.configuration_utils][INFO] - Model config LlamaConfig {
556
+ "architectures": [
557
+ "LlamaForCausalLM"
558
+ ],
559
+ "attention_bias": false,
560
+ "attention_dropout": 0.0,
561
+ "bos_token_id": 1,
562
+ "eos_token_id": 2,
563
+ "head_dim": 160,
564
+ "hidden_act": "silu",
565
+ "hidden_size": 2560,
566
+ "initializer_range": 0.02,
567
+ "intermediate_size": 10240,
568
+ "max_position_embeddings": 4096,
569
+ "mlp_bias": false,
570
+ "model_type": "llama",
571
+ "num_attention_heads": 16,
572
+ "num_hidden_layers": 24,
573
+ "num_key_value_heads": 4,
574
+ "pad_token_id": 3,
575
+ "pretraining_tp": 1,
576
+ "rms_norm_eps": 1e-05,
577
+ "rope_scaling": null,
578
+ "rope_theta": 10000.0,
579
+ "tie_word_embeddings": false,
580
+ "torch_dtype": "float32",
581
+ "transformers_version": "4.53.2",
582
+ "use_cache": false,
583
+ "vocab_size": 32002
584
+ }
585
+
586
+ [2025-07-12 18:36:04,284][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-320] due to args.save_total_limit
587
+ [2025-07-12 18:37:15,425][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt. If prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
588
+ [2025-07-12 18:37:15,428][transformers.trainer][INFO] -
589
+ ***** Running Evaluation *****
590
+ [2025-07-12 18:37:15,428][transformers.trainer][INFO] - Num examples = 132
591
+ [2025-07-12 18:37:15,428][transformers.trainer][INFO] - Batch size = 4
592
+ [2025-07-12 18:37:20,438][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-384
593
+ [2025-07-12 18:37:20,811][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
594
+ [2025-07-12 18:37:20,812][transformers.configuration_utils][INFO] - Model config LlamaConfig {
595
+ "architectures": [
596
+ "LlamaForCausalLM"
597
+ ],
598
+ "attention_bias": false,
599
+ "attention_dropout": 0.0,
600
+ "bos_token_id": 1,
601
+ "eos_token_id": 2,
602
+ "head_dim": 160,
603
+ "hidden_act": "silu",
604
+ "hidden_size": 2560,
605
+ "initializer_range": 0.02,
606
+ "intermediate_size": 10240,
607
+ "max_position_embeddings": 4096,
608
+ "mlp_bias": false,
609
+ "model_type": "llama",
610
+ "num_attention_heads": 16,
611
+ "num_hidden_layers": 24,
612
+ "num_key_value_heads": 4,
613
+ "pad_token_id": 3,
614
+ "pretraining_tp": 1,
615
+ "rms_norm_eps": 1e-05,
616
+ "rope_scaling": null,
617
+ "rope_theta": 10000.0,
618
+ "tie_word_embeddings": false,
619
+ "torch_dtype": "float32",
620
+ "transformers_version": "4.53.2",
621
+ "use_cache": false,
622
+ "vocab_size": 32002
623
+ }
624
+
625
+ [2025-07-12 18:37:20,986][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-352] due to args.save_total_limit
626
+ [2025-07-12 18:38:32,129][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt. If prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
627
+ [2025-07-12 18:38:32,132][transformers.trainer][INFO] -
628
+ ***** Running Evaluation *****
629
+ [2025-07-12 18:38:32,132][transformers.trainer][INFO] - Num examples = 132
630
+ [2025-07-12 18:38:32,132][transformers.trainer][INFO] - Batch size = 4
631
+ [2025-07-12 18:38:37,142][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-416
632
+ [2025-07-12 18:38:37,504][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
633
+ [2025-07-12 18:38:37,505][transformers.configuration_utils][INFO] - Model config LlamaConfig {
634
+ "architectures": [
635
+ "LlamaForCausalLM"
636
+ ],
637
+ "attention_bias": false,
638
+ "attention_dropout": 0.0,
639
+ "bos_token_id": 1,
640
+ "eos_token_id": 2,
641
+ "head_dim": 160,
642
+ "hidden_act": "silu",
643
+ "hidden_size": 2560,
644
+ "initializer_range": 0.02,
645
+ "intermediate_size": 10240,
646
+ "max_position_embeddings": 4096,
647
+ "mlp_bias": false,
648
+ "model_type": "llama",
649
+ "num_attention_heads": 16,
650
+ "num_hidden_layers": 24,
651
+ "num_key_value_heads": 4,
652
+ "pad_token_id": 3,
653
+ "pretraining_tp": 1,
654
+ "rms_norm_eps": 1e-05,
655
+ "rope_scaling": null,
656
+ "rope_theta": 10000.0,
657
+ "tie_word_embeddings": false,
658
+ "torch_dtype": "float32",
659
+ "transformers_version": "4.53.2",
660
+ "use_cache": false,
661
+ "vocab_size": 32002
662
+ }
663
+
664
+ [2025-07-12 18:38:37,687][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-384] due to args.save_total_limit
665
+ [2025-07-12 18:38:37,695][transformers.trainer][INFO] -
666
+
667
+ Training completed. Do not forget to share your model on huggingface.co/models =)
668
+
669
+
670
+ [2025-07-12 18:38:37,695][transformers.trainer][INFO] - Loading best model from /workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-256 (score: 0.5142131979695432).
671
+ [2025-07-12 18:38:37,784][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/18-21-38/results/checkpoint-416] due to args.save_total_limit
672
+ [2025-07-12 18:38:37,794][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt. If prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
673
+ [2025-07-12 18:38:37,797][transformers.trainer][INFO] -
674
+ ***** Running Evaluation *****
675
+ [2025-07-12 18:38:37,797][transformers.trainer][INFO] - Num examples = 132
676
+ [2025-07-12 18:38:37,797][transformers.trainer][INFO] - Batch size = 4
677
+ [2025-07-12 18:38:42,829][__main__][INFO] - Training completed successfully.
678
+ [2025-07-12 18:38:42,829][__main__][INFO] - Running on Test
679
+ [2025-07-12 18:38:42,829][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt. If prompt, supporting_text, essay_year, reference, grades, essay_text, id, id_prompt are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
680
+ [2025-07-12 18:38:42,832][transformers.trainer][INFO] -
681
+ ***** Running Evaluation *****
682
+ [2025-07-12 18:38:42,832][transformers.trainer][INFO] - Num examples = 138
683
+ [2025-07-12 18:38:42,832][transformers.trainer][INFO] - Batch size = 4
684
+ [2025-07-12 18:38:48,195][__main__][INFO] - Test metrics: {'eval_loss': 1.2252135276794434, 'eval_model_preparation_time': 0.0093, 'eval_accuracy': 0.5362318840579711, 'eval_RMSE': 33.36230624913196, 'eval_QWK': 0.4956601187756966, 'eval_HDIV': 0.007246376811594235, 'eval_Macro_F1': 0.3167126754083276, 'eval_Micro_F1': 0.5362318840579711, 'eval_Weighted_F1': 0.562617897400506, 'eval_TP_0': 0, 'eval_TN_0': 137, 'eval_FP_0': 0, 'eval_FN_0': 1, 'eval_TP_1': 1, 'eval_TN_1': 134, 'eval_FP_1': 3, 'eval_FN_1': 0, 'eval_TP_2': 6, 'eval_TN_2': 99, 'eval_FP_2': 30, 'eval_FN_2': 3, 'eval_TP_3': 38, 'eval_TN_3': 50, 'eval_FP_3': 12, 'eval_FN_3': 38, 'eval_TP_4': 29, 'eval_TN_4': 75, 'eval_FP_4': 17, 'eval_FN_4': 17, 'eval_TP_5': 0, 'eval_TN_5': 131, 'eval_FP_5': 2, 'eval_FN_5': 5, 'eval_runtime': 5.3534, 'eval_samples_per_second': 25.778, 'eval_steps_per_second': 6.538, 'epoch': 13.0}
685
+ [2025-07-12 18:38:48,195][transformers.trainer][INFO] - Saving model checkpoint to ./results/best_model
686
+ [2025-07-12 18:38:48,863][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
687
+ [2025-07-12 18:38:48,864][transformers.configuration_utils][INFO] - Model config LlamaConfig {
688
+ "architectures": [
689
+ "LlamaForCausalLM"
690
+ ],
691
+ "attention_bias": false,
692
+ "attention_dropout": 0.0,
693
+ "bos_token_id": 1,
694
+ "eos_token_id": 2,
695
+ "head_dim": 160,
696
+ "hidden_act": "silu",
697
+ "hidden_size": 2560,
698
+ "initializer_range": 0.02,
699
+ "intermediate_size": 10240,
700
+ "max_position_embeddings": 4096,
701
+ "mlp_bias": false,
702
+ "model_type": "llama",
703
+ "num_attention_heads": 16,
704
+ "num_hidden_layers": 24,
705
+ "num_key_value_heads": 4,
706
+ "pad_token_id": 3,
707
+ "pretraining_tp": 1,
708
+ "rms_norm_eps": 1e-05,
709
+ "rope_scaling": null,
710
+ "rope_theta": 10000.0,
711
+ "tie_word_embeddings": false,
712
+ "torch_dtype": "float32",
713
+ "transformers_version": "4.53.2",
714
+ "use_cache": false,
715
+ "vocab_size": 32002
716
+ }
717
+
718
+ [2025-07-12 18:38:48,981][transformers.tokenization_utils_base][INFO] - chat template saved in ./results/best_model/chat_template.jinja
719
+ [2025-07-12 18:38:48,981][transformers.tokenization_utils_base][INFO] - tokenizer config file saved in ./results/best_model/tokenizer_config.json
720
+ [2025-07-12 18:38:48,981][transformers.tokenization_utils_base][INFO] - Special tokens file saved in ./results/best_model/special_tokens_map.json
721
+ [2025-07-12 18:38:48,997][__main__][INFO] - Model and tokenizer saved to ./results/best_model
722
+ [2025-07-12 18:38:49,002][__main__][INFO] - Fine Tuning Finished.
723
+ [2025-07-12 18:38:49,509][__main__][INFO] - Total emissions: 0.1007 kg CO2eq
special_tokens_map.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<instruction>",
4
+ "</instruction>"
5
+ ],
6
+ "bos_token": {
7
+ "content": "<s>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "eos_token": {
14
+ "content": "</s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "pad_token": "<|finetune_right_pad_id|>",
21
+ "unk_token": {
22
+ "content": "<unk>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ }
28
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "3": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "32000": {
39
+ "content": "<instruction>",
40
+ "lstrip": false,
41
+ "normalized": true,
42
+ "rstrip": false,
43
+ "single_word": false,
44
+ "special": true
45
+ },
46
+ "32001": {
47
+ "content": "</instruction>",
48
+ "lstrip": false,
49
+ "normalized": true,
50
+ "rstrip": false,
51
+ "single_word": false,
52
+ "special": true
53
+ }
54
+ },
55
+ "additional_special_tokens": [
56
+ "<instruction>",
57
+ "</instruction>"
58
+ ],
59
+ "bos_token": "<s>",
60
+ "bos_token_id": 1,
61
+ "clean_up_tokenization_spaces": false,
62
+ "eos_token": "</s>",
63
+ "eos_token_id": 2,
64
+ "extra_special_tokens": {},
65
+ "legacy": false,
66
+ "model_max_length": 4096,
67
+ "pad_token": "<|finetune_right_pad_id|>",
68
+ "pad_token_id": 0,
69
+ "padding_side": "right",
70
+ "sp_model_kwargs": {},
71
+ "tokenizer_class": "LlamaTokenizerFast",
72
+ "unk_token": "<unk>",
73
+ "unk_token_id": 0,
74
+ "use_default_system_prompt": false
75
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be22cf5bd1fbf355ab329177fb1e83c59a5f49191789aa89a6e3a46aefaf892a
3
+ size 5777