End of training

Files changed (7) hide show

README.md CHANGED Viewed

@@ -14,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of an unspecified base model (hosted on the [Hugging Face Hub](https://huggingface.co/)) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.4457
 ## Model description
@@ -34,32 +34,30 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 0.001
-- train_batch_size: 12
-- eval_batch_size: 12
 - seed: 42
 - distributed_type: multi-GPU
-- num_devices: 8
-- total_train_batch_size: 96
-- total_eval_batch_size: 96
 - optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9,0.999), epsilon=1e-06, and no additional optimizer arguments
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 1000
-- training_steps: 41793
 - mixed_precision_training: Native AMP
 ### Training results
 | Training Loss | Epoch  | Step  | Validation Loss |
 |:-------------:|:------:|:-----:|:---------------:|
-| 0.7021        | 0.2393 | 10000 | 1.5429          |
-| 0.6804        | 0.4785 | 20000 | 1.4670          |
-| 0.6609        | 0.7178 | 30000 | 1.4518          |
-| 0.6524        | 0.9571 | 40000 | 1.4457          |
 ### Framework versions
-- Transformers 4.49.0
-- Pytorch 2.6.0+cu124
-- Datasets 3.3.2
-- Tokenizers 0.21.0

 This model is a fine-tuned version of an unspecified base model (hosted on the [Hugging Face Hub](https://huggingface.co/)) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.3226
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 0.001
+- train_batch_size: 48
+- eval_batch_size: 48
 - seed: 42
 - distributed_type: multi-GPU
+- num_devices: 4
+- total_train_batch_size: 192
+- total_eval_batch_size: 192
 - optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9,0.999), epsilon=1e-06, and no additional optimizer arguments
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 1000
+- training_steps: 23848
 - mixed_precision_training: Native AMP
 ### Training results
 | Training Loss | Epoch  | Step  | Validation Loss |
 |:-------------:|:------:|:-----:|:---------------:|
+| 0.5839        | 0.4193 | 10000 | 1.3249          |
+| 0.5677        | 0.8386 | 20000 | 1.3226          |
 ### Framework versions
+- Transformers 4.53.0
+- Pytorch 2.5.1
+- Datasets 3.6.0
+- Tokenizers 0.21.2

config.json CHANGED Viewed

@@ -24,8 +24,8 @@
   "rotary_pct": 0.25,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.49.0",
   "use_cache": true,
   "use_parallel_residual": true,
-  "vocab_size": 50432
 }

   "rotary_pct": 0.25,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
+  "transformers_version": "4.53.0",
   "use_cache": true,
   "use_parallel_residual": true,
+  "vocab_size": 50261
 }

generation_config.json CHANGED Viewed

@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 0,
   "eos_token_id": 2,
-  "transformers_version": "4.49.0"
 }

   "_from_model_config": true,
   "bos_token_id": 0,
   "eos_token_id": 2,
+  "transformers_version": "4.53.0"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ba51c86358f2ba95b2b5eb856a88def1f3e548a047d87b7bf39d3844f6449c7
-size 1460968264

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d0de294713d8cb28d55ab275c2aa328d5f7e9ac085d2f7b6d7bad1f0307782
+size 1459217224

tokenizer.json CHANGED Viewed

@@ -11,6 +11,42 @@
       "rstrip": false,
       "normalized": true,
       "special": true
     }
   ],
   "normalizer": null,

       "rstrip": false,
       "normalized": true,
       "special": true
+    },
+    {
+      "id": 50257,
+      "content": "<BIO>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
+    {
+      "id": 50258,
+      "content": "<ENDBIO>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
+    {
+      "id": 50259,
+      "content": "<QA>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
+    {
+      "id": 50260,
+      "content": "<ENDQA>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
     }
   ],
   "normalizer": null,

tokenizer_config.json CHANGED Viewed

@@ -10,6 +10,38 @@
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "bos_token": "<|endoftext|>",

       "rstrip": false,
       "single_word": false,
       "special": true
+    },
+    "50257": {
+      "content": "<BIO>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50258": {
+      "content": "<ENDBIO>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50259": {
+      "content": "<QA>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50260": {
+      "content": "<ENDQA>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
     }
   },
   "bos_token": "<|endoftext|>",

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de0d26b3d02b3a08fd075bc9ed39abab3ab4da95983e8f55e7e565ea84e75241
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:c539ee10a916825eb291db44d9e32f3b1812ba4acc603c3206f0766096dc6285
 size 5368