KrisMinchev committed on
Commit
c3a9de3
·
verified ·
1 Parent(s): 0e72e63

End of training

Browse files
README.md CHANGED
@@ -14,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
- - Loss: 1.4457
18
 
19
  ## Model description
20
 
@@ -34,32 +34,30 @@ More information needed
34
 
35
  The following hyperparameters were used during training:
36
  - learning_rate: 0.001
37
- - train_batch_size: 12
38
- - eval_batch_size: 12
39
  - seed: 42
40
  - distributed_type: multi-GPU
41
- - num_devices: 8
42
- - total_train_batch_size: 96
43
- - total_eval_batch_size: 96
44
  - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-06 and optimizer_args=No additional optimizer arguments
45
  - lr_scheduler_type: cosine
46
  - lr_scheduler_warmup_steps: 1000
47
- - training_steps: 41793
48
  - mixed_precision_training: Native AMP
49
 
50
  ### Training results
51
 
52
  | Training Loss | Epoch | Step | Validation Loss |
53
  |:-------------:|:------:|:-----:|:---------------:|
54
- | 0.7021 | 0.2393 | 10000 | 1.5429 |
55
- | 0.6804 | 0.4785 | 20000 | 1.4670 |
56
- | 0.6609 | 0.7178 | 30000 | 1.4518 |
57
- | 0.6524 | 0.9571 | 40000 | 1.4457 |
58
 
59
 
60
  ### Framework versions
61
 
62
- - Transformers 4.49.0
63
- - Pytorch 2.6.0+cu124
64
- - Datasets 3.3.2
65
- - Tokenizers 0.21.0
 
14
 
15
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Loss: 1.3226
18
 
19
  ## Model description
20
 
 
34
 
35
  The following hyperparameters were used during training:
36
  - learning_rate: 0.001
37
+ - train_batch_size: 48
38
+ - eval_batch_size: 48
39
  - seed: 42
40
  - distributed_type: multi-GPU
41
+ - num_devices: 4
42
+ - total_train_batch_size: 192
43
+ - total_eval_batch_size: 192
44
  - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-06 and optimizer_args=No additional optimizer arguments
45
  - lr_scheduler_type: cosine
46
  - lr_scheduler_warmup_steps: 1000
47
+ - training_steps: 23848
48
  - mixed_precision_training: Native AMP
49
 
50
  ### Training results
51
 
52
  | Training Loss | Epoch | Step | Validation Loss |
53
  |:-------------:|:------:|:-----:|:---------------:|
54
+ | 0.5839 | 0.4193 | 10000 | 1.3249 |
55
+ | 0.5677 | 0.8386 | 20000 | 1.3226 |
 
 
56
 
57
 
58
  ### Framework versions
59
 
60
+ - Transformers 4.53.0
61
+ - Pytorch 2.5.1
62
+ - Datasets 3.6.0
63
+ - Tokenizers 0.21.2
config.json CHANGED
@@ -24,8 +24,8 @@
24
  "rotary_pct": 0.25,
25
  "tie_word_embeddings": false,
26
  "torch_dtype": "float32",
27
- "transformers_version": "4.49.0",
28
  "use_cache": true,
29
  "use_parallel_residual": true,
30
- "vocab_size": 50432
31
  }
 
24
  "rotary_pct": 0.25,
25
  "tie_word_embeddings": false,
26
  "torch_dtype": "float32",
27
+ "transformers_version": "4.53.0",
28
  "use_cache": true,
29
  "use_parallel_residual": true,
30
+ "vocab_size": 50261
31
  }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
  "eos_token_id": 2,
5
- "transformers_version": "4.49.0"
6
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 0,
4
  "eos_token_id": 2,
5
+ "transformers_version": "4.53.0"
6
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ba51c86358f2ba95b2b5eb856a88def1f3e548a047d87b7bf39d3844f6449c7
3
- size 1460968264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2d0de294713d8cb28d55ab275c2aa328d5f7e9ac085d2f7b6d7bad1f0307782
3
+ size 1459217224
tokenizer.json CHANGED
@@ -11,6 +11,42 @@
11
  "rstrip": false,
12
  "normalized": true,
13
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  }
15
  ],
16
  "normalizer": null,
 
11
  "rstrip": false,
12
  "normalized": true,
13
  "special": true
14
+ },
15
+ {
16
+ "id": 50257,
17
+ "content": "<BIO>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": true,
22
+ "special": false
23
+ },
24
+ {
25
+ "id": 50258,
26
+ "content": "<ENDBIO>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": true,
31
+ "special": false
32
+ },
33
+ {
34
+ "id": 50259,
35
+ "content": "<QA>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": true,
40
+ "special": false
41
+ },
42
+ {
43
+ "id": 50260,
44
+ "content": "<ENDQA>",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": true,
49
+ "special": false
50
  }
51
  ],
52
  "normalizer": null,
tokenizer_config.json CHANGED
@@ -10,6 +10,38 @@
10
  "rstrip": false,
11
  "single_word": false,
12
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  }
14
  },
15
  "bos_token": "<|endoftext|>",
 
10
  "rstrip": false,
11
  "single_word": false,
12
  "special": true
13
+ },
14
+ "50257": {
15
+ "content": "<BIO>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": false
21
+ },
22
+ "50258": {
23
+ "content": "<ENDBIO>",
24
+ "lstrip": false,
25
+ "normalized": true,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": false
29
+ },
30
+ "50259": {
31
+ "content": "<QA>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": false
37
+ },
38
+ "50260": {
39
+ "content": "<ENDQA>",
40
+ "lstrip": false,
41
+ "normalized": true,
42
+ "rstrip": false,
43
+ "single_word": false,
44
+ "special": false
45
  }
46
  },
47
  "bos_token": "<|endoftext|>",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de0d26b3d02b3a08fd075bc9ed39abab3ab4da95983e8f55e7e565ea84e75241
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c539ee10a916825eb291db44d9e32f3b1812ba4acc603c3206f0766096dc6285
3
  size 5368