tim-lawson committed on
Commit
086e2e6
·
verified ·
1 Parent(s): 55c8271

Upload model

Browse files
Files changed (1) hide show
  1. config.json +19 -60
config.json CHANGED
@@ -1,61 +1,20 @@
1
  {
2
- "data": {
3
- "train_files": "data/fineweb_10B_gpt2/fineweb_train_*.bin",
4
- "val_files": "data/fineweb_10B_gpt2/fineweb_val_*.bin",
5
- "batch_size": 512,
6
- "device_batch_size": 32
7
- },
8
- "model": {
9
- "dim": 768,
10
- "n_layers": 8,
11
- "n_heads": 12,
12
- "n_kv_heads": 12,
13
- "vocab_size": 50257,
14
- "multiple_of": 256,
15
- "ffn_dim_multiplier": 4,
16
- "norm_eps": 1e-05,
17
- "rope_theta": 10000,
18
- "use_scaled_rope": false,
19
- "max_seq_len": 1024,
20
- "initializer_range": 0.02,
21
- "zero_init_masks": true
22
- },
23
- "optimizer": {
24
- "default": {
25
- "lr": 0.001,
26
- "beta1": 0.8,
27
- "beta2": 0.95,
28
- "eps": 1e-10,
29
- "weight_decay": 0
30
- },
31
- "masks": {
32
- "lr": 0.001,
33
- "beta1": 0.8,
34
- "beta2": 0.95,
35
- "eps": 1e-10,
36
- "weight_decay": 0
37
- },
38
- "norms": {
39
- "lr": 0.001,
40
- "beta1": 0.8,
41
- "beta2": 0.95,
42
- "eps": 1e-10,
43
- "weight_decay": 0
44
- }
45
- },
46
- "scheduler": {
47
- "warmup_steps": 0.1,
48
- "start_factor": 0.1
49
- },
50
- "gates": {},
51
- "gates_zero_eps": 1e-08,
52
- "seed": 0,
53
- "project": "fineweb-baseline",
54
- "run_id": null,
55
- "logdir": "logs/fineweb-baseline",
56
- "log_gradients": false,
57
- "log_params": false,
58
- "log_every_steps": 1,
59
- "val_every_steps": 100,
60
- "save_every_steps": -1
61
- }
 
1
  {
2
+ "architectures": [
3
+ "SkipMiddleModel"
4
+ ],
5
+ "dim": 768,
6
+ "ffn_dim_multiplier": 4,
7
+ "initializer_range": 0.02,
8
+ "max_seq_len": 1024,
9
+ "multiple_of": 256,
10
+ "n_heads": 12,
11
+ "n_kv_heads": 12,
12
+ "n_layers": 8,
13
+ "norm_eps": 1e-05,
14
+ "rope_theta": 10000,
15
+ "torch_dtype": "bfloat16",
16
+ "transformers_version": "4.51.1",
17
+ "use_scaled_rope": false,
18
+ "vocab_size": 50257,
19
+ "zero_init_masks": true
20
+ }