tim-lawson's picture
Upload folder using huggingface_hub
f3e8ea1 verified
{
"data": {
"train_files": "data/fineweb_10B_gpt2/fineweb_train_*.bin",
"val_files": "data/fineweb_10B_gpt2/fineweb_val_*.bin",
"batch_size": 512,
"device_batch_size": 32
},
"model": {
"dim": 768,
"n_layers": 8,
"n_heads": 12,
"n_kv_heads": 12,
"vocab_size": 50257,
"multiple_of": 256,
"ffn_dim_multiplier": 4,
"norm_eps": 1e-05,
"rope_theta": 10000,
"use_scaled_rope": false,
"max_seq_len": 1024,
"initializer_range": 0.02,
"zero_init_masks": true
},
"optimizer": {
"default": {
"lr": 0.001,
"beta1": 0.8,
"beta2": 0.95,
"eps": 1e-10,
"weight_decay": 0
},
"masks": {
"lr": 0.001,
"beta1": 0.8,
"beta2": 0.95,
"eps": 1e-10,
"weight_decay": 0
},
"norms": {
"lr": 0.001,
"beta1": 0.8,
"beta2": 0.95,
"eps": 1e-10,
"weight_decay": 0
}
},
"scheduler": {
"warmup_steps": 0.1,
"start_factor": 0.1
},
"gates": {},
"gates_zero_eps": 1e-08,
"seed": 0,
"project": "fineweb-baseline",
"run_id": null,
"logdir": "logs/fineweb-baseline",
"log_gradients": false,
"log_params": false,
"log_every_steps": 1,
"val_every_steps": 100,
"save_every_steps": -1
}