{
  "batch_size": 128,
  "sequence_length": 64,
  "num_epochs": 1,
  "learning_rate": 0.001,
  "seed": 42,
  "n_iterations": 10,
  "n_freq_train": 1,
  "dropout_rate": 0.2,
  "num_layers": 6,
  "embed_size": 256,
  "num_heads": 8,
  "head_size": 32,
  "use_wandb": false,
  "wandb_project": "nanollm",
  "wandb_entity": null,
  "push_to_hub": true,
  "repo_id": "SauravMaheshkar/nanollm",
  "vocab_size": 65
}