{
  "batch_size": 128,
  "sequence_length": 64,
  "num_epochs": 1,
  "learning_rate": 0.001,
  "seed": 42,
  "n_iterations": 10,
  "n_freq_train": 1,
  "dropout_rate": 0.2,
  "num_layers": 6,
  "embed_size": 256,
  "num_heads": 8,
  "head_size": 32,
  "use_wandb": false,
  "wandb_project": "nanollm",
  "wandb_entity": null,
  "push_to_hub": true,
  "repo_id": "SauravMaheshkar/nanollm",
  "vocab_size": 65
}