{
  "model_name": "llama2_lora",
  "finetuning_config": {
    "learning_rate": 5e-05,
    "gradient_accumulation_steps": 1,
    "batch_size": 1,
    "weight_decay": 0.01,
    "warmup_steps": 50,
    "eval_steps": 5000,
    "save_steps": 5000,
    "max_length": 512,
    "num_train_epochs": 3,
    "logging_steps": 10,
    "max_grad_norm": 2.0,
    "save_total_limit": 4,
    "optimizer_name": "adamw",
    "output_dir": "./checkpoints/llama2-lora_nq-mctx_em-top1"
  },
  "generation_config": {
    "penalty_alpha": 0.6,
    "top_k": 4,
    "max_new_tokens": 256,
    "do_sample": false,
    "top_p": null
  }
}
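As a rough illustration only (the file name `finetune_config.json`, the loader below, and the mapping of fields onto Hugging Face arguments are assumptions, not part of the original setup), the `finetuning_config` block maps naturally onto `transformers.TrainingArguments`, while `generation_config` (a positive `penalty_alpha` with a small `top_k` and `do_sample: false` corresponds to contrastive-search decoding) maps onto `transformers.GenerationConfig`:

```python
import json

from transformers import TrainingArguments, GenerationConfig

# Hypothetical loader: the file name and the field-to-argument mapping
# below are assumptions for illustration, not the original training code.
with open("finetune_config.json") as f:
    cfg = json.load(f)

ft = cfg["finetuning_config"]

# Assumptions: "batch_size" is treated as the per-device train batch size,
# and "optimizer_name": "adamw" as the default AdamW optimizer in
# transformers. "max_length" is not a TrainingArguments field; it would be
# applied when tokenizing the inputs.
training_args = TrainingArguments(
    output_dir=ft["output_dir"],
    learning_rate=ft["learning_rate"],
    per_device_train_batch_size=ft["batch_size"],
    gradient_accumulation_steps=ft["gradient_accumulation_steps"],
    weight_decay=ft["weight_decay"],
    warmup_steps=ft["warmup_steps"],
    eval_steps=ft["eval_steps"],
    save_steps=ft["save_steps"],
    num_train_epochs=ft["num_train_epochs"],
    logging_steps=ft["logging_steps"],
    max_grad_norm=ft["max_grad_norm"],
    save_total_limit=ft["save_total_limit"],
)

gen = cfg["generation_config"]

# penalty_alpha > 0 together with an integer top_k and do_sample=False
# selects contrastive-search decoding in model.generate(). top_p is null
# in the config and is left at its default here, since it only matters
# when sampling is enabled.
generation_config = GenerationConfig(
    penalty_alpha=gen["penalty_alpha"],
    top_k=gen["top_k"],
    max_new_tokens=gen["max_new_tokens"],
    do_sample=gen["do_sample"],
)
```

The `training_args` object would then be passed to a `Trainer` together with the LoRA-wrapped model, and `generation_config` to `model.generate()` at evaluation time; both of those wiring steps are outside this config file.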