{
  "data_dir": "/root/dev_ac/data",
  "output_dir": "/root/outputs-oc",
  "model_name": "unsloth/gpt-oss-20b",
  "max_seq_length": 1024,
  "per_device_train_batch_size": 1,
  "gradient_accumulation_steps": 8,
  "learning_rate": 5e-05,
  "num_train_epochs": 300.0,
  "warmup_ratio": 0.03,
  "weight_decay": 0.01,
  "logging_steps": 10,
  "save_steps": 200,
  "seed": 3407,
  "test_size": 0.0,
  "lang_tag": "<|oc|>",
  "min_chars": 50,
  "dedup": true,
  "lora_r": 8,
  "lora_alpha": 16,
  "lora_dropout": 0.0,
  "target_modules": [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj"
  ],
  "sample_prompt": null,
  "sample_max_new_tokens": 200,
  "sample_temperature": 0.8,
  "sample_top_p": 0.9,
  "push_to_hub": true,
  "repo_id": "ahan2000/oc-continued-pretrain-20b-lora",
  "private": true
}
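For reference, below is a minimal sketch of how a config like this could be mapped onto standard `peft` / `transformers` objects. The file name `config.json` and the use of `peft.LoraConfig` and `transformers.TrainingArguments` are assumptions for illustration, not necessarily the loaders the actual training script uses (the `unsloth/` model name suggests the run may go through Unsloth's own wrappers instead).

```python
# Hypothetical sketch: map the JSON config above onto peft/transformers objects.
# Assumes the config is saved as "config.json"; the real pipeline may differ.
import json

from peft import LoraConfig
from transformers import TrainingArguments

with open("config.json") as f:  # assumed location of the config shown above
    cfg = json.load(f)

# LoRA adapter settings: rank, scaling, dropout, and target projection modules.
lora_config = LoraConfig(
    r=cfg["lora_r"],
    lora_alpha=cfg["lora_alpha"],
    lora_dropout=cfg["lora_dropout"],
    target_modules=cfg["target_modules"],
    task_type="CAUSAL_LM",
)

# Optimizer, schedule, logging, and Hub-push settings for the trainer.
training_args = TrainingArguments(
    output_dir=cfg["output_dir"],
    per_device_train_batch_size=cfg["per_device_train_batch_size"],
    gradient_accumulation_steps=cfg["gradient_accumulation_steps"],
    learning_rate=cfg["learning_rate"],
    num_train_epochs=cfg["num_train_epochs"],
    warmup_ratio=cfg["warmup_ratio"],
    weight_decay=cfg["weight_decay"],
    logging_steps=cfg["logging_steps"],
    save_steps=cfg["save_steps"],
    seed=cfg["seed"],
    push_to_hub=cfg["push_to_hub"],
    hub_model_id=cfg["repo_id"],
    hub_private_repo=cfg["private"],
)
```

With `per_device_train_batch_size` of 1 and `gradient_accumulation_steps` of 8, the effective batch size per device is 8 sequences of up to `max_seq_length` 1024 tokens per optimizer step.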