---
# LLaMA-Factory SFT config: prefix-tuning of Llama-3-8B-Instruct on the
# CoNaLa dataset. NOTE(review): the original file had its newlines collapsed
# into spaces, which made everything after `seed` part of a trailing comment;
# structure restored below with all keys/values unchanged.
seed: 101112

### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true
flash_attn: auto  # let the framework pick the attention implementation
use_cache: false  # KV-cache off during training

### method
stage: sft
do_train: true
finetuning_type: prefix-tuning
task_type: CAUSAL_LM
num_virtual_tokens: 32  # length of the trainable prefix
prefix_projection: true  # reparameterize the prefix through an MLP
encoder_hidden_size: 512  # hidden size of the prefix-projection MLP
disable_gradient_checkpointing: true

### dataset
dataset: conala
template: llama3
cutoff_len: 2048
overwrite_cache: true
preprocessing_num_workers: 8
dataloader_num_workers: 2
packing: false

### output
output_dir: saves_multiple/prefix-tuning/llama-3-8b-instruct/train_conala_101112_1760638005
logging_steps: 5
save_steps: 0.1  # float < 1 → save every 10% of total training steps
overwrite_output_dir: true
save_only_model: false
plot_loss: true
include_num_input_tokens_seen: true
push_to_hub: true
push_to_hub_organization: rbelanec
load_best_model_at_end: true
save_total_limit: 1  # keep only the most recent/best checkpoint

### train
per_device_train_batch_size: 4
learning_rate: 1.0e-5
num_train_epochs: 20
weight_decay: 1.0e-5
lr_scheduler_type: cosine
bf16: true
ddp_timeout: 180000000  # seconds; effectively disables the DDP watchdog
resume_from_checkpoint: null
warmup_ratio: 0.1
optim: adamw_torch
report_to:
  - wandb
run_name: prefix-tuning_llama-3-8b-instruct_train_conala_101112_1760638005

### eval
per_device_eval_batch_size: 4
eval_strategy: steps
eval_steps: 0.1  # float < 1 → evaluate every 10% of total training steps
val_size: 0.2  # hold out 20% of the dataset for evaluation