| train_old.py | |
| deepspeed /workspace/llavaguard/configs/zero3.json | |
| model_name_or_path lmms-lab/llava-onevision-qwen2-7b-ov | |
| version qwen_1_5 | |
| data_path /common-repos/LlavaGuard/data/LlavaGuard-DS/v24/train_oversampled.json | |
| data_path_eval /common-repos/LlavaGuard/data/LlavaGuard-DS/v24/eval.json | |
| image_folder /common-repos | |
| mm_tunable_parts=mm_language_model | |
| mm_vision_tower_lr=2e-6 | |
| vision_tower google/siglip-so400m-patch14-384 | |
| mm_projector_type mlp2x_gelu | |
| mm_vision_select_layer -2 | |
| mm_use_im_start_end False | |
| mm_use_im_patch_token False | |
| group_by_modality_length True | |
| image_aspect_ratio anyres_max_9 | |
| image_grid_pinpoints (1x1),...,(6x6) | |
| mm_patch_merge_type spatial_unpad | |
| bf16 True | |
| run_name LlavaGuard-v1.2-7b-ov-lmms-lab_llava-onevision-qwen2-7b-ov-LlavaGuard-DS-v24 | |
| output_dir /common-repos/LlavaGuard/models/LlavaGuard-v1.2-7b-ov/v24 | |
| num_train_epochs 3 | |
| per_device_train_batch_size 1 | |
| per_device_eval_batch_size 2 | |
| gradient_accumulation_steps 25 | |
| evaluation_strategy no | |
| eval_steps 1 | |
| save_strategy epoch | |
| save_steps 1 | |
| save_total_limit 1 | |
| learning_rate 1e-5 | |
| weight_decay 0. | |
| warmup_ratio 0.03 | |
| lr_scheduler_type cosine | |
| logging_steps 1 | |
| tf32 True | |
| model_max_length 32768 | |
| gradient_checkpointing True | |
| dataloader_num_workers 4 | |
| lazy_preprocess True | |
| report_to wandb | |
| torch_compile True | |
| torch_compile_backend inductor | |
| dataloader_drop_last True | |
| Eval date: 25/10/2024 06:40:09 |