Delete train_2025-08-07-09-00-30
train_2025-08-07-09-00-30/llamaboard_config.yaml
DELETED
@@ -1,85 +0,0 @@
-top.booster: auto
-top.checkpoint_path: []
-top.finetuning_type: lora
-top.model_name: Llama-3.1-70B
-top.quantization_bit: none
-top.quantization_method: bnb
-top.rope_scaling: none
-top.template: default
-train.additional_target: ''
-train.apollo_rank: 16
-train.apollo_scale: 32
-train.apollo_target: all
-train.apollo_update_interval: 200
-train.badam_mode: layer
-train.badam_switch_interval: 50
-train.badam_switch_mode: ascending
-train.badam_update_ratio: 0.05
-train.batch_size: 1
-train.compute_type: bf16
-train.create_new_adapter: false
-train.cutoff_len: 2048
-train.dataset:
-- ASC_CORPUS
-train.dataset_dir: data
-train.ds_offload: false
-train.ds_stage: '3'
-train.enable_thinking: true
-train.extra_args: '{"optim": "adamw_torch"}'
-train.freeze_extra_modules: ''
-train.freeze_language_model: false
-train.freeze_multi_modal_projector: true
-train.freeze_trainable_layers: 2
-train.freeze_trainable_modules: all
-train.freeze_vision_tower: true
-train.galore_rank: 16
-train.galore_scale: 2
-train.galore_target: all
-train.galore_update_interval: 200
-train.gradient_accumulation_steps: 8
-train.image_max_pixels: 768*768
-train.image_min_pixels: 32*32
-train.learning_rate: 5e-5
-train.logging_steps: 1
-train.lora_alpha: 16
-train.lora_dropout: 0.05
-train.lora_rank: 8
-train.lora_target: ''
-train.loraplus_lr_ratio: 0
-train.lr_scheduler_type: cosine
-train.mask_history: false
-train.max_grad_norm: '1.0'
-train.max_samples: '100000'
-train.neat_packing: false
-train.neftune_alpha: 0
-train.num_train_epochs: '10.0'
-train.packing: true
-train.ppo_score_norm: false
-train.ppo_whiten_rewards: false
-train.pref_beta: 0.1
-train.pref_ftx: 0
-train.pref_loss: sigmoid
-train.report_to: wandb
-train.resize_vocab: false
-train.reward_model: []
-train.save_steps: 100
-train.swanlab_api_key: ''
-train.swanlab_link: ''
-train.swanlab_mode: cloud
-train.swanlab_project: llamafactory
-train.swanlab_run_name: ''
-train.swanlab_workspace: ''
-train.train_on_prompt: false
-train.training_stage: Pre-Training
-train.use_apollo: false
-train.use_badam: false
-train.use_dora: false
-train.use_galore: false
-train.use_llama_pro: false
-train.use_pissa: false
-train.use_rslora: false
-train.use_swanlab: false
-train.val_size: 0
-train.video_max_pixels: 256*256
-train.video_min_pixels: 16*16
-train.warmup_steps: 0
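Note: llamaboard_config.yaml stores the LlamaBoard web-UI state as flat dotted keys (top.* for the model panel, train.* for the training panel). A minimal sketch for inspecting such a file from an earlier revision of this repo, assuming PyYAML is installed; the grouping logic below is illustrative, not part of LLaMA-Factory itself:

# Minimal sketch: load the (pre-deletion) llamaboard_config.yaml and group its dotted keys.
# Assumes PyYAML is installed and the file has been checked out from a prior revision.
import yaml
from collections import defaultdict

with open("train_2025-08-07-09-00-30/llamaboard_config.yaml") as f:
    flat = yaml.safe_load(f)  # e.g. {"top.model_name": "Llama-3.1-70B", ...}

grouped = defaultdict(dict)
for key, value in flat.items():
    panel, _, name = key.partition(".")  # "top" or "train"
    grouped[panel][name] = value

print(grouped["top"]["model_name"])        # Llama-3.1-70B
print(grouped["train"]["training_stage"])  # Pre-Training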
train_2025-08-07-09-00-30/running_log.txt
DELETED
@@ -1,46 +0,0 @@
-[INFO|2025-08-07 09:03:42] configuration_utils.py:750 >> loading configuration file /workspace/meta-llama/Llama-3.1-70B/config.json
-[INFO|2025-08-07 09:03:42] configuration_utils.py:817 >> Model config LlamaConfig {
-  "architectures": [
-    "LlamaForCausalLM"
-  ],
-  "attention_bias": false,
-  "attention_dropout": 0.0,
-  "bos_token_id": 128000,
-  "eos_token_id": 128001,
-  "head_dim": 128,
-  "hidden_act": "silu",
-  "hidden_size": 8192,
-  "initializer_range": 0.02,
-  "intermediate_size": 28672,
-  "max_position_embeddings": 131072,
-  "mlp_bias": false,
-  "model_type": "llama",
-  "num_attention_heads": 64,
-  "num_hidden_layers": 80,
-  "num_key_value_heads": 8,
-  "pretraining_tp": 1,
-  "rms_norm_eps": 1e-05,
-  "rope_scaling": {
-    "factor": 8.0,
-    "high_freq_factor": 4.0,
-    "low_freq_factor": 1.0,
-    "original_max_position_embeddings": 8192,
-    "rope_type": "llama3"
-  },
-  "rope_theta": 500000.0,
-  "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.55.0",
-  "use_cache": true,
-  "vocab_size": 128256
-}
-
-[INFO|2025-08-07 09:03:42] logging.py:143 >> KV cache is disabled during training.
-[INFO|2025-08-07 09:03:42] modeling_utils.py:1305 >> loading weights file /workspace/meta-llama/Llama-3.1-70B/model.safetensors.index.json
-[INFO|2025-08-07 09:03:42] modeling_utils.py:4363 >> Detected DeepSpeed ZeRO-3: activating zero.init() for this model
-[INFO|2025-08-07 09:03:42] configuration_utils.py:1098 >> Generate config GenerationConfig {
-  "bos_token_id": 128000,
-  "eos_token_id": 128001,
-  "use_cache": false
-}
-
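Note: the log captures transformers loading the base model's config.json and sharded safetensors index under DeepSpeed ZeRO-3. A minimal sketch that reproduces just the config-loading step with transformers' AutoConfig, assuming the checkpoint is available at the same local path used in the log (swap in the Hub id "meta-llama/Llama-3.1-70B" otherwise):

# Minimal sketch of the config-loading step recorded in running_log.txt.
# Assumes transformers is installed; the local path is taken from the log above.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("/workspace/meta-llama/Llama-3.1-70B")
print(config.model_type)         # "llama"
print(config.num_hidden_layers)  # 80
print(config.rope_scaling)       # {"rope_type": "llama3", "factor": 8.0, ...}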
train_2025-08-07-09-00-30/training_args.yaml
DELETED
@@ -1,35 +0,0 @@
-bf16: true
-cutoff_len: 2048
-dataset: ASC_CORPUS
-dataset_dir: data
-ddp_timeout: 180000000
-deepspeed: cache/ds_z3_config.json
-do_train: true
-enable_thinking: true
-finetuning_type: lora
-flash_attn: auto
-gradient_accumulation_steps: 8
-include_num_input_tokens_seen: true
-learning_rate: 5.0e-05
-logging_steps: 1
-lora_alpha: 16
-lora_dropout: 0.05
-lora_rank: 8
-lora_target: all
-lr_scheduler_type: cosine
-max_grad_norm: 1.0
-max_samples: 100000
-model_name_or_path: /workspace/meta-llama/Llama-3.1-70B
-num_train_epochs: 10.0
-optim: adamw_torch
-output_dir: saves/Llama-3.1-70B/lora/train_2025-08-07-09-00-30
-packing: true
-per_device_train_batch_size: 1
-plot_loss: true
-preprocessing_num_workers: 16
-report_to: wandb
-save_steps: 100
-stage: pt
-template: default
-trust_remote_code: true
-warmup_steps: 0
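Note: training_args.yaml is the flat argument file that a run like this is typically replayed from (e.g. llamafactory-cli train training_args.yaml). The file fixes per-device batch size and gradient accumulation but not the world size, so the effective global batch size depends on the GPU count. A minimal sketch that reads the file and derives that number, assuming PyYAML is installed; NUM_GPUS is a placeholder, not a value recorded in this run:

# Minimal sketch: derive the effective batch size from the deleted training_args.yaml.
import yaml

NUM_GPUS = 8  # illustrative assumption; the world size is not stored in this file

with open("train_2025-08-07-09-00-30/training_args.yaml") as f:
    args = yaml.safe_load(f)

effective_batch = (
    args["per_device_train_batch_size"]   # 1
    * args["gradient_accumulation_steps"] # 8
    * NUM_GPUS
)
print(f"stage={args['stage']}, lora_rank={args['lora_rank']}, "
      f"effective batch size={effective_batch} packed sequences of {args['cutoff_len']} tokens")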