Linksome committed
Commit 079c0ee · verified · 1 Parent(s): c275981

Delete train_2025-08-07-09-00-30

train_2025-08-07-09-00-30/llamaboard_config.yaml DELETED
@@ -1,85 +0,0 @@
- top.booster: auto
- top.checkpoint_path: []
- top.finetuning_type: lora
- top.model_name: Llama-3.1-70B
- top.quantization_bit: none
- top.quantization_method: bnb
- top.rope_scaling: none
- top.template: default
- train.additional_target: ''
- train.apollo_rank: 16
- train.apollo_scale: 32
- train.apollo_target: all
- train.apollo_update_interval: 200
- train.badam_mode: layer
- train.badam_switch_interval: 50
- train.badam_switch_mode: ascending
- train.badam_update_ratio: 0.05
- train.batch_size: 1
- train.compute_type: bf16
- train.create_new_adapter: false
- train.cutoff_len: 2048
- train.dataset:
- - ASC_CORPUS
- train.dataset_dir: data
- train.ds_offload: false
- train.ds_stage: '3'
- train.enable_thinking: true
- train.extra_args: '{"optim": "adamw_torch"}'
- train.freeze_extra_modules: ''
- train.freeze_language_model: false
- train.freeze_multi_modal_projector: true
- train.freeze_trainable_layers: 2
- train.freeze_trainable_modules: all
- train.freeze_vision_tower: true
- train.galore_rank: 16
- train.galore_scale: 2
- train.galore_target: all
- train.galore_update_interval: 200
- train.gradient_accumulation_steps: 8
- train.image_max_pixels: 768*768
- train.image_min_pixels: 32*32
- train.learning_rate: 5e-5
- train.logging_steps: 1
- train.lora_alpha: 16
- train.lora_dropout: 0.05
- train.lora_rank: 8
- train.lora_target: ''
- train.loraplus_lr_ratio: 0
- train.lr_scheduler_type: cosine
- train.mask_history: false
- train.max_grad_norm: '1.0'
- train.max_samples: '100000'
- train.neat_packing: false
- train.neftune_alpha: 0
- train.num_train_epochs: '10.0'
- train.packing: true
- train.ppo_score_norm: false
- train.ppo_whiten_rewards: false
- train.pref_beta: 0.1
- train.pref_ftx: 0
- train.pref_loss: sigmoid
- train.report_to: wandb
- train.resize_vocab: false
- train.reward_model: []
- train.save_steps: 100
- train.swanlab_api_key: ''
- train.swanlab_link: ''
- train.swanlab_mode: cloud
- train.swanlab_project: llamafactory
- train.swanlab_run_name: ''
- train.swanlab_workspace: ''
- train.train_on_prompt: false
- train.training_stage: Pre-Training
- train.use_apollo: false
- train.use_badam: false
- train.use_dora: false
- train.use_galore: false
- train.use_llama_pro: false
- train.use_pissa: false
- train.use_rslora: false
- train.use_swanlab: false
- train.val_size: 0
- train.video_max_pixels: 256*256
- train.video_min_pixels: 16*16
- train.warmup_steps: 0
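
Note: llamaboard_config.yaml records the LLaMA Board (web UI) form state, while training_args.yaml further down in this commit holds the flat arguments actually passed to the trainer. The sketch below (plain Python, purely illustrative) shows how a few of the train.* fields above appear to map onto the flat argument names; the mapping is inferred by comparing the two deleted files, not taken from LLaMA-Factory's own export code.

# Illustrative sketch: correspondence between a few WebUI fields above and the
# flat arguments in training_args.yaml below. The mapping is inferred from this
# commit's files, not from LLaMA-Factory's source.
webui_to_args = {
    "train.batch_size": "per_device_train_batch_size",                   # 1
    "train.gradient_accumulation_steps": "gradient_accumulation_steps",  # 8
    "train.compute_type": "bf16",          # "bf16" -> bf16: true
    "train.ds_stage": "deepspeed",         # '3' -> cache/ds_z3_config.json
    "train.training_stage": "stage",       # "Pre-Training" -> pt
}

for webui_key, arg_name in webui_to_args.items():
    print(f"{webui_key:36s} -> {arg_name}")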
 
train_2025-08-07-09-00-30/running_log.txt DELETED
@@ -1,46 +0,0 @@
- [INFO|2025-08-07 09:03:42] configuration_utils.py:750 >> loading configuration file /workspace/meta-llama/Llama-3.1-70B/config.json
- [INFO|2025-08-07 09:03:42] configuration_utils.py:817 >> Model config LlamaConfig {
-   "architectures": [
-     "LlamaForCausalLM"
-   ],
-   "attention_bias": false,
-   "attention_dropout": 0.0,
-   "bos_token_id": 128000,
-   "eos_token_id": 128001,
-   "head_dim": 128,
-   "hidden_act": "silu",
-   "hidden_size": 8192,
-   "initializer_range": 0.02,
-   "intermediate_size": 28672,
-   "max_position_embeddings": 131072,
-   "mlp_bias": false,
-   "model_type": "llama",
-   "num_attention_heads": 64,
-   "num_hidden_layers": 80,
-   "num_key_value_heads": 8,
-   "pretraining_tp": 1,
-   "rms_norm_eps": 1e-05,
-   "rope_scaling": {
-     "factor": 8.0,
-     "high_freq_factor": 4.0,
-     "low_freq_factor": 1.0,
-     "original_max_position_embeddings": 8192,
-     "rope_type": "llama3"
-   },
-   "rope_theta": 500000.0,
-   "tie_word_embeddings": false,
-   "torch_dtype": "bfloat16",
-   "transformers_version": "4.55.0",
-   "use_cache": true,
-   "vocab_size": 128256
- }
-
- [INFO|2025-08-07 09:03:42] logging.py:143 >> KV cache is disabled during training.
- [INFO|2025-08-07 09:03:42] modeling_utils.py:1305 >> loading weights file /workspace/meta-llama/Llama-3.1-70B/model.safetensors.index.json
- [INFO|2025-08-07 09:03:42] modeling_utils.py:4363 >> Detected DeepSpeed ZeRO-3: activating zero.init() for this model
- [INFO|2025-08-07 09:03:42] configuration_utils.py:1098 >> Generate config GenerationConfig {
-   "bos_token_id": 128000,
-   "eos_token_id": 128001,
-   "use_cache": false
- }
-
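
Note: the LlamaConfig dump in the log is enough to sanity-check the model size. A minimal Python sketch, using only the values printed above (no bias terms, untied embeddings, per that config), estimates the dense parameter count:

# Minimal sketch: estimate the parameter count from the LlamaConfig values in
# the log above (attention_bias/mlp_bias are false, embeddings are untied).
hidden_size = 8192
intermediate_size = 28672
num_hidden_layers = 80
num_attention_heads = 64
num_key_value_heads = 8
head_dim = 128
vocab_size = 128256

attn = (hidden_size * num_attention_heads * head_dim        # q_proj
        + 2 * hidden_size * num_key_value_heads * head_dim  # k_proj + v_proj
        + num_attention_heads * head_dim * hidden_size)     # o_proj
mlp = 3 * hidden_size * intermediate_size                   # gate, up, down projections
norms = 2 * hidden_size                                     # two RMSNorms per layer
per_layer = attn + mlp + norms

embeddings = 2 * vocab_size * hidden_size                   # embed_tokens + lm_head (untied)
total = num_hidden_layers * per_layer + embeddings + hidden_size  # + final norm

print(f"~{total / 1e9:.1f}B parameters")  # about 70.6B, consistent with Llama-3.1-70B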
 
train_2025-08-07-09-00-30/training_args.yaml DELETED
@@ -1,35 +0,0 @@
- bf16: true
- cutoff_len: 2048
- dataset: ASC_CORPUS
- dataset_dir: data
- ddp_timeout: 180000000
- deepspeed: cache/ds_z3_config.json
- do_train: true
- enable_thinking: true
- finetuning_type: lora
- flash_attn: auto
- gradient_accumulation_steps: 8
- include_num_input_tokens_seen: true
- learning_rate: 5.0e-05
- logging_steps: 1
- lora_alpha: 16
- lora_dropout: 0.05
- lora_rank: 8
- lora_target: all
- lr_scheduler_type: cosine
- max_grad_norm: 1.0
- max_samples: 100000
- model_name_or_path: /workspace/meta-llama/Llama-3.1-70B
- num_train_epochs: 10.0
- optim: adamw_torch
- output_dir: saves/Llama-3.1-70B/lora/train_2025-08-07-09-00-30
- packing: true
- per_device_train_batch_size: 1
- plot_loss: true
- preprocessing_num_workers: 16
- report_to: wandb
- save_steps: 100
- stage: pt
- template: default
- trust_remote_code: true
- warmup_steps: 0
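
Note: this flat file matches the YAML argument format that llamafactory-cli train accepts, so the run could presumably be re-launched from it once the dataset directory and DeepSpeed config path exist locally. As a quick sanity check, the sketch below loads the file and computes the tokens consumed per optimizer step; the world size is an assumption, since the number of GPUs is not recorded anywhere in this commit.

# Minimal sketch: tokens per optimizer step implied by the arguments above,
# assuming packing fills every sequence to cutoff_len. WORLD_SIZE is an
# assumption (the number of DeepSpeed ranks is not recorded in the file).
import yaml  # PyYAML

WORLD_SIZE = 8  # hypothetical; set to the actual number of GPUs used

with open("train_2025-08-07-09-00-30/training_args.yaml") as f:
    args = yaml.safe_load(f)

tokens_per_step = (args["cutoff_len"]
                   * args["per_device_train_batch_size"]
                   * args["gradient_accumulation_steps"]
                   * WORLD_SIZE)
print(f"~{tokens_per_step:,} tokens per optimizer step")  # 131,072 with 8 ranks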