Added PEFT and diffusion-pipe config data
Browse files- adapter_config.json +40 -0
- config.toml +60 -0
- wandb-metadata.json +46 -0
adapter_config.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha_pattern": {},
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": null,
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"corda_config": null,
|
| 7 |
+
"eva_config": null,
|
| 8 |
+
"exclude_modules": null,
|
| 9 |
+
"fan_in_fan_out": false,
|
| 10 |
+
"inference_mode": false,
|
| 11 |
+
"init_lora_weights": true,
|
| 12 |
+
"layer_replication": null,
|
| 13 |
+
"layers_pattern": null,
|
| 14 |
+
"layers_to_transform": null,
|
| 15 |
+
"loftq_config": {},
|
| 16 |
+
"lora_alpha": 32,
|
| 17 |
+
"lora_bias": false,
|
| 18 |
+
"lora_dropout": 0.0,
|
| 19 |
+
"megatron_config": null,
|
| 20 |
+
"megatron_core": "megatron.core",
|
| 21 |
+
"modules_to_save": null,
|
| 22 |
+
"peft_type": "LORA",
|
| 23 |
+
"qalora_group_size": 16,
|
| 24 |
+
"r": 32,
|
| 25 |
+
"rank_pattern": {},
|
| 26 |
+
"revision": null,
|
| 27 |
+
"target_modules": [
|
| 28 |
+
"q",
|
| 29 |
+
"k",
|
| 30 |
+
"ffn.2",
|
| 31 |
+
"o",
|
| 32 |
+
"v",
|
| 33 |
+
"ffn.0"
|
| 34 |
+
],
|
| 35 |
+
"task_type": null,
|
| 36 |
+
"trainable_token_indices": null,
|
| 37 |
+
"use_dora": false,
|
| 38 |
+
"use_qalora": false,
|
| 39 |
+
"use_rslora": false
|
| 40 |
+
}
|
config.toml
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Main diffusion-pipe training config; dataset settings live in configs/dataset.toml (see 'dataset' key below).
|
| 2 |
+
output_dir = '/root/outputs'
|
| 3 |
+
dataset = 'configs/dataset.toml'
|
| 4 |
+
|
| 5 |
+
# Training settings
|
| 6 |
+
epochs = 50
|
| 7 |
+
micro_batch_size_per_gpu = 5
|
| 8 |
+
pipeline_stages = 1
|
| 9 |
+
gradient_accumulation_steps = 1
|
| 10 |
+
gradient_clipping = 1.0
|
| 11 |
+
warmup_steps = 20
|
| 12 |
+
|
| 13 |
+
# eval settings
|
| 14 |
+
eval_every_n_epochs = 1
|
| 15 |
+
eval_before_first_step = false
|
| 16 |
+
eval_micro_batch_size_per_gpu = 5
|
| 17 |
+
eval_gradient_accumulation_steps = 1
|
| 18 |
+
|
| 19 |
+
# misc settings
|
| 20 |
+
save_every_n_epochs = 2
|
| 21 |
+
checkpoint_every_n_epochs = 2
|
| 22 |
+
activation_checkpointing = true
|
| 23 |
+
partition_method = 'parameters'
|
| 24 |
+
save_dtype = 'bfloat16'
|
| 25 |
+
caching_batch_size = 3
|
| 26 |
+
steps_per_print = 1
|
| 27 |
+
compile = true
|
| 28 |
+
video_clip_mode = 'single_beginning'
|
| 29 |
+
#blocks_to_swap = 15
|
| 30 |
+
|
| 31 |
+
[model]
|
| 32 |
+
type = 'wan'
|
| 33 |
+
ckpt_path = '/root/outputs/models/Wan2.1-T2V-14B'
|
| 34 |
+
#diffusers_path = '/root/outputs/models/FLUX.1-dev'
|
| 35 |
+
#transformer_path = '/root/outputs/models/chroma/Chroma.safetensors'
|
| 36 |
+
llm_path = '/root/outputs/models/Wan2.1-T2V-14B/models_t5_umt5-xxl-enc-bf16.pth'
|
| 37 |
+
dtype = 'bfloat16'
|
| 38 |
+
# You can optionally load the transformer in fp8 when training LoRAs.
|
| 39 |
+
transformer_dtype = 'float8'
|
| 40 |
+
timestep_sample_method = 'logit_normal'
|
| 41 |
+
#flux_shift = true
|
| 42 |
+
|
| 43 |
+
[adapter]
|
| 44 |
+
type = 'lora'
|
| 45 |
+
rank = 32
|
| 46 |
+
dtype = 'bfloat16'
|
| 47 |
+
#init_from_existing = '/root/outputs/5c6d31124f144544913effcc0a17e0ea/epoch10'
|
| 48 |
+
|
| 49 |
+
[optimizer]
|
| 50 |
+
type = 'adamw_optimi'
|
| 51 |
+
lr = 2e-4
|
| 52 |
+
betas = [0.9, 0.99]
|
| 53 |
+
weight_decay = 0.01
|
| 54 |
+
eps = 1e-8
|
| 55 |
+
|
| 56 |
+
[monitoring]
|
| 57 |
+
enable_wandb = true
|
| 58 |
+
wandb_api_key = 'REDACTED'  # SECURITY: never commit API keys; the leaked key was removed — supply it via the WANDB_API_KEY environment variable and revoke the exposed key
|
| 59 |
+
wandb_tracker_name = 'wan'
|
| 60 |
+
wandb_run_name = 'wan-kwis-v1'
|
wandb-metadata.json
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-4.4.0-x86_64-with-glibc2.36",
|
| 3 |
+
"python": "CPython 3.12.7",
|
| 4 |
+
"startedAt": "2025-08-07T17:50:41.273316Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--local_rank=0",
|
| 7 |
+
"--deepspeed",
|
| 8 |
+
"--config",
|
| 9 |
+
"configs/config.toml"
|
| 10 |
+
],
|
| 11 |
+
"program": "/root/diffusion-pipe/train.py",
|
| 12 |
+
"codePath": "train.py",
|
| 13 |
+
"codePathLocal": "train.py",
|
| 14 |
+
"git": {
|
| 15 |
+
"remote": "https://github.com/tdrussell/diffusion-pipe.git",
|
| 16 |
+
"commit": "6b65063a5085f799cc91508c735ce4cb6fc6b036"
|
| 17 |
+
},
|
| 18 |
+
"email": "[email protected]",
|
| 19 |
+
"root": "/root/outputs/models",
|
| 20 |
+
"host": "modal",
|
| 21 |
+
"executable": "/opt/conda/bin/python3.12",
|
| 22 |
+
"cpu_count": 17,
|
| 23 |
+
"cpu_count_logical": 17,
|
| 24 |
+
"gpu": "NVIDIA L40S",
|
| 25 |
+
"gpu_count": 1,
|
| 26 |
+
"disk": {
|
| 27 |
+
"/": {
|
| 28 |
+
"total": "549755813888",
|
| 29 |
+
"used": "13826981888"
|
| 30 |
+
}
|
| 31 |
+
},
|
| 32 |
+
"memory": {
|
| 33 |
+
"total": "359713656832"
|
| 34 |
+
},
|
| 35 |
+
"gpu_nvidia": [
|
| 36 |
+
{
|
| 37 |
+
"name": "NVIDIA L40S",
|
| 38 |
+
"memoryTotal": "48305799168",
|
| 39 |
+
"cudaCores": 18176,
|
| 40 |
+
"architecture": "Ada",
|
| 41 |
+
"uuid": "GPU-fa9be711-9328-649f-0757-e99aedf64bb8"
|
| 42 |
+
}
|
| 43 |
+
],
|
| 44 |
+
"cudaVersion": "12.9",
|
| 45 |
+
"writerId": "54v4o419o3hllzfl28qql5njcy5i57ok"
|
| 46 |
+
}
|