sdsf-flowmatch-largeres-flatts-adamw-342k / config / optimizer-unet-5e7lin-adamw.json
{
"doc": {
"base": "base optimizer configuration for unet and text encoder",
"text_encoder_overrides": "text encoder config overrides",
"text_encoder_lr_scale": "if LR not set on text encoder, sets the Lr to a multiple of the Base LR. for example, if base `lr` is 2e-6 and `text_encoder_lr_scale` is 0.5, the text encoder's LR will be set to `1e-6`.",
"-----------------": "-----------------",
"optimizer": "adamw, adamw8bit, lion",
"optimizer_desc": "'adamw' in standard 32bit, 'adamw8bit' is bitsandbytes, 'lion' is lucidrains",
"lr": "learning rate, if null will use CLI or main JSON config value",
"lr_scheduler": "'constant' or 'cosine'",
"lr_warmup_steps": "number of steps to warmup LR to target LR, if null will use CLI or default a value based on max epochs",
"lr_decay_steps": "number of steps to decay LR to zero for cosine, if null will use CLI or default a value based on max epochs",
"betas": "exponential decay rates for the moment estimates",
"epsilon": "value added to denominator for numerical stability, unused for lion",
"weight_decay": "weight decay (L2 penalty)",
"------------------": "-----------------",
"unfreeze_last_n_layers": "if not null, freeze all parameters in the text encoder except for the last n layers and the final layer norm"
},
"base": {
"optimizer": "adamw",
"lr": 5e-7,
"lr_scheduler": "linear",
"lr_decay_steps": null,
"lr_warmup_steps": null,
"betas": [0.9, 0.999],
"epsilon": 1e-8,
"weight_decay": 0.0001
},
"text_encoder_overrides": {
"optimizer": null,
"lr": null,
"lr_scheduler": null,
"lr_decay_steps": null,
"lr_warmup_steps": null,
"betas": null,
"epsilon": null,
"weight_decay": null
},
"text_encoder_freezing": {
"unfreeze_last_n_layers": null
},
"apply_grad_scaler_step_tweaks": false,
"log_grad_norm": true
}
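
The "base" block above fully specifies a standard 32-bit AdamW run. As a minimal sketch (not the trainer's actual wiring), here is how those fields map onto torch.optim.AdamW; note that the JSON "epsilon" key corresponds to the eps argument, and the parameter list below is only a stand-in for the UNet parameters:

import torch

unet_params = [torch.nn.Parameter(torch.zeros(4))]   # stand-in for the UNet parameters
optimizer = torch.optim.AdamW(
    unet_params,
    lr=5e-7,                 # "lr"
    betas=(0.9, 0.999),      # "betas"
    eps=1e-8,                # "epsilon"
    weight_decay=0.0001,     # "weight_decay"
)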
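The base block pairs this with lr_scheduler "linear" and leaves lr_warmup_steps and lr_decay_steps to the CLI or epoch-based defaults. As a rough illustration of how those two step counts interact (warm up from zero to the target LR, then decay back toward zero), one possible piecewise-linear schedule is sketched below; the trainer's actual schedule shape may differ:

import torch

def make_linear_schedule(optimizer, lr_warmup_steps, lr_decay_steps):
    def lr_lambda(step):
        if step < lr_warmup_steps:
            return step / max(1, lr_warmup_steps)      # linear warmup to the target LR
        progress = (step - lr_warmup_steps) / max(1, lr_decay_steps)
        return max(0.0, 1.0 - progress)                # linear decay toward zero
    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

# example: warm up over 100 steps, decay over the remaining training steps
scheduler = make_linear_schedule(optimizer, lr_warmup_steps=100, lr_decay_steps=342_000)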
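The "text_encoder_overrides" block is all nulls here, so the text encoder would simply inherit the base settings. Below is a small sketch of that fallback logic, including a hypothetical text_encoder_lr_scale applied only when no explicit text encoder LR is given; the function name and merge logic are illustrative, not the trainer's API:

import json

def resolve_text_encoder_settings(config, lr_scale=None):
    base = config["base"]
    overrides = config["text_encoder_overrides"]
    # start from the base block, then apply any non-null overrides
    resolved = dict(base)
    resolved.update({k: v for k, v in overrides.items() if v is not None})
    # if the text encoder LR is still unset, optionally scale the base LR
    if overrides.get("lr") is None and lr_scale is not None:
        resolved["lr"] = base["lr"] * lr_scale
    return resolved

if __name__ == "__main__":
    with open("optimizer-unet-5e7lin-adamw.json") as f:
        cfg = json.load(f)
    # with base lr 5e-7 and a scale of 0.5, the text encoder LR would be 2.5e-7
    print(resolve_text_encoder_settings(cfg, lr_scale=0.5))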
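Finally, "unfreeze_last_n_layers" is null here, so no freezing is applied. If it were set, the doc string describes freezing everything in the text encoder except the last n transformer layers and the final layer norm. A sketch of that behaviour, assuming a Hugging Face CLIPTextModel-style layout (text_model.encoder.layers / text_model.final_layer_norm); other encoders will use different attribute names:

def apply_text_encoder_freezing(text_encoder, unfreeze_last_n_layers):
    if unfreeze_last_n_layers is None:
        return  # null in the config: no freezing is applied
    # freeze everything, then re-enable the last n layers and the final layer norm
    for p in text_encoder.parameters():
        p.requires_grad = False
    for layer in text_encoder.text_model.encoder.layers[-unfreeze_last_n_layers:]:
        for p in layer.parameters():
            p.requires_grad = True
    for p in text_encoder.text_model.final_layer_norm.parameters():
        p.requires_grad = True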