| default_stage: | |
| default_modifiers: | |
| AWQModifier: | |
| config_groups: | |
| group_0: | |
| targets: [Linear] | |
| weights: | |
| num_bits: 4 | |
| type: int | |
| symmetric: true | |
| group_size: 32 | |
| strategy: group | |
| block_structure: null | |
| dynamic: false | |
| actorder: null | |
| observer: mse | |
| observer_kwargs: {} | |
| input_activations: null | |
| output_activations: null | |
| format: null | |
| targets: [Linear] | |
| ignore: [model.embed_tokens, model.embedding_norm, 're:model[.]layers[.]0[.].*', 're:model[.]layers[.]1[.].*', | |
| 're:.*feed_forward[.]gate$', 're:.*ffn_norm$', 're:.*operator_norm$', 're:.*self_attn.*', | |
| 're:.*conv.*', lm_head] | |
| mappings: | |
| - smooth_layer: model.layers.0.operator_norm | |
| balance_layers: [model.layers.0.conv.in_proj] | |
| - smooth_layer: model.layers.0.ffn_norm | |
| balance_layers: [model.layers.0.feed_forward.w1, model.layers.0.feed_forward.w3] | |
| - smooth_layer: model.layers.1.operator_norm | |
| balance_layers: [model.layers.1.conv.in_proj] | |
| - smooth_layer: model.layers.1.ffn_norm | |
| balance_layers: [model.layers.1.feed_forward.w1, model.layers.1.feed_forward.w3] | |
| - smooth_layer: model.layers.2.operator_norm | |
| balance_layers: [model.layers.2.self_attn.q_proj, model.layers.2.self_attn.k_proj, | |
| model.layers.2.self_attn.v_proj] | |
| - smooth_layer: model.layers.2.ffn_norm | |
| balance_layers: [model.layers.2.feed_forward.gate, model.layers.2.feed_forward.experts.0.w1, | |
| model.layers.2.feed_forward.experts.0.w3, model.layers.2.feed_forward.experts.1.w1, | |
| model.layers.2.feed_forward.experts.1.w3, model.layers.2.feed_forward.experts.2.w1, | |
| model.layers.2.feed_forward.experts.2.w3, model.layers.2.feed_forward.experts.3.w1, | |
| model.layers.2.feed_forward.experts.3.w3, model.layers.2.feed_forward.experts.4.w1, | |
| model.layers.2.feed_forward.experts.4.w3, model.layers.2.feed_forward.experts.5.w1, | |
| model.layers.2.feed_forward.experts.5.w3, model.layers.2.feed_forward.experts.6.w1, | |
| model.layers.2.feed_forward.experts.6.w3, model.layers.2.feed_forward.experts.7.w1, | |
| model.layers.2.feed_forward.experts.7.w3, model.layers.2.feed_forward.experts.8.w1, | |
| model.layers.2.feed_forward.experts.8.w3, model.layers.2.feed_forward.experts.9.w1, | |
| model.layers.2.feed_forward.experts.9.w3, model.layers.2.feed_forward.experts.10.w1, | |
| model.layers.2.feed_forward.experts.10.w3, model.layers.2.feed_forward.experts.11.w1, | |
| model.layers.2.feed_forward.experts.11.w3, model.layers.2.feed_forward.experts.12.w1, | |
| model.layers.2.feed_forward.experts.12.w3, model.layers.2.feed_forward.experts.13.w1, | |
| model.layers.2.feed_forward.experts.13.w3, model.layers.2.feed_forward.experts.14.w1, | |
| model.layers.2.feed_forward.experts.14.w3, model.layers.2.feed_forward.experts.15.w1, | |
| model.layers.2.feed_forward.experts.15.w3, model.layers.2.feed_forward.experts.16.w1, | |
| model.layers.2.feed_forward.experts.16.w3, model.layers.2.feed_forward.experts.17.w1, | |
| model.layers.2.feed_forward.experts.17.w3, model.layers.2.feed_forward.experts.18.w1, | |
| model.layers.2.feed_forward.experts.18.w3, model.layers.2.feed_forward.experts.19.w1, | |
| model.layers.2.feed_forward.experts.19.w3, model.layers.2.feed_forward.experts.20.w1, | |
| model.layers.2.feed_forward.experts.20.w3, model.layers.2.feed_forward.experts.21.w1, | |
| model.layers.2.feed_forward.experts.21.w3, model.layers.2.feed_forward.experts.22.w1, | |
| model.layers.2.feed_forward.experts.22.w3, model.layers.2.feed_forward.experts.23.w1, | |
| model.layers.2.feed_forward.experts.23.w3, model.layers.2.feed_forward.experts.24.w1, | |
| model.layers.2.feed_forward.experts.24.w3, model.layers.2.feed_forward.experts.25.w1, | |
| model.layers.2.feed_forward.experts.25.w3, model.layers.2.feed_forward.experts.26.w1, | |
| model.layers.2.feed_forward.experts.26.w3, model.layers.2.feed_forward.experts.27.w1, | |
| model.layers.2.feed_forward.experts.27.w3, model.layers.2.feed_forward.experts.28.w1, | |
| model.layers.2.feed_forward.experts.28.w3, model.layers.2.feed_forward.experts.29.w1, | |
| model.layers.2.feed_forward.experts.29.w3, model.layers.2.feed_forward.experts.30.w1, | |
| model.layers.2.feed_forward.experts.30.w3, model.layers.2.feed_forward.experts.31.w1, | |
| model.layers.2.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.3.operator_norm | |
| balance_layers: [model.layers.3.conv.in_proj] | |
| - smooth_layer: model.layers.3.ffn_norm | |
| balance_layers: [model.layers.3.feed_forward.gate, model.layers.3.feed_forward.experts.0.w1, | |
| model.layers.3.feed_forward.experts.0.w3, model.layers.3.feed_forward.experts.1.w1, | |
| model.layers.3.feed_forward.experts.1.w3, model.layers.3.feed_forward.experts.2.w1, | |
| model.layers.3.feed_forward.experts.2.w3, model.layers.3.feed_forward.experts.3.w1, | |
| model.layers.3.feed_forward.experts.3.w3, model.layers.3.feed_forward.experts.4.w1, | |
| model.layers.3.feed_forward.experts.4.w3, model.layers.3.feed_forward.experts.5.w1, | |
| model.layers.3.feed_forward.experts.5.w3, model.layers.3.feed_forward.experts.6.w1, | |
| model.layers.3.feed_forward.experts.6.w3, model.layers.3.feed_forward.experts.7.w1, | |
| model.layers.3.feed_forward.experts.7.w3, model.layers.3.feed_forward.experts.8.w1, | |
| model.layers.3.feed_forward.experts.8.w3, model.layers.3.feed_forward.experts.9.w1, | |
| model.layers.3.feed_forward.experts.9.w3, model.layers.3.feed_forward.experts.10.w1, | |
| model.layers.3.feed_forward.experts.10.w3, model.layers.3.feed_forward.experts.11.w1, | |
| model.layers.3.feed_forward.experts.11.w3, model.layers.3.feed_forward.experts.12.w1, | |
| model.layers.3.feed_forward.experts.12.w3, model.layers.3.feed_forward.experts.13.w1, | |
| model.layers.3.feed_forward.experts.13.w3, model.layers.3.feed_forward.experts.14.w1, | |
| model.layers.3.feed_forward.experts.14.w3, model.layers.3.feed_forward.experts.15.w1, | |
| model.layers.3.feed_forward.experts.15.w3, model.layers.3.feed_forward.experts.16.w1, | |
| model.layers.3.feed_forward.experts.16.w3, model.layers.3.feed_forward.experts.17.w1, | |
| model.layers.3.feed_forward.experts.17.w3, model.layers.3.feed_forward.experts.18.w1, | |
| model.layers.3.feed_forward.experts.18.w3, model.layers.3.feed_forward.experts.19.w1, | |
| model.layers.3.feed_forward.experts.19.w3, model.layers.3.feed_forward.experts.20.w1, | |
| model.layers.3.feed_forward.experts.20.w3, model.layers.3.feed_forward.experts.21.w1, | |
| model.layers.3.feed_forward.experts.21.w3, model.layers.3.feed_forward.experts.22.w1, | |
| model.layers.3.feed_forward.experts.22.w3, model.layers.3.feed_forward.experts.23.w1, | |
| model.layers.3.feed_forward.experts.23.w3, model.layers.3.feed_forward.experts.24.w1, | |
| model.layers.3.feed_forward.experts.24.w3, model.layers.3.feed_forward.experts.25.w1, | |
| model.layers.3.feed_forward.experts.25.w3, model.layers.3.feed_forward.experts.26.w1, | |
| model.layers.3.feed_forward.experts.26.w3, model.layers.3.feed_forward.experts.27.w1, | |
| model.layers.3.feed_forward.experts.27.w3, model.layers.3.feed_forward.experts.28.w1, | |
| model.layers.3.feed_forward.experts.28.w3, model.layers.3.feed_forward.experts.29.w1, | |
| model.layers.3.feed_forward.experts.29.w3, model.layers.3.feed_forward.experts.30.w1, | |
| model.layers.3.feed_forward.experts.30.w3, model.layers.3.feed_forward.experts.31.w1, | |
| model.layers.3.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.4.operator_norm | |
| balance_layers: [model.layers.4.conv.in_proj] | |
| - smooth_layer: model.layers.4.ffn_norm | |
| balance_layers: [model.layers.4.feed_forward.gate, model.layers.4.feed_forward.experts.0.w1, | |
| model.layers.4.feed_forward.experts.0.w3, model.layers.4.feed_forward.experts.1.w1, | |
| model.layers.4.feed_forward.experts.1.w3, model.layers.4.feed_forward.experts.2.w1, | |
| model.layers.4.feed_forward.experts.2.w3, model.layers.4.feed_forward.experts.3.w1, | |
| model.layers.4.feed_forward.experts.3.w3, model.layers.4.feed_forward.experts.4.w1, | |
| model.layers.4.feed_forward.experts.4.w3, model.layers.4.feed_forward.experts.5.w1, | |
| model.layers.4.feed_forward.experts.5.w3, model.layers.4.feed_forward.experts.6.w1, | |
| model.layers.4.feed_forward.experts.6.w3, model.layers.4.feed_forward.experts.7.w1, | |
| model.layers.4.feed_forward.experts.7.w3, model.layers.4.feed_forward.experts.8.w1, | |
| model.layers.4.feed_forward.experts.8.w3, model.layers.4.feed_forward.experts.9.w1, | |
| model.layers.4.feed_forward.experts.9.w3, model.layers.4.feed_forward.experts.10.w1, | |
| model.layers.4.feed_forward.experts.10.w3, model.layers.4.feed_forward.experts.11.w1, | |
| model.layers.4.feed_forward.experts.11.w3, model.layers.4.feed_forward.experts.12.w1, | |
| model.layers.4.feed_forward.experts.12.w3, model.layers.4.feed_forward.experts.13.w1, | |
| model.layers.4.feed_forward.experts.13.w3, model.layers.4.feed_forward.experts.14.w1, | |
| model.layers.4.feed_forward.experts.14.w3, model.layers.4.feed_forward.experts.15.w1, | |
| model.layers.4.feed_forward.experts.15.w3, model.layers.4.feed_forward.experts.16.w1, | |
| model.layers.4.feed_forward.experts.16.w3, model.layers.4.feed_forward.experts.17.w1, | |
| model.layers.4.feed_forward.experts.17.w3, model.layers.4.feed_forward.experts.18.w1, | |
| model.layers.4.feed_forward.experts.18.w3, model.layers.4.feed_forward.experts.19.w1, | |
| model.layers.4.feed_forward.experts.19.w3, model.layers.4.feed_forward.experts.20.w1, | |
| model.layers.4.feed_forward.experts.20.w3, model.layers.4.feed_forward.experts.21.w1, | |
| model.layers.4.feed_forward.experts.21.w3, model.layers.4.feed_forward.experts.22.w1, | |
| model.layers.4.feed_forward.experts.22.w3, model.layers.4.feed_forward.experts.23.w1, | |
| model.layers.4.feed_forward.experts.23.w3, model.layers.4.feed_forward.experts.24.w1, | |
| model.layers.4.feed_forward.experts.24.w3, model.layers.4.feed_forward.experts.25.w1, | |
| model.layers.4.feed_forward.experts.25.w3, model.layers.4.feed_forward.experts.26.w1, | |
| model.layers.4.feed_forward.experts.26.w3, model.layers.4.feed_forward.experts.27.w1, | |
| model.layers.4.feed_forward.experts.27.w3, model.layers.4.feed_forward.experts.28.w1, | |
| model.layers.4.feed_forward.experts.28.w3, model.layers.4.feed_forward.experts.29.w1, | |
| model.layers.4.feed_forward.experts.29.w3, model.layers.4.feed_forward.experts.30.w1, | |
| model.layers.4.feed_forward.experts.30.w3, model.layers.4.feed_forward.experts.31.w1, | |
| model.layers.4.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.5.operator_norm | |
| balance_layers: [model.layers.5.conv.in_proj] | |
| - smooth_layer: model.layers.5.ffn_norm | |
| balance_layers: [model.layers.5.feed_forward.gate, model.layers.5.feed_forward.experts.0.w1, | |
| model.layers.5.feed_forward.experts.0.w3, model.layers.5.feed_forward.experts.1.w1, | |
| model.layers.5.feed_forward.experts.1.w3, model.layers.5.feed_forward.experts.2.w1, | |
| model.layers.5.feed_forward.experts.2.w3, model.layers.5.feed_forward.experts.3.w1, | |
| model.layers.5.feed_forward.experts.3.w3, model.layers.5.feed_forward.experts.4.w1, | |
| model.layers.5.feed_forward.experts.4.w3, model.layers.5.feed_forward.experts.5.w1, | |
| model.layers.5.feed_forward.experts.5.w3, model.layers.5.feed_forward.experts.6.w1, | |
| model.layers.5.feed_forward.experts.6.w3, model.layers.5.feed_forward.experts.7.w1, | |
| model.layers.5.feed_forward.experts.7.w3, model.layers.5.feed_forward.experts.8.w1, | |
| model.layers.5.feed_forward.experts.8.w3, model.layers.5.feed_forward.experts.9.w1, | |
| model.layers.5.feed_forward.experts.9.w3, model.layers.5.feed_forward.experts.10.w1, | |
| model.layers.5.feed_forward.experts.10.w3, model.layers.5.feed_forward.experts.11.w1, | |
| model.layers.5.feed_forward.experts.11.w3, model.layers.5.feed_forward.experts.12.w1, | |
| model.layers.5.feed_forward.experts.12.w3, model.layers.5.feed_forward.experts.13.w1, | |
| model.layers.5.feed_forward.experts.13.w3, model.layers.5.feed_forward.experts.14.w1, | |
| model.layers.5.feed_forward.experts.14.w3, model.layers.5.feed_forward.experts.15.w1, | |
| model.layers.5.feed_forward.experts.15.w3, model.layers.5.feed_forward.experts.16.w1, | |
| model.layers.5.feed_forward.experts.16.w3, model.layers.5.feed_forward.experts.17.w1, | |
| model.layers.5.feed_forward.experts.17.w3, model.layers.5.feed_forward.experts.18.w1, | |
| model.layers.5.feed_forward.experts.18.w3, model.layers.5.feed_forward.experts.19.w1, | |
| model.layers.5.feed_forward.experts.19.w3, model.layers.5.feed_forward.experts.20.w1, | |
| model.layers.5.feed_forward.experts.20.w3, model.layers.5.feed_forward.experts.21.w1, | |
| model.layers.5.feed_forward.experts.21.w3, model.layers.5.feed_forward.experts.22.w1, | |
| model.layers.5.feed_forward.experts.22.w3, model.layers.5.feed_forward.experts.23.w1, | |
| model.layers.5.feed_forward.experts.23.w3, model.layers.5.feed_forward.experts.24.w1, | |
| model.layers.5.feed_forward.experts.24.w3, model.layers.5.feed_forward.experts.25.w1, | |
| model.layers.5.feed_forward.experts.25.w3, model.layers.5.feed_forward.experts.26.w1, | |
| model.layers.5.feed_forward.experts.26.w3, model.layers.5.feed_forward.experts.27.w1, | |
| model.layers.5.feed_forward.experts.27.w3, model.layers.5.feed_forward.experts.28.w1, | |
| model.layers.5.feed_forward.experts.28.w3, model.layers.5.feed_forward.experts.29.w1, | |
| model.layers.5.feed_forward.experts.29.w3, model.layers.5.feed_forward.experts.30.w1, | |
| model.layers.5.feed_forward.experts.30.w3, model.layers.5.feed_forward.experts.31.w1, | |
| model.layers.5.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.6.operator_norm | |
| balance_layers: [model.layers.6.self_attn.q_proj, model.layers.6.self_attn.k_proj, | |
| model.layers.6.self_attn.v_proj] | |
| - smooth_layer: model.layers.6.ffn_norm | |
| balance_layers: [model.layers.6.feed_forward.gate, model.layers.6.feed_forward.experts.0.w1, | |
| model.layers.6.feed_forward.experts.0.w3, model.layers.6.feed_forward.experts.1.w1, | |
| model.layers.6.feed_forward.experts.1.w3, model.layers.6.feed_forward.experts.2.w1, | |
| model.layers.6.feed_forward.experts.2.w3, model.layers.6.feed_forward.experts.3.w1, | |
| model.layers.6.feed_forward.experts.3.w3, model.layers.6.feed_forward.experts.4.w1, | |
| model.layers.6.feed_forward.experts.4.w3, model.layers.6.feed_forward.experts.5.w1, | |
| model.layers.6.feed_forward.experts.5.w3, model.layers.6.feed_forward.experts.6.w1, | |
| model.layers.6.feed_forward.experts.6.w3, model.layers.6.feed_forward.experts.7.w1, | |
| model.layers.6.feed_forward.experts.7.w3, model.layers.6.feed_forward.experts.8.w1, | |
| model.layers.6.feed_forward.experts.8.w3, model.layers.6.feed_forward.experts.9.w1, | |
| model.layers.6.feed_forward.experts.9.w3, model.layers.6.feed_forward.experts.10.w1, | |
| model.layers.6.feed_forward.experts.10.w3, model.layers.6.feed_forward.experts.11.w1, | |
| model.layers.6.feed_forward.experts.11.w3, model.layers.6.feed_forward.experts.12.w1, | |
| model.layers.6.feed_forward.experts.12.w3, model.layers.6.feed_forward.experts.13.w1, | |
| model.layers.6.feed_forward.experts.13.w3, model.layers.6.feed_forward.experts.14.w1, | |
| model.layers.6.feed_forward.experts.14.w3, model.layers.6.feed_forward.experts.15.w1, | |
| model.layers.6.feed_forward.experts.15.w3, model.layers.6.feed_forward.experts.16.w1, | |
| model.layers.6.feed_forward.experts.16.w3, model.layers.6.feed_forward.experts.17.w1, | |
| model.layers.6.feed_forward.experts.17.w3, model.layers.6.feed_forward.experts.18.w1, | |
| model.layers.6.feed_forward.experts.18.w3, model.layers.6.feed_forward.experts.19.w1, | |
| model.layers.6.feed_forward.experts.19.w3, model.layers.6.feed_forward.experts.20.w1, | |
| model.layers.6.feed_forward.experts.20.w3, model.layers.6.feed_forward.experts.21.w1, | |
| model.layers.6.feed_forward.experts.21.w3, model.layers.6.feed_forward.experts.22.w1, | |
| model.layers.6.feed_forward.experts.22.w3, model.layers.6.feed_forward.experts.23.w1, | |
| model.layers.6.feed_forward.experts.23.w3, model.layers.6.feed_forward.experts.24.w1, | |
| model.layers.6.feed_forward.experts.24.w3, model.layers.6.feed_forward.experts.25.w1, | |
| model.layers.6.feed_forward.experts.25.w3, model.layers.6.feed_forward.experts.26.w1, | |
| model.layers.6.feed_forward.experts.26.w3, model.layers.6.feed_forward.experts.27.w1, | |
| model.layers.6.feed_forward.experts.27.w3, model.layers.6.feed_forward.experts.28.w1, | |
| model.layers.6.feed_forward.experts.28.w3, model.layers.6.feed_forward.experts.29.w1, | |
| model.layers.6.feed_forward.experts.29.w3, model.layers.6.feed_forward.experts.30.w1, | |
| model.layers.6.feed_forward.experts.30.w3, model.layers.6.feed_forward.experts.31.w1, | |
| model.layers.6.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.7.operator_norm | |
| balance_layers: [model.layers.7.conv.in_proj] | |
| - smooth_layer: model.layers.7.ffn_norm | |
| balance_layers: [model.layers.7.feed_forward.gate, model.layers.7.feed_forward.experts.0.w1, | |
| model.layers.7.feed_forward.experts.0.w3, model.layers.7.feed_forward.experts.1.w1, | |
| model.layers.7.feed_forward.experts.1.w3, model.layers.7.feed_forward.experts.2.w1, | |
| model.layers.7.feed_forward.experts.2.w3, model.layers.7.feed_forward.experts.3.w1, | |
| model.layers.7.feed_forward.experts.3.w3, model.layers.7.feed_forward.experts.4.w1, | |
| model.layers.7.feed_forward.experts.4.w3, model.layers.7.feed_forward.experts.5.w1, | |
| model.layers.7.feed_forward.experts.5.w3, model.layers.7.feed_forward.experts.6.w1, | |
| model.layers.7.feed_forward.experts.6.w3, model.layers.7.feed_forward.experts.7.w1, | |
| model.layers.7.feed_forward.experts.7.w3, model.layers.7.feed_forward.experts.8.w1, | |
| model.layers.7.feed_forward.experts.8.w3, model.layers.7.feed_forward.experts.9.w1, | |
| model.layers.7.feed_forward.experts.9.w3, model.layers.7.feed_forward.experts.10.w1, | |
| model.layers.7.feed_forward.experts.10.w3, model.layers.7.feed_forward.experts.11.w1, | |
| model.layers.7.feed_forward.experts.11.w3, model.layers.7.feed_forward.experts.12.w1, | |
| model.layers.7.feed_forward.experts.12.w3, model.layers.7.feed_forward.experts.13.w1, | |
| model.layers.7.feed_forward.experts.13.w3, model.layers.7.feed_forward.experts.14.w1, | |
| model.layers.7.feed_forward.experts.14.w3, model.layers.7.feed_forward.experts.15.w1, | |
| model.layers.7.feed_forward.experts.15.w3, model.layers.7.feed_forward.experts.16.w1, | |
| model.layers.7.feed_forward.experts.16.w3, model.layers.7.feed_forward.experts.17.w1, | |
| model.layers.7.feed_forward.experts.17.w3, model.layers.7.feed_forward.experts.18.w1, | |
| model.layers.7.feed_forward.experts.18.w3, model.layers.7.feed_forward.experts.19.w1, | |
| model.layers.7.feed_forward.experts.19.w3, model.layers.7.feed_forward.experts.20.w1, | |
| model.layers.7.feed_forward.experts.20.w3, model.layers.7.feed_forward.experts.21.w1, | |
| model.layers.7.feed_forward.experts.21.w3, model.layers.7.feed_forward.experts.22.w1, | |
| model.layers.7.feed_forward.experts.22.w3, model.layers.7.feed_forward.experts.23.w1, | |
| model.layers.7.feed_forward.experts.23.w3, model.layers.7.feed_forward.experts.24.w1, | |
| model.layers.7.feed_forward.experts.24.w3, model.layers.7.feed_forward.experts.25.w1, | |
| model.layers.7.feed_forward.experts.25.w3, model.layers.7.feed_forward.experts.26.w1, | |
| model.layers.7.feed_forward.experts.26.w3, model.layers.7.feed_forward.experts.27.w1, | |
| model.layers.7.feed_forward.experts.27.w3, model.layers.7.feed_forward.experts.28.w1, | |
| model.layers.7.feed_forward.experts.28.w3, model.layers.7.feed_forward.experts.29.w1, | |
| model.layers.7.feed_forward.experts.29.w3, model.layers.7.feed_forward.experts.30.w1, | |
| model.layers.7.feed_forward.experts.30.w3, model.layers.7.feed_forward.experts.31.w1, | |
| model.layers.7.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.8.operator_norm | |
| balance_layers: [model.layers.8.conv.in_proj] | |
| - smooth_layer: model.layers.8.ffn_norm | |
| balance_layers: [model.layers.8.feed_forward.gate, model.layers.8.feed_forward.experts.0.w1, | |
| model.layers.8.feed_forward.experts.0.w3, model.layers.8.feed_forward.experts.1.w1, | |
| model.layers.8.feed_forward.experts.1.w3, model.layers.8.feed_forward.experts.2.w1, | |
| model.layers.8.feed_forward.experts.2.w3, model.layers.8.feed_forward.experts.3.w1, | |
| model.layers.8.feed_forward.experts.3.w3, model.layers.8.feed_forward.experts.4.w1, | |
| model.layers.8.feed_forward.experts.4.w3, model.layers.8.feed_forward.experts.5.w1, | |
| model.layers.8.feed_forward.experts.5.w3, model.layers.8.feed_forward.experts.6.w1, | |
| model.layers.8.feed_forward.experts.6.w3, model.layers.8.feed_forward.experts.7.w1, | |
| model.layers.8.feed_forward.experts.7.w3, model.layers.8.feed_forward.experts.8.w1, | |
| model.layers.8.feed_forward.experts.8.w3, model.layers.8.feed_forward.experts.9.w1, | |
| model.layers.8.feed_forward.experts.9.w3, model.layers.8.feed_forward.experts.10.w1, | |
| model.layers.8.feed_forward.experts.10.w3, model.layers.8.feed_forward.experts.11.w1, | |
| model.layers.8.feed_forward.experts.11.w3, model.layers.8.feed_forward.experts.12.w1, | |
| model.layers.8.feed_forward.experts.12.w3, model.layers.8.feed_forward.experts.13.w1, | |
| model.layers.8.feed_forward.experts.13.w3, model.layers.8.feed_forward.experts.14.w1, | |
| model.layers.8.feed_forward.experts.14.w3, model.layers.8.feed_forward.experts.15.w1, | |
| model.layers.8.feed_forward.experts.15.w3, model.layers.8.feed_forward.experts.16.w1, | |
| model.layers.8.feed_forward.experts.16.w3, model.layers.8.feed_forward.experts.17.w1, | |
| model.layers.8.feed_forward.experts.17.w3, model.layers.8.feed_forward.experts.18.w1, | |
| model.layers.8.feed_forward.experts.18.w3, model.layers.8.feed_forward.experts.19.w1, | |
| model.layers.8.feed_forward.experts.19.w3, model.layers.8.feed_forward.experts.20.w1, | |
| model.layers.8.feed_forward.experts.20.w3, model.layers.8.feed_forward.experts.21.w1, | |
| model.layers.8.feed_forward.experts.21.w3, model.layers.8.feed_forward.experts.22.w1, | |
| model.layers.8.feed_forward.experts.22.w3, model.layers.8.feed_forward.experts.23.w1, | |
| model.layers.8.feed_forward.experts.23.w3, model.layers.8.feed_forward.experts.24.w1, | |
| model.layers.8.feed_forward.experts.24.w3, model.layers.8.feed_forward.experts.25.w1, | |
| model.layers.8.feed_forward.experts.25.w3, model.layers.8.feed_forward.experts.26.w1, | |
| model.layers.8.feed_forward.experts.26.w3, model.layers.8.feed_forward.experts.27.w1, | |
| model.layers.8.feed_forward.experts.27.w3, model.layers.8.feed_forward.experts.28.w1, | |
| model.layers.8.feed_forward.experts.28.w3, model.layers.8.feed_forward.experts.29.w1, | |
| model.layers.8.feed_forward.experts.29.w3, model.layers.8.feed_forward.experts.30.w1, | |
| model.layers.8.feed_forward.experts.30.w3, model.layers.8.feed_forward.experts.31.w1, | |
| model.layers.8.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.9.operator_norm | |
| balance_layers: [model.layers.9.conv.in_proj] | |
| - smooth_layer: model.layers.9.ffn_norm | |
| balance_layers: [model.layers.9.feed_forward.gate, model.layers.9.feed_forward.experts.0.w1, | |
| model.layers.9.feed_forward.experts.0.w3, model.layers.9.feed_forward.experts.1.w1, | |
| model.layers.9.feed_forward.experts.1.w3, model.layers.9.feed_forward.experts.2.w1, | |
| model.layers.9.feed_forward.experts.2.w3, model.layers.9.feed_forward.experts.3.w1, | |
| model.layers.9.feed_forward.experts.3.w3, model.layers.9.feed_forward.experts.4.w1, | |
| model.layers.9.feed_forward.experts.4.w3, model.layers.9.feed_forward.experts.5.w1, | |
| model.layers.9.feed_forward.experts.5.w3, model.layers.9.feed_forward.experts.6.w1, | |
| model.layers.9.feed_forward.experts.6.w3, model.layers.9.feed_forward.experts.7.w1, | |
| model.layers.9.feed_forward.experts.7.w3, model.layers.9.feed_forward.experts.8.w1, | |
| model.layers.9.feed_forward.experts.8.w3, model.layers.9.feed_forward.experts.9.w1, | |
| model.layers.9.feed_forward.experts.9.w3, model.layers.9.feed_forward.experts.10.w1, | |
| model.layers.9.feed_forward.experts.10.w3, model.layers.9.feed_forward.experts.11.w1, | |
| model.layers.9.feed_forward.experts.11.w3, model.layers.9.feed_forward.experts.12.w1, | |
| model.layers.9.feed_forward.experts.12.w3, model.layers.9.feed_forward.experts.13.w1, | |
| model.layers.9.feed_forward.experts.13.w3, model.layers.9.feed_forward.experts.14.w1, | |
| model.layers.9.feed_forward.experts.14.w3, model.layers.9.feed_forward.experts.15.w1, | |
| model.layers.9.feed_forward.experts.15.w3, model.layers.9.feed_forward.experts.16.w1, | |
| model.layers.9.feed_forward.experts.16.w3, model.layers.9.feed_forward.experts.17.w1, | |
| model.layers.9.feed_forward.experts.17.w3, model.layers.9.feed_forward.experts.18.w1, | |
| model.layers.9.feed_forward.experts.18.w3, model.layers.9.feed_forward.experts.19.w1, | |
| model.layers.9.feed_forward.experts.19.w3, model.layers.9.feed_forward.experts.20.w1, | |
| model.layers.9.feed_forward.experts.20.w3, model.layers.9.feed_forward.experts.21.w1, | |
| model.layers.9.feed_forward.experts.21.w3, model.layers.9.feed_forward.experts.22.w1, | |
| model.layers.9.feed_forward.experts.22.w3, model.layers.9.feed_forward.experts.23.w1, | |
| model.layers.9.feed_forward.experts.23.w3, model.layers.9.feed_forward.experts.24.w1, | |
| model.layers.9.feed_forward.experts.24.w3, model.layers.9.feed_forward.experts.25.w1, | |
| model.layers.9.feed_forward.experts.25.w3, model.layers.9.feed_forward.experts.26.w1, | |
| model.layers.9.feed_forward.experts.26.w3, model.layers.9.feed_forward.experts.27.w1, | |
| model.layers.9.feed_forward.experts.27.w3, model.layers.9.feed_forward.experts.28.w1, | |
| model.layers.9.feed_forward.experts.28.w3, model.layers.9.feed_forward.experts.29.w1, | |
| model.layers.9.feed_forward.experts.29.w3, model.layers.9.feed_forward.experts.30.w1, | |
| model.layers.9.feed_forward.experts.30.w3, model.layers.9.feed_forward.experts.31.w1, | |
| model.layers.9.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.10.operator_norm | |
| balance_layers: [model.layers.10.self_attn.q_proj, model.layers.10.self_attn.k_proj, | |
| model.layers.10.self_attn.v_proj] | |
| - smooth_layer: model.layers.10.ffn_norm | |
| balance_layers: [model.layers.10.feed_forward.gate, model.layers.10.feed_forward.experts.0.w1, | |
| model.layers.10.feed_forward.experts.0.w3, model.layers.10.feed_forward.experts.1.w1, | |
| model.layers.10.feed_forward.experts.1.w3, model.layers.10.feed_forward.experts.2.w1, | |
| model.layers.10.feed_forward.experts.2.w3, model.layers.10.feed_forward.experts.3.w1, | |
| model.layers.10.feed_forward.experts.3.w3, model.layers.10.feed_forward.experts.4.w1, | |
| model.layers.10.feed_forward.experts.4.w3, model.layers.10.feed_forward.experts.5.w1, | |
| model.layers.10.feed_forward.experts.5.w3, model.layers.10.feed_forward.experts.6.w1, | |
| model.layers.10.feed_forward.experts.6.w3, model.layers.10.feed_forward.experts.7.w1, | |
| model.layers.10.feed_forward.experts.7.w3, model.layers.10.feed_forward.experts.8.w1, | |
| model.layers.10.feed_forward.experts.8.w3, model.layers.10.feed_forward.experts.9.w1, | |
| model.layers.10.feed_forward.experts.9.w3, model.layers.10.feed_forward.experts.10.w1, | |
| model.layers.10.feed_forward.experts.10.w3, model.layers.10.feed_forward.experts.11.w1, | |
| model.layers.10.feed_forward.experts.11.w3, model.layers.10.feed_forward.experts.12.w1, | |
| model.layers.10.feed_forward.experts.12.w3, model.layers.10.feed_forward.experts.13.w1, | |
| model.layers.10.feed_forward.experts.13.w3, model.layers.10.feed_forward.experts.14.w1, | |
| model.layers.10.feed_forward.experts.14.w3, model.layers.10.feed_forward.experts.15.w1, | |
| model.layers.10.feed_forward.experts.15.w3, model.layers.10.feed_forward.experts.16.w1, | |
| model.layers.10.feed_forward.experts.16.w3, model.layers.10.feed_forward.experts.17.w1, | |
| model.layers.10.feed_forward.experts.17.w3, model.layers.10.feed_forward.experts.18.w1, | |
| model.layers.10.feed_forward.experts.18.w3, model.layers.10.feed_forward.experts.19.w1, | |
| model.layers.10.feed_forward.experts.19.w3, model.layers.10.feed_forward.experts.20.w1, | |
| model.layers.10.feed_forward.experts.20.w3, model.layers.10.feed_forward.experts.21.w1, | |
| model.layers.10.feed_forward.experts.21.w3, model.layers.10.feed_forward.experts.22.w1, | |
| model.layers.10.feed_forward.experts.22.w3, model.layers.10.feed_forward.experts.23.w1, | |
| model.layers.10.feed_forward.experts.23.w3, model.layers.10.feed_forward.experts.24.w1, | |
| model.layers.10.feed_forward.experts.24.w3, model.layers.10.feed_forward.experts.25.w1, | |
| model.layers.10.feed_forward.experts.25.w3, model.layers.10.feed_forward.experts.26.w1, | |
| model.layers.10.feed_forward.experts.26.w3, model.layers.10.feed_forward.experts.27.w1, | |
| model.layers.10.feed_forward.experts.27.w3, model.layers.10.feed_forward.experts.28.w1, | |
| model.layers.10.feed_forward.experts.28.w3, model.layers.10.feed_forward.experts.29.w1, | |
| model.layers.10.feed_forward.experts.29.w3, model.layers.10.feed_forward.experts.30.w1, | |
| model.layers.10.feed_forward.experts.30.w3, model.layers.10.feed_forward.experts.31.w1, | |
| model.layers.10.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.11.operator_norm | |
| balance_layers: [model.layers.11.conv.in_proj] | |
| - smooth_layer: model.layers.11.ffn_norm | |
| balance_layers: [model.layers.11.feed_forward.gate, model.layers.11.feed_forward.experts.0.w1, | |
| model.layers.11.feed_forward.experts.0.w3, model.layers.11.feed_forward.experts.1.w1, | |
| model.layers.11.feed_forward.experts.1.w3, model.layers.11.feed_forward.experts.2.w1, | |
| model.layers.11.feed_forward.experts.2.w3, model.layers.11.feed_forward.experts.3.w1, | |
| model.layers.11.feed_forward.experts.3.w3, model.layers.11.feed_forward.experts.4.w1, | |
| model.layers.11.feed_forward.experts.4.w3, model.layers.11.feed_forward.experts.5.w1, | |
| model.layers.11.feed_forward.experts.5.w3, model.layers.11.feed_forward.experts.6.w1, | |
| model.layers.11.feed_forward.experts.6.w3, model.layers.11.feed_forward.experts.7.w1, | |
| model.layers.11.feed_forward.experts.7.w3, model.layers.11.feed_forward.experts.8.w1, | |
| model.layers.11.feed_forward.experts.8.w3, model.layers.11.feed_forward.experts.9.w1, | |
| model.layers.11.feed_forward.experts.9.w3, model.layers.11.feed_forward.experts.10.w1, | |
| model.layers.11.feed_forward.experts.10.w3, model.layers.11.feed_forward.experts.11.w1, | |
| model.layers.11.feed_forward.experts.11.w3, model.layers.11.feed_forward.experts.12.w1, | |
| model.layers.11.feed_forward.experts.12.w3, model.layers.11.feed_forward.experts.13.w1, | |
| model.layers.11.feed_forward.experts.13.w3, model.layers.11.feed_forward.experts.14.w1, | |
| model.layers.11.feed_forward.experts.14.w3, model.layers.11.feed_forward.experts.15.w1, | |
| model.layers.11.feed_forward.experts.15.w3, model.layers.11.feed_forward.experts.16.w1, | |
| model.layers.11.feed_forward.experts.16.w3, model.layers.11.feed_forward.experts.17.w1, | |
| model.layers.11.feed_forward.experts.17.w3, model.layers.11.feed_forward.experts.18.w1, | |
| model.layers.11.feed_forward.experts.18.w3, model.layers.11.feed_forward.experts.19.w1, | |
| model.layers.11.feed_forward.experts.19.w3, model.layers.11.feed_forward.experts.20.w1, | |
| model.layers.11.feed_forward.experts.20.w3, model.layers.11.feed_forward.experts.21.w1, | |
| model.layers.11.feed_forward.experts.21.w3, model.layers.11.feed_forward.experts.22.w1, | |
| model.layers.11.feed_forward.experts.22.w3, model.layers.11.feed_forward.experts.23.w1, | |
| model.layers.11.feed_forward.experts.23.w3, model.layers.11.feed_forward.experts.24.w1, | |
| model.layers.11.feed_forward.experts.24.w3, model.layers.11.feed_forward.experts.25.w1, | |
| model.layers.11.feed_forward.experts.25.w3, model.layers.11.feed_forward.experts.26.w1, | |
| model.layers.11.feed_forward.experts.26.w3, model.layers.11.feed_forward.experts.27.w1, | |
| model.layers.11.feed_forward.experts.27.w3, model.layers.11.feed_forward.experts.28.w1, | |
| model.layers.11.feed_forward.experts.28.w3, model.layers.11.feed_forward.experts.29.w1, | |
| model.layers.11.feed_forward.experts.29.w3, model.layers.11.feed_forward.experts.30.w1, | |
| model.layers.11.feed_forward.experts.30.w3, model.layers.11.feed_forward.experts.31.w1, | |
| model.layers.11.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.12.operator_norm | |
| balance_layers: [model.layers.12.conv.in_proj] | |
| - smooth_layer: model.layers.12.ffn_norm | |
| balance_layers: [model.layers.12.feed_forward.gate, model.layers.12.feed_forward.experts.0.w1, | |
| model.layers.12.feed_forward.experts.0.w3, model.layers.12.feed_forward.experts.1.w1, | |
| model.layers.12.feed_forward.experts.1.w3, model.layers.12.feed_forward.experts.2.w1, | |
| model.layers.12.feed_forward.experts.2.w3, model.layers.12.feed_forward.experts.3.w1, | |
| model.layers.12.feed_forward.experts.3.w3, model.layers.12.feed_forward.experts.4.w1, | |
| model.layers.12.feed_forward.experts.4.w3, model.layers.12.feed_forward.experts.5.w1, | |
| model.layers.12.feed_forward.experts.5.w3, model.layers.12.feed_forward.experts.6.w1, | |
| model.layers.12.feed_forward.experts.6.w3, model.layers.12.feed_forward.experts.7.w1, | |
| model.layers.12.feed_forward.experts.7.w3, model.layers.12.feed_forward.experts.8.w1, | |
| model.layers.12.feed_forward.experts.8.w3, model.layers.12.feed_forward.experts.9.w1, | |
| model.layers.12.feed_forward.experts.9.w3, model.layers.12.feed_forward.experts.10.w1, | |
| model.layers.12.feed_forward.experts.10.w3, model.layers.12.feed_forward.experts.11.w1, | |
| model.layers.12.feed_forward.experts.11.w3, model.layers.12.feed_forward.experts.12.w1, | |
| model.layers.12.feed_forward.experts.12.w3, model.layers.12.feed_forward.experts.13.w1, | |
| model.layers.12.feed_forward.experts.13.w3, model.layers.12.feed_forward.experts.14.w1, | |
| model.layers.12.feed_forward.experts.14.w3, model.layers.12.feed_forward.experts.15.w1, | |
| model.layers.12.feed_forward.experts.15.w3, model.layers.12.feed_forward.experts.16.w1, | |
| model.layers.12.feed_forward.experts.16.w3, model.layers.12.feed_forward.experts.17.w1, | |
| model.layers.12.feed_forward.experts.17.w3, model.layers.12.feed_forward.experts.18.w1, | |
| model.layers.12.feed_forward.experts.18.w3, model.layers.12.feed_forward.experts.19.w1, | |
| model.layers.12.feed_forward.experts.19.w3, model.layers.12.feed_forward.experts.20.w1, | |
| model.layers.12.feed_forward.experts.20.w3, model.layers.12.feed_forward.experts.21.w1, | |
| model.layers.12.feed_forward.experts.21.w3, model.layers.12.feed_forward.experts.22.w1, | |
| model.layers.12.feed_forward.experts.22.w3, model.layers.12.feed_forward.experts.23.w1, | |
| model.layers.12.feed_forward.experts.23.w3, model.layers.12.feed_forward.experts.24.w1, | |
| model.layers.12.feed_forward.experts.24.w3, model.layers.12.feed_forward.experts.25.w1, | |
| model.layers.12.feed_forward.experts.25.w3, model.layers.12.feed_forward.experts.26.w1, | |
| model.layers.12.feed_forward.experts.26.w3, model.layers.12.feed_forward.experts.27.w1, | |
| model.layers.12.feed_forward.experts.27.w3, model.layers.12.feed_forward.experts.28.w1, | |
| model.layers.12.feed_forward.experts.28.w3, model.layers.12.feed_forward.experts.29.w1, | |
| model.layers.12.feed_forward.experts.29.w3, model.layers.12.feed_forward.experts.30.w1, | |
| model.layers.12.feed_forward.experts.30.w3, model.layers.12.feed_forward.experts.31.w1, | |
| model.layers.12.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.13.operator_norm | |
| balance_layers: [model.layers.13.conv.in_proj] | |
| - smooth_layer: model.layers.13.ffn_norm | |
| balance_layers: [model.layers.13.feed_forward.gate, model.layers.13.feed_forward.experts.0.w1, | |
| model.layers.13.feed_forward.experts.0.w3, model.layers.13.feed_forward.experts.1.w1, | |
| model.layers.13.feed_forward.experts.1.w3, model.layers.13.feed_forward.experts.2.w1, | |
| model.layers.13.feed_forward.experts.2.w3, model.layers.13.feed_forward.experts.3.w1, | |
| model.layers.13.feed_forward.experts.3.w3, model.layers.13.feed_forward.experts.4.w1, | |
| model.layers.13.feed_forward.experts.4.w3, model.layers.13.feed_forward.experts.5.w1, | |
| model.layers.13.feed_forward.experts.5.w3, model.layers.13.feed_forward.experts.6.w1, | |
| model.layers.13.feed_forward.experts.6.w3, model.layers.13.feed_forward.experts.7.w1, | |
| model.layers.13.feed_forward.experts.7.w3, model.layers.13.feed_forward.experts.8.w1, | |
| model.layers.13.feed_forward.experts.8.w3, model.layers.13.feed_forward.experts.9.w1, | |
| model.layers.13.feed_forward.experts.9.w3, model.layers.13.feed_forward.experts.10.w1, | |
| model.layers.13.feed_forward.experts.10.w3, model.layers.13.feed_forward.experts.11.w1, | |
| model.layers.13.feed_forward.experts.11.w3, model.layers.13.feed_forward.experts.12.w1, | |
| model.layers.13.feed_forward.experts.12.w3, model.layers.13.feed_forward.experts.13.w1, | |
| model.layers.13.feed_forward.experts.13.w3, model.layers.13.feed_forward.experts.14.w1, | |
| model.layers.13.feed_forward.experts.14.w3, model.layers.13.feed_forward.experts.15.w1, | |
| model.layers.13.feed_forward.experts.15.w3, model.layers.13.feed_forward.experts.16.w1, | |
| model.layers.13.feed_forward.experts.16.w3, model.layers.13.feed_forward.experts.17.w1, | |
| model.layers.13.feed_forward.experts.17.w3, model.layers.13.feed_forward.experts.18.w1, | |
| model.layers.13.feed_forward.experts.18.w3, model.layers.13.feed_forward.experts.19.w1, | |
| model.layers.13.feed_forward.experts.19.w3, model.layers.13.feed_forward.experts.20.w1, | |
| model.layers.13.feed_forward.experts.20.w3, model.layers.13.feed_forward.experts.21.w1, | |
| model.layers.13.feed_forward.experts.21.w3, model.layers.13.feed_forward.experts.22.w1, | |
| model.layers.13.feed_forward.experts.22.w3, model.layers.13.feed_forward.experts.23.w1, | |
| model.layers.13.feed_forward.experts.23.w3, model.layers.13.feed_forward.experts.24.w1, | |
| model.layers.13.feed_forward.experts.24.w3, model.layers.13.feed_forward.experts.25.w1, | |
| model.layers.13.feed_forward.experts.25.w3, model.layers.13.feed_forward.experts.26.w1, | |
| model.layers.13.feed_forward.experts.26.w3, model.layers.13.feed_forward.experts.27.w1, | |
| model.layers.13.feed_forward.experts.27.w3, model.layers.13.feed_forward.experts.28.w1, | |
| model.layers.13.feed_forward.experts.28.w3, model.layers.13.feed_forward.experts.29.w1, | |
| model.layers.13.feed_forward.experts.29.w3, model.layers.13.feed_forward.experts.30.w1, | |
| model.layers.13.feed_forward.experts.30.w3, model.layers.13.feed_forward.experts.31.w1, | |
| model.layers.13.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.14.operator_norm | |
| balance_layers: [model.layers.14.self_attn.q_proj, model.layers.14.self_attn.k_proj, | |
| model.layers.14.self_attn.v_proj] | |
| - smooth_layer: model.layers.14.ffn_norm | |
| balance_layers: [model.layers.14.feed_forward.gate, model.layers.14.feed_forward.experts.0.w1, | |
| model.layers.14.feed_forward.experts.0.w3, model.layers.14.feed_forward.experts.1.w1, | |
| model.layers.14.feed_forward.experts.1.w3, model.layers.14.feed_forward.experts.2.w1, | |
| model.layers.14.feed_forward.experts.2.w3, model.layers.14.feed_forward.experts.3.w1, | |
| model.layers.14.feed_forward.experts.3.w3, model.layers.14.feed_forward.experts.4.w1, | |
| model.layers.14.feed_forward.experts.4.w3, model.layers.14.feed_forward.experts.5.w1, | |
| model.layers.14.feed_forward.experts.5.w3, model.layers.14.feed_forward.experts.6.w1, | |
| model.layers.14.feed_forward.experts.6.w3, model.layers.14.feed_forward.experts.7.w1, | |
| model.layers.14.feed_forward.experts.7.w3, model.layers.14.feed_forward.experts.8.w1, | |
| model.layers.14.feed_forward.experts.8.w3, model.layers.14.feed_forward.experts.9.w1, | |
| model.layers.14.feed_forward.experts.9.w3, model.layers.14.feed_forward.experts.10.w1, | |
| model.layers.14.feed_forward.experts.10.w3, model.layers.14.feed_forward.experts.11.w1, | |
| model.layers.14.feed_forward.experts.11.w3, model.layers.14.feed_forward.experts.12.w1, | |
| model.layers.14.feed_forward.experts.12.w3, model.layers.14.feed_forward.experts.13.w1, | |
| model.layers.14.feed_forward.experts.13.w3, model.layers.14.feed_forward.experts.14.w1, | |
| model.layers.14.feed_forward.experts.14.w3, model.layers.14.feed_forward.experts.15.w1, | |
| model.layers.14.feed_forward.experts.15.w3, model.layers.14.feed_forward.experts.16.w1, | |
| model.layers.14.feed_forward.experts.16.w3, model.layers.14.feed_forward.experts.17.w1, | |
| model.layers.14.feed_forward.experts.17.w3, model.layers.14.feed_forward.experts.18.w1, | |
| model.layers.14.feed_forward.experts.18.w3, model.layers.14.feed_forward.experts.19.w1, | |
| model.layers.14.feed_forward.experts.19.w3, model.layers.14.feed_forward.experts.20.w1, | |
| model.layers.14.feed_forward.experts.20.w3, model.layers.14.feed_forward.experts.21.w1, | |
| model.layers.14.feed_forward.experts.21.w3, model.layers.14.feed_forward.experts.22.w1, | |
| model.layers.14.feed_forward.experts.22.w3, model.layers.14.feed_forward.experts.23.w1, | |
| model.layers.14.feed_forward.experts.23.w3, model.layers.14.feed_forward.experts.24.w1, | |
| model.layers.14.feed_forward.experts.24.w3, model.layers.14.feed_forward.experts.25.w1, | |
| model.layers.14.feed_forward.experts.25.w3, model.layers.14.feed_forward.experts.26.w1, | |
| model.layers.14.feed_forward.experts.26.w3, model.layers.14.feed_forward.experts.27.w1, | |
| model.layers.14.feed_forward.experts.27.w3, model.layers.14.feed_forward.experts.28.w1, | |
| model.layers.14.feed_forward.experts.28.w3, model.layers.14.feed_forward.experts.29.w1, | |
| model.layers.14.feed_forward.experts.29.w3, model.layers.14.feed_forward.experts.30.w1, | |
| model.layers.14.feed_forward.experts.30.w3, model.layers.14.feed_forward.experts.31.w1, | |
| model.layers.14.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.15.operator_norm | |
| balance_layers: [model.layers.15.conv.in_proj] | |
| - smooth_layer: model.layers.15.ffn_norm | |
| balance_layers: [model.layers.15.feed_forward.gate, model.layers.15.feed_forward.experts.0.w1, | |
| model.layers.15.feed_forward.experts.0.w3, model.layers.15.feed_forward.experts.1.w1, | |
| model.layers.15.feed_forward.experts.1.w3, model.layers.15.feed_forward.experts.2.w1, | |
| model.layers.15.feed_forward.experts.2.w3, model.layers.15.feed_forward.experts.3.w1, | |
| model.layers.15.feed_forward.experts.3.w3, model.layers.15.feed_forward.experts.4.w1, | |
| model.layers.15.feed_forward.experts.4.w3, model.layers.15.feed_forward.experts.5.w1, | |
| model.layers.15.feed_forward.experts.5.w3, model.layers.15.feed_forward.experts.6.w1, | |
| model.layers.15.feed_forward.experts.6.w3, model.layers.15.feed_forward.experts.7.w1, | |
| model.layers.15.feed_forward.experts.7.w3, model.layers.15.feed_forward.experts.8.w1, | |
| model.layers.15.feed_forward.experts.8.w3, model.layers.15.feed_forward.experts.9.w1, | |
| model.layers.15.feed_forward.experts.9.w3, model.layers.15.feed_forward.experts.10.w1, | |
| model.layers.15.feed_forward.experts.10.w3, model.layers.15.feed_forward.experts.11.w1, | |
| model.layers.15.feed_forward.experts.11.w3, model.layers.15.feed_forward.experts.12.w1, | |
| model.layers.15.feed_forward.experts.12.w3, model.layers.15.feed_forward.experts.13.w1, | |
| model.layers.15.feed_forward.experts.13.w3, model.layers.15.feed_forward.experts.14.w1, | |
| model.layers.15.feed_forward.experts.14.w3, model.layers.15.feed_forward.experts.15.w1, | |
| model.layers.15.feed_forward.experts.15.w3, model.layers.15.feed_forward.experts.16.w1, | |
| model.layers.15.feed_forward.experts.16.w3, model.layers.15.feed_forward.experts.17.w1, | |
| model.layers.15.feed_forward.experts.17.w3, model.layers.15.feed_forward.experts.18.w1, | |
| model.layers.15.feed_forward.experts.18.w3, model.layers.15.feed_forward.experts.19.w1, | |
| model.layers.15.feed_forward.experts.19.w3, model.layers.15.feed_forward.experts.20.w1, | |
| model.layers.15.feed_forward.experts.20.w3, model.layers.15.feed_forward.experts.21.w1, | |
| model.layers.15.feed_forward.experts.21.w3, model.layers.15.feed_forward.experts.22.w1, | |
| model.layers.15.feed_forward.experts.22.w3, model.layers.15.feed_forward.experts.23.w1, | |
| model.layers.15.feed_forward.experts.23.w3, model.layers.15.feed_forward.experts.24.w1, | |
| model.layers.15.feed_forward.experts.24.w3, model.layers.15.feed_forward.experts.25.w1, | |
| model.layers.15.feed_forward.experts.25.w3, model.layers.15.feed_forward.experts.26.w1, | |
| model.layers.15.feed_forward.experts.26.w3, model.layers.15.feed_forward.experts.27.w1, | |
| model.layers.15.feed_forward.experts.27.w3, model.layers.15.feed_forward.experts.28.w1, | |
| model.layers.15.feed_forward.experts.28.w3, model.layers.15.feed_forward.experts.29.w1, | |
| model.layers.15.feed_forward.experts.29.w3, model.layers.15.feed_forward.experts.30.w1, | |
| model.layers.15.feed_forward.experts.30.w3, model.layers.15.feed_forward.experts.31.w1, | |
| model.layers.15.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.16.operator_norm | |
| balance_layers: [model.layers.16.conv.in_proj] | |
| - smooth_layer: model.layers.16.ffn_norm | |
| balance_layers: [model.layers.16.feed_forward.gate, model.layers.16.feed_forward.experts.0.w1, | |
| model.layers.16.feed_forward.experts.0.w3, model.layers.16.feed_forward.experts.1.w1, | |
| model.layers.16.feed_forward.experts.1.w3, model.layers.16.feed_forward.experts.2.w1, | |
| model.layers.16.feed_forward.experts.2.w3, model.layers.16.feed_forward.experts.3.w1, | |
| model.layers.16.feed_forward.experts.3.w3, model.layers.16.feed_forward.experts.4.w1, | |
| model.layers.16.feed_forward.experts.4.w3, model.layers.16.feed_forward.experts.5.w1, | |
| model.layers.16.feed_forward.experts.5.w3, model.layers.16.feed_forward.experts.6.w1, | |
| model.layers.16.feed_forward.experts.6.w3, model.layers.16.feed_forward.experts.7.w1, | |
| model.layers.16.feed_forward.experts.7.w3, model.layers.16.feed_forward.experts.8.w1, | |
| model.layers.16.feed_forward.experts.8.w3, model.layers.16.feed_forward.experts.9.w1, | |
| model.layers.16.feed_forward.experts.9.w3, model.layers.16.feed_forward.experts.10.w1, | |
| model.layers.16.feed_forward.experts.10.w3, model.layers.16.feed_forward.experts.11.w1, | |
| model.layers.16.feed_forward.experts.11.w3, model.layers.16.feed_forward.experts.12.w1, | |
| model.layers.16.feed_forward.experts.12.w3, model.layers.16.feed_forward.experts.13.w1, | |
| model.layers.16.feed_forward.experts.13.w3, model.layers.16.feed_forward.experts.14.w1, | |
| model.layers.16.feed_forward.experts.14.w3, model.layers.16.feed_forward.experts.15.w1, | |
| model.layers.16.feed_forward.experts.15.w3, model.layers.16.feed_forward.experts.16.w1, | |
| model.layers.16.feed_forward.experts.16.w3, model.layers.16.feed_forward.experts.17.w1, | |
| model.layers.16.feed_forward.experts.17.w3, model.layers.16.feed_forward.experts.18.w1, | |
| model.layers.16.feed_forward.experts.18.w3, model.layers.16.feed_forward.experts.19.w1, | |
| model.layers.16.feed_forward.experts.19.w3, model.layers.16.feed_forward.experts.20.w1, | |
| model.layers.16.feed_forward.experts.20.w3, model.layers.16.feed_forward.experts.21.w1, | |
| model.layers.16.feed_forward.experts.21.w3, model.layers.16.feed_forward.experts.22.w1, | |
| model.layers.16.feed_forward.experts.22.w3, model.layers.16.feed_forward.experts.23.w1, | |
| model.layers.16.feed_forward.experts.23.w3, model.layers.16.feed_forward.experts.24.w1, | |
| model.layers.16.feed_forward.experts.24.w3, model.layers.16.feed_forward.experts.25.w1, | |
| model.layers.16.feed_forward.experts.25.w3, model.layers.16.feed_forward.experts.26.w1, | |
| model.layers.16.feed_forward.experts.26.w3, model.layers.16.feed_forward.experts.27.w1, | |
| model.layers.16.feed_forward.experts.27.w3, model.layers.16.feed_forward.experts.28.w1, | |
| model.layers.16.feed_forward.experts.28.w3, model.layers.16.feed_forward.experts.29.w1, | |
| model.layers.16.feed_forward.experts.29.w3, model.layers.16.feed_forward.experts.30.w1, | |
| model.layers.16.feed_forward.experts.30.w3, model.layers.16.feed_forward.experts.31.w1, | |
| model.layers.16.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.17.operator_norm | |
| balance_layers: [model.layers.17.conv.in_proj] | |
| - smooth_layer: model.layers.17.ffn_norm | |
| balance_layers: [model.layers.17.feed_forward.gate, model.layers.17.feed_forward.experts.0.w1, | |
| model.layers.17.feed_forward.experts.0.w3, model.layers.17.feed_forward.experts.1.w1, | |
| model.layers.17.feed_forward.experts.1.w3, model.layers.17.feed_forward.experts.2.w1, | |
| model.layers.17.feed_forward.experts.2.w3, model.layers.17.feed_forward.experts.3.w1, | |
| model.layers.17.feed_forward.experts.3.w3, model.layers.17.feed_forward.experts.4.w1, | |
| model.layers.17.feed_forward.experts.4.w3, model.layers.17.feed_forward.experts.5.w1, | |
| model.layers.17.feed_forward.experts.5.w3, model.layers.17.feed_forward.experts.6.w1, | |
| model.layers.17.feed_forward.experts.6.w3, model.layers.17.feed_forward.experts.7.w1, | |
| model.layers.17.feed_forward.experts.7.w3, model.layers.17.feed_forward.experts.8.w1, | |
| model.layers.17.feed_forward.experts.8.w3, model.layers.17.feed_forward.experts.9.w1, | |
| model.layers.17.feed_forward.experts.9.w3, model.layers.17.feed_forward.experts.10.w1, | |
| model.layers.17.feed_forward.experts.10.w3, model.layers.17.feed_forward.experts.11.w1, | |
| model.layers.17.feed_forward.experts.11.w3, model.layers.17.feed_forward.experts.12.w1, | |
| model.layers.17.feed_forward.experts.12.w3, model.layers.17.feed_forward.experts.13.w1, | |
| model.layers.17.feed_forward.experts.13.w3, model.layers.17.feed_forward.experts.14.w1, | |
| model.layers.17.feed_forward.experts.14.w3, model.layers.17.feed_forward.experts.15.w1, | |
| model.layers.17.feed_forward.experts.15.w3, model.layers.17.feed_forward.experts.16.w1, | |
| model.layers.17.feed_forward.experts.16.w3, model.layers.17.feed_forward.experts.17.w1, | |
| model.layers.17.feed_forward.experts.17.w3, model.layers.17.feed_forward.experts.18.w1, | |
| model.layers.17.feed_forward.experts.18.w3, model.layers.17.feed_forward.experts.19.w1, | |
| model.layers.17.feed_forward.experts.19.w3, model.layers.17.feed_forward.experts.20.w1, | |
| model.layers.17.feed_forward.experts.20.w3, model.layers.17.feed_forward.experts.21.w1, | |
| model.layers.17.feed_forward.experts.21.w3, model.layers.17.feed_forward.experts.22.w1, | |
| model.layers.17.feed_forward.experts.22.w3, model.layers.17.feed_forward.experts.23.w1, | |
| model.layers.17.feed_forward.experts.23.w3, model.layers.17.feed_forward.experts.24.w1, | |
| model.layers.17.feed_forward.experts.24.w3, model.layers.17.feed_forward.experts.25.w1, | |
| model.layers.17.feed_forward.experts.25.w3, model.layers.17.feed_forward.experts.26.w1, | |
| model.layers.17.feed_forward.experts.26.w3, model.layers.17.feed_forward.experts.27.w1, | |
| model.layers.17.feed_forward.experts.27.w3, model.layers.17.feed_forward.experts.28.w1, | |
| model.layers.17.feed_forward.experts.28.w3, model.layers.17.feed_forward.experts.29.w1, | |
| model.layers.17.feed_forward.experts.29.w3, model.layers.17.feed_forward.experts.30.w1, | |
| model.layers.17.feed_forward.experts.30.w3, model.layers.17.feed_forward.experts.31.w1, | |
| model.layers.17.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.18.operator_norm | |
| balance_layers: [model.layers.18.self_attn.q_proj, model.layers.18.self_attn.k_proj, | |
| model.layers.18.self_attn.v_proj] | |
| - smooth_layer: model.layers.18.ffn_norm | |
| balance_layers: [model.layers.18.feed_forward.gate, model.layers.18.feed_forward.experts.0.w1, | |
| model.layers.18.feed_forward.experts.0.w3, model.layers.18.feed_forward.experts.1.w1, | |
| model.layers.18.feed_forward.experts.1.w3, model.layers.18.feed_forward.experts.2.w1, | |
| model.layers.18.feed_forward.experts.2.w3, model.layers.18.feed_forward.experts.3.w1, | |
| model.layers.18.feed_forward.experts.3.w3, model.layers.18.feed_forward.experts.4.w1, | |
| model.layers.18.feed_forward.experts.4.w3, model.layers.18.feed_forward.experts.5.w1, | |
| model.layers.18.feed_forward.experts.5.w3, model.layers.18.feed_forward.experts.6.w1, | |
| model.layers.18.feed_forward.experts.6.w3, model.layers.18.feed_forward.experts.7.w1, | |
| model.layers.18.feed_forward.experts.7.w3, model.layers.18.feed_forward.experts.8.w1, | |
| model.layers.18.feed_forward.experts.8.w3, model.layers.18.feed_forward.experts.9.w1, | |
| model.layers.18.feed_forward.experts.9.w3, model.layers.18.feed_forward.experts.10.w1, | |
| model.layers.18.feed_forward.experts.10.w3, model.layers.18.feed_forward.experts.11.w1, | |
| model.layers.18.feed_forward.experts.11.w3, model.layers.18.feed_forward.experts.12.w1, | |
| model.layers.18.feed_forward.experts.12.w3, model.layers.18.feed_forward.experts.13.w1, | |
| model.layers.18.feed_forward.experts.13.w3, model.layers.18.feed_forward.experts.14.w1, | |
| model.layers.18.feed_forward.experts.14.w3, model.layers.18.feed_forward.experts.15.w1, | |
| model.layers.18.feed_forward.experts.15.w3, model.layers.18.feed_forward.experts.16.w1, | |
| model.layers.18.feed_forward.experts.16.w3, model.layers.18.feed_forward.experts.17.w1, | |
| model.layers.18.feed_forward.experts.17.w3, model.layers.18.feed_forward.experts.18.w1, | |
| model.layers.18.feed_forward.experts.18.w3, model.layers.18.feed_forward.experts.19.w1, | |
| model.layers.18.feed_forward.experts.19.w3, model.layers.18.feed_forward.experts.20.w1, | |
| model.layers.18.feed_forward.experts.20.w3, model.layers.18.feed_forward.experts.21.w1, | |
| model.layers.18.feed_forward.experts.21.w3, model.layers.18.feed_forward.experts.22.w1, | |
| model.layers.18.feed_forward.experts.22.w3, model.layers.18.feed_forward.experts.23.w1, | |
| model.layers.18.feed_forward.experts.23.w3, model.layers.18.feed_forward.experts.24.w1, | |
| model.layers.18.feed_forward.experts.24.w3, model.layers.18.feed_forward.experts.25.w1, | |
| model.layers.18.feed_forward.experts.25.w3, model.layers.18.feed_forward.experts.26.w1, | |
| model.layers.18.feed_forward.experts.26.w3, model.layers.18.feed_forward.experts.27.w1, | |
| model.layers.18.feed_forward.experts.27.w3, model.layers.18.feed_forward.experts.28.w1, | |
| model.layers.18.feed_forward.experts.28.w3, model.layers.18.feed_forward.experts.29.w1, | |
| model.layers.18.feed_forward.experts.29.w3, model.layers.18.feed_forward.experts.30.w1, | |
| model.layers.18.feed_forward.experts.30.w3, model.layers.18.feed_forward.experts.31.w1, | |
| model.layers.18.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.19.operator_norm | |
| balance_layers: [model.layers.19.conv.in_proj] | |
| - smooth_layer: model.layers.19.ffn_norm | |
| balance_layers: [model.layers.19.feed_forward.gate, model.layers.19.feed_forward.experts.0.w1, | |
| model.layers.19.feed_forward.experts.0.w3, model.layers.19.feed_forward.experts.1.w1, | |
| model.layers.19.feed_forward.experts.1.w3, model.layers.19.feed_forward.experts.2.w1, | |
| model.layers.19.feed_forward.experts.2.w3, model.layers.19.feed_forward.experts.3.w1, | |
| model.layers.19.feed_forward.experts.3.w3, model.layers.19.feed_forward.experts.4.w1, | |
| model.layers.19.feed_forward.experts.4.w3, model.layers.19.feed_forward.experts.5.w1, | |
| model.layers.19.feed_forward.experts.5.w3, model.layers.19.feed_forward.experts.6.w1, | |
| model.layers.19.feed_forward.experts.6.w3, model.layers.19.feed_forward.experts.7.w1, | |
| model.layers.19.feed_forward.experts.7.w3, model.layers.19.feed_forward.experts.8.w1, | |
| model.layers.19.feed_forward.experts.8.w3, model.layers.19.feed_forward.experts.9.w1, | |
| model.layers.19.feed_forward.experts.9.w3, model.layers.19.feed_forward.experts.10.w1, | |
| model.layers.19.feed_forward.experts.10.w3, model.layers.19.feed_forward.experts.11.w1, | |
| model.layers.19.feed_forward.experts.11.w3, model.layers.19.feed_forward.experts.12.w1, | |
| model.layers.19.feed_forward.experts.12.w3, model.layers.19.feed_forward.experts.13.w1, | |
| model.layers.19.feed_forward.experts.13.w3, model.layers.19.feed_forward.experts.14.w1, | |
| model.layers.19.feed_forward.experts.14.w3, model.layers.19.feed_forward.experts.15.w1, | |
| model.layers.19.feed_forward.experts.15.w3, model.layers.19.feed_forward.experts.16.w1, | |
| model.layers.19.feed_forward.experts.16.w3, model.layers.19.feed_forward.experts.17.w1, | |
| model.layers.19.feed_forward.experts.17.w3, model.layers.19.feed_forward.experts.18.w1, | |
| model.layers.19.feed_forward.experts.18.w3, model.layers.19.feed_forward.experts.19.w1, | |
| model.layers.19.feed_forward.experts.19.w3, model.layers.19.feed_forward.experts.20.w1, | |
| model.layers.19.feed_forward.experts.20.w3, model.layers.19.feed_forward.experts.21.w1, | |
| model.layers.19.feed_forward.experts.21.w3, model.layers.19.feed_forward.experts.22.w1, | |
| model.layers.19.feed_forward.experts.22.w3, model.layers.19.feed_forward.experts.23.w1, | |
| model.layers.19.feed_forward.experts.23.w3, model.layers.19.feed_forward.experts.24.w1, | |
| model.layers.19.feed_forward.experts.24.w3, model.layers.19.feed_forward.experts.25.w1, | |
| model.layers.19.feed_forward.experts.25.w3, model.layers.19.feed_forward.experts.26.w1, | |
| model.layers.19.feed_forward.experts.26.w3, model.layers.19.feed_forward.experts.27.w1, | |
| model.layers.19.feed_forward.experts.27.w3, model.layers.19.feed_forward.experts.28.w1, | |
| model.layers.19.feed_forward.experts.28.w3, model.layers.19.feed_forward.experts.29.w1, | |
| model.layers.19.feed_forward.experts.29.w3, model.layers.19.feed_forward.experts.30.w1, | |
| model.layers.19.feed_forward.experts.30.w3, model.layers.19.feed_forward.experts.31.w1, | |
| model.layers.19.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.20.operator_norm | |
| balance_layers: [model.layers.20.conv.in_proj] | |
| - smooth_layer: model.layers.20.ffn_norm | |
| balance_layers: [model.layers.20.feed_forward.gate, model.layers.20.feed_forward.experts.0.w1, | |
| model.layers.20.feed_forward.experts.0.w3, model.layers.20.feed_forward.experts.1.w1, | |
| model.layers.20.feed_forward.experts.1.w3, model.layers.20.feed_forward.experts.2.w1, | |
| model.layers.20.feed_forward.experts.2.w3, model.layers.20.feed_forward.experts.3.w1, | |
| model.layers.20.feed_forward.experts.3.w3, model.layers.20.feed_forward.experts.4.w1, | |
| model.layers.20.feed_forward.experts.4.w3, model.layers.20.feed_forward.experts.5.w1, | |
| model.layers.20.feed_forward.experts.5.w3, model.layers.20.feed_forward.experts.6.w1, | |
| model.layers.20.feed_forward.experts.6.w3, model.layers.20.feed_forward.experts.7.w1, | |
| model.layers.20.feed_forward.experts.7.w3, model.layers.20.feed_forward.experts.8.w1, | |
| model.layers.20.feed_forward.experts.8.w3, model.layers.20.feed_forward.experts.9.w1, | |
| model.layers.20.feed_forward.experts.9.w3, model.layers.20.feed_forward.experts.10.w1, | |
| model.layers.20.feed_forward.experts.10.w3, model.layers.20.feed_forward.experts.11.w1, | |
| model.layers.20.feed_forward.experts.11.w3, model.layers.20.feed_forward.experts.12.w1, | |
| model.layers.20.feed_forward.experts.12.w3, model.layers.20.feed_forward.experts.13.w1, | |
| model.layers.20.feed_forward.experts.13.w3, model.layers.20.feed_forward.experts.14.w1, | |
| model.layers.20.feed_forward.experts.14.w3, model.layers.20.feed_forward.experts.15.w1, | |
| model.layers.20.feed_forward.experts.15.w3, model.layers.20.feed_forward.experts.16.w1, | |
| model.layers.20.feed_forward.experts.16.w3, model.layers.20.feed_forward.experts.17.w1, | |
| model.layers.20.feed_forward.experts.17.w3, model.layers.20.feed_forward.experts.18.w1, | |
| model.layers.20.feed_forward.experts.18.w3, model.layers.20.feed_forward.experts.19.w1, | |
| model.layers.20.feed_forward.experts.19.w3, model.layers.20.feed_forward.experts.20.w1, | |
| model.layers.20.feed_forward.experts.20.w3, model.layers.20.feed_forward.experts.21.w1, | |
| model.layers.20.feed_forward.experts.21.w3, model.layers.20.feed_forward.experts.22.w1, | |
| model.layers.20.feed_forward.experts.22.w3, model.layers.20.feed_forward.experts.23.w1, | |
| model.layers.20.feed_forward.experts.23.w3, model.layers.20.feed_forward.experts.24.w1, | |
| model.layers.20.feed_forward.experts.24.w3, model.layers.20.feed_forward.experts.25.w1, | |
| model.layers.20.feed_forward.experts.25.w3, model.layers.20.feed_forward.experts.26.w1, | |
| model.layers.20.feed_forward.experts.26.w3, model.layers.20.feed_forward.experts.27.w1, | |
| model.layers.20.feed_forward.experts.27.w3, model.layers.20.feed_forward.experts.28.w1, | |
| model.layers.20.feed_forward.experts.28.w3, model.layers.20.feed_forward.experts.29.w1, | |
| model.layers.20.feed_forward.experts.29.w3, model.layers.20.feed_forward.experts.30.w1, | |
| model.layers.20.feed_forward.experts.30.w3, model.layers.20.feed_forward.experts.31.w1, | |
| model.layers.20.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.21.operator_norm | |
| balance_layers: [model.layers.21.self_attn.q_proj, model.layers.21.self_attn.k_proj, | |
| model.layers.21.self_attn.v_proj] | |
| - smooth_layer: model.layers.21.ffn_norm | |
| balance_layers: [model.layers.21.feed_forward.gate, model.layers.21.feed_forward.experts.0.w1, | |
| model.layers.21.feed_forward.experts.0.w3, model.layers.21.feed_forward.experts.1.w1, | |
| model.layers.21.feed_forward.experts.1.w3, model.layers.21.feed_forward.experts.2.w1, | |
| model.layers.21.feed_forward.experts.2.w3, model.layers.21.feed_forward.experts.3.w1, | |
| model.layers.21.feed_forward.experts.3.w3, model.layers.21.feed_forward.experts.4.w1, | |
| model.layers.21.feed_forward.experts.4.w3, model.layers.21.feed_forward.experts.5.w1, | |
| model.layers.21.feed_forward.experts.5.w3, model.layers.21.feed_forward.experts.6.w1, | |
| model.layers.21.feed_forward.experts.6.w3, model.layers.21.feed_forward.experts.7.w1, | |
| model.layers.21.feed_forward.experts.7.w3, model.layers.21.feed_forward.experts.8.w1, | |
| model.layers.21.feed_forward.experts.8.w3, model.layers.21.feed_forward.experts.9.w1, | |
| model.layers.21.feed_forward.experts.9.w3, model.layers.21.feed_forward.experts.10.w1, | |
| model.layers.21.feed_forward.experts.10.w3, model.layers.21.feed_forward.experts.11.w1, | |
| model.layers.21.feed_forward.experts.11.w3, model.layers.21.feed_forward.experts.12.w1, | |
| model.layers.21.feed_forward.experts.12.w3, model.layers.21.feed_forward.experts.13.w1, | |
| model.layers.21.feed_forward.experts.13.w3, model.layers.21.feed_forward.experts.14.w1, | |
| model.layers.21.feed_forward.experts.14.w3, model.layers.21.feed_forward.experts.15.w1, | |
| model.layers.21.feed_forward.experts.15.w3, model.layers.21.feed_forward.experts.16.w1, | |
| model.layers.21.feed_forward.experts.16.w3, model.layers.21.feed_forward.experts.17.w1, | |
| model.layers.21.feed_forward.experts.17.w3, model.layers.21.feed_forward.experts.18.w1, | |
| model.layers.21.feed_forward.experts.18.w3, model.layers.21.feed_forward.experts.19.w1, | |
| model.layers.21.feed_forward.experts.19.w3, model.layers.21.feed_forward.experts.20.w1, | |
| model.layers.21.feed_forward.experts.20.w3, model.layers.21.feed_forward.experts.21.w1, | |
| model.layers.21.feed_forward.experts.21.w3, model.layers.21.feed_forward.experts.22.w1, | |
| model.layers.21.feed_forward.experts.22.w3, model.layers.21.feed_forward.experts.23.w1, | |
| model.layers.21.feed_forward.experts.23.w3, model.layers.21.feed_forward.experts.24.w1, | |
| model.layers.21.feed_forward.experts.24.w3, model.layers.21.feed_forward.experts.25.w1, | |
| model.layers.21.feed_forward.experts.25.w3, model.layers.21.feed_forward.experts.26.w1, | |
| model.layers.21.feed_forward.experts.26.w3, model.layers.21.feed_forward.experts.27.w1, | |
| model.layers.21.feed_forward.experts.27.w3, model.layers.21.feed_forward.experts.28.w1, | |
| model.layers.21.feed_forward.experts.28.w3, model.layers.21.feed_forward.experts.29.w1, | |
| model.layers.21.feed_forward.experts.29.w3, model.layers.21.feed_forward.experts.30.w1, | |
| model.layers.21.feed_forward.experts.30.w3, model.layers.21.feed_forward.experts.31.w1, | |
| model.layers.21.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.22.operator_norm | |
| balance_layers: [model.layers.22.conv.in_proj] | |
| - smooth_layer: model.layers.22.ffn_norm | |
| balance_layers: [model.layers.22.feed_forward.gate, model.layers.22.feed_forward.experts.0.w1, | |
| model.layers.22.feed_forward.experts.0.w3, model.layers.22.feed_forward.experts.1.w1, | |
| model.layers.22.feed_forward.experts.1.w3, model.layers.22.feed_forward.experts.2.w1, | |
| model.layers.22.feed_forward.experts.2.w3, model.layers.22.feed_forward.experts.3.w1, | |
| model.layers.22.feed_forward.experts.3.w3, model.layers.22.feed_forward.experts.4.w1, | |
| model.layers.22.feed_forward.experts.4.w3, model.layers.22.feed_forward.experts.5.w1, | |
| model.layers.22.feed_forward.experts.5.w3, model.layers.22.feed_forward.experts.6.w1, | |
| model.layers.22.feed_forward.experts.6.w3, model.layers.22.feed_forward.experts.7.w1, | |
| model.layers.22.feed_forward.experts.7.w3, model.layers.22.feed_forward.experts.8.w1, | |
| model.layers.22.feed_forward.experts.8.w3, model.layers.22.feed_forward.experts.9.w1, | |
| model.layers.22.feed_forward.experts.9.w3, model.layers.22.feed_forward.experts.10.w1, | |
| model.layers.22.feed_forward.experts.10.w3, model.layers.22.feed_forward.experts.11.w1, | |
| model.layers.22.feed_forward.experts.11.w3, model.layers.22.feed_forward.experts.12.w1, | |
| model.layers.22.feed_forward.experts.12.w3, model.layers.22.feed_forward.experts.13.w1, | |
| model.layers.22.feed_forward.experts.13.w3, model.layers.22.feed_forward.experts.14.w1, | |
| model.layers.22.feed_forward.experts.14.w3, model.layers.22.feed_forward.experts.15.w1, | |
| model.layers.22.feed_forward.experts.15.w3, model.layers.22.feed_forward.experts.16.w1, | |
| model.layers.22.feed_forward.experts.16.w3, model.layers.22.feed_forward.experts.17.w1, | |
| model.layers.22.feed_forward.experts.17.w3, model.layers.22.feed_forward.experts.18.w1, | |
| model.layers.22.feed_forward.experts.18.w3, model.layers.22.feed_forward.experts.19.w1, | |
| model.layers.22.feed_forward.experts.19.w3, model.layers.22.feed_forward.experts.20.w1, | |
| model.layers.22.feed_forward.experts.20.w3, model.layers.22.feed_forward.experts.21.w1, | |
| model.layers.22.feed_forward.experts.21.w3, model.layers.22.feed_forward.experts.22.w1, | |
| model.layers.22.feed_forward.experts.22.w3, model.layers.22.feed_forward.experts.23.w1, | |
| model.layers.22.feed_forward.experts.23.w3, model.layers.22.feed_forward.experts.24.w1, | |
| model.layers.22.feed_forward.experts.24.w3, model.layers.22.feed_forward.experts.25.w1, | |
| model.layers.22.feed_forward.experts.25.w3, model.layers.22.feed_forward.experts.26.w1, | |
| model.layers.22.feed_forward.experts.26.w3, model.layers.22.feed_forward.experts.27.w1, | |
| model.layers.22.feed_forward.experts.27.w3, model.layers.22.feed_forward.experts.28.w1, | |
| model.layers.22.feed_forward.experts.28.w3, model.layers.22.feed_forward.experts.29.w1, | |
| model.layers.22.feed_forward.experts.29.w3, model.layers.22.feed_forward.experts.30.w1, | |
| model.layers.22.feed_forward.experts.30.w3, model.layers.22.feed_forward.experts.31.w1, | |
| model.layers.22.feed_forward.experts.31.w3] | |
| - smooth_layer: model.layers.23.operator_norm | |
| balance_layers: [model.layers.23.conv.in_proj] | |
| - smooth_layer: model.layers.23.ffn_norm | |
| balance_layers: [model.layers.23.feed_forward.gate, model.layers.23.feed_forward.experts.0.w1, | |
| model.layers.23.feed_forward.experts.0.w3, model.layers.23.feed_forward.experts.1.w1, | |
| model.layers.23.feed_forward.experts.1.w3, model.layers.23.feed_forward.experts.2.w1, | |
| model.layers.23.feed_forward.experts.2.w3, model.layers.23.feed_forward.experts.3.w1, | |
| model.layers.23.feed_forward.experts.3.w3, model.layers.23.feed_forward.experts.4.w1, | |
| model.layers.23.feed_forward.experts.4.w3, model.layers.23.feed_forward.experts.5.w1, | |
| model.layers.23.feed_forward.experts.5.w3, model.layers.23.feed_forward.experts.6.w1, | |
| model.layers.23.feed_forward.experts.6.w3, model.layers.23.feed_forward.experts.7.w1, | |
| model.layers.23.feed_forward.experts.7.w3, model.layers.23.feed_forward.experts.8.w1, | |
| model.layers.23.feed_forward.experts.8.w3, model.layers.23.feed_forward.experts.9.w1, | |
| model.layers.23.feed_forward.experts.9.w3, model.layers.23.feed_forward.experts.10.w1, | |
| model.layers.23.feed_forward.experts.10.w3, model.layers.23.feed_forward.experts.11.w1, | |
| model.layers.23.feed_forward.experts.11.w3, model.layers.23.feed_forward.experts.12.w1, | |
| model.layers.23.feed_forward.experts.12.w3, model.layers.23.feed_forward.experts.13.w1, | |
| model.layers.23.feed_forward.experts.13.w3, model.layers.23.feed_forward.experts.14.w1, | |
| model.layers.23.feed_forward.experts.14.w3, model.layers.23.feed_forward.experts.15.w1, | |
| model.layers.23.feed_forward.experts.15.w3, model.layers.23.feed_forward.experts.16.w1, | |
| model.layers.23.feed_forward.experts.16.w3, model.layers.23.feed_forward.experts.17.w1, | |
| model.layers.23.feed_forward.experts.17.w3, model.layers.23.feed_forward.experts.18.w1, | |
| model.layers.23.feed_forward.experts.18.w3, model.layers.23.feed_forward.experts.19.w1, | |
| model.layers.23.feed_forward.experts.19.w3, model.layers.23.feed_forward.experts.20.w1, | |
| model.layers.23.feed_forward.experts.20.w3, model.layers.23.feed_forward.experts.21.w1, | |
| model.layers.23.feed_forward.experts.21.w3, model.layers.23.feed_forward.experts.22.w1, | |
| model.layers.23.feed_forward.experts.22.w3, model.layers.23.feed_forward.experts.23.w1, | |
| model.layers.23.feed_forward.experts.23.w3, model.layers.23.feed_forward.experts.24.w1, | |
| model.layers.23.feed_forward.experts.24.w3, model.layers.23.feed_forward.experts.25.w1, | |
| model.layers.23.feed_forward.experts.25.w3, model.layers.23.feed_forward.experts.26.w1, | |
| model.layers.23.feed_forward.experts.26.w3, model.layers.23.feed_forward.experts.27.w1, | |
| model.layers.23.feed_forward.experts.27.w3, model.layers.23.feed_forward.experts.28.w1, | |
| model.layers.23.feed_forward.experts.28.w3, model.layers.23.feed_forward.experts.29.w1, | |
| model.layers.23.feed_forward.experts.29.w3, model.layers.23.feed_forward.experts.30.w1, | |
| model.layers.23.feed_forward.experts.30.w3, model.layers.23.feed_forward.experts.31.w1, | |
| model.layers.23.feed_forward.experts.31.w3] | |
| - smooth_layer: model.embedding_norm | |
| balance_layers: [lm_head] | |
| duo_scaling: true | |