# Model-merge configuration: combines CodeLlama-7b-Python into the
# Swallow-7b-instruct base using the `dare_ties` merge method.
# NOTE(review): the keys match the mergekit configuration schema; the nesting
# below was reconstructed on that assumption -- confirm against the consumer.
models:
  - model: tokyotech-llm/Swallow-7b-instruct-hf
    # no parameters necessary for base model
  - model: codellama/CodeLlama-7b-Python-hf  # follow user intent
    parameters:
      # NOTE(review): a density of 1 presumably keeps every delta parameter
      # (no random dropping) -- confirm this is intended with dare_ties.
      density: 1
      # Per-tensor-group merge weights. The first entry whose `filter`
      # matches a tensor name is presumably the one applied, and each
      # 5-element list is presumably interpolated across layer depth --
      # TODO confirm both against the merge tool's documentation.
      weight:
        - filter: mlp.down_proj
          value: [0.3, 0.25, 0.25, 0.15, 0.1]
        - filter: mlp.gate_proj
          value: [0.7, 0.25, 0.5, 0.45, 0.4]
        - filter: mlp.up_proj
          value: [0.7, 0.25, 0.5, 0.45, 0.4]
        - filter: self_attn
          value: [0.7, 0.25, 0.5, 0.45, 0.4]
        - value: 0  # fallback for rest of tensors.
merge_method: dare_ties
# Must name the same model as the parameter-less entry in `models` above.
base_model: tokyotech-llm/Swallow-7b-instruct-hf
dtype: bfloat16
tokenizer_source: union