corupta's picture
Upload folder using huggingface_hub
236a469 verified
{
"metadata": {
"ParamSize": 579,
"ParamBytes": 6098479104.0,
"BitsPerParam": 22.439073187342018
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1244659712,
"records": [
{
"name": "lm_head.weight",
"shape": [
151936,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1244659712,
"byteOffset": 0
}
],
"md5sum": "d64dee60305ffd33a4da152f96235464"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 1244659712,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
151936,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1244659712,
"byteOffset": 0
}
],
"md5sum": "e70db17c1e02ac3ecb17b0ba6cd71a97"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "b8625aa7a95d91ad1e22264216bbba54"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 28131584,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 8192
},
{
"name": "model.layers.0.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 25174016
},
{
"name": "model.layers.0.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 25370624
},
{
"name": "model.layers.0.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 26157056
},
{
"name": "model.layers.0.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 26550272
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 28123136
},
{
"name": "model.layers.0.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 28131328
}
],
"md5sum": "86226d1d10a87e2b04fcb3d11c201bdb"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "46fac4d1497dc53504ca1412d33b7705"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "56991dfba9ecfb774f65e6e6a1c41429"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.0.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.0.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.0.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.0.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.0.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.1.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.1.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.1.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.1.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.1.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "79e01863661867de9bb52261ee3f8004"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0dde4c3016701682201715baf9d4a23c"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "255046eb8e3034635168f5c2d8d61a24"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.1.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.1.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.1.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.1.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.1.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.10.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.10.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.10.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.10.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.10.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "4f9dca364828ba865d30209755f0bf93"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8928ecc2e1ceb44535c2769007894f0f"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "2f9aa35fb4b13c5314cd1d9ef4d49f08"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.10.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.10.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.10.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.10.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.10.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.11.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.11.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.11.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.11.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.11.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "1b5764c92d3b062e9fc43d10a2f8a15e"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7bfd424d8fdf5c8bdde8f9e41b10ffa6"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "e078c787661e43bf92728af8c15a7ceb"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.11.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.11.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.11.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.11.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.11.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.12.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.12.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.12.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.12.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.12.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "81bfa2e5939b4c0eb7fd2e31562c0d79"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b1bf488012f3136734d673f70de65658"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "e45a21e66f356c459aec8bf0a3093cb5"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.12.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.12.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.12.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.12.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.12.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.13.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.13.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.13.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.13.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.13.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "a1477efa32f1ea536ba281f1789cbfb6"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5e094003e969517105d51c9175cc4e45"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f04417e820e6585eac3056995fe261c3"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.13.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.13.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.13.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.13.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.13.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.14.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.14.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.14.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.14.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.14.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "75320aa079693ca30f2274eb5f076fdb"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0f2870ba0aaa4e5fca73ceb38bba811f"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "d174aecb8f85dde3d2f564169b168aca"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.14.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.14.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.14.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.14.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.14.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.15.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.15.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.15.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.15.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.15.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "61c1ffd968556194349c2cb9056e009c"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "58fada0aca33ca74ca97c0cfe37de9bd"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "6497ec0b075d94413a905dab4e1132ac"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.15.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.15.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.15.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.15.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.15.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.16.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.16.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.16.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.16.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.16.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "c58297f04113a22753688e3efee8d4f7"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.17.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "454c05d6923f51493518df944f2417a2"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "2000b2249ecc5ac7023a2203c481815c"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.16.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.16.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.16.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.16.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.16.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.17.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.17.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.17.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.17.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.17.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "393d407bf389b3e2031ee78e9e612354"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.18.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8ddf88764771e69ae4e90b67029af1cc"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "4cf021f6ac9ad4311d879fc35234a374"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.17.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.17.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.17.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.17.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.17.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.18.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.18.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.18.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.18.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.18.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "6959a3c4fb3461bfd113ed7908e57ff2"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e80a454db066954b8844d9b148b7804d"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f8dad8a18db560d0074432a82c4adf99"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.18.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.18.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.18.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.18.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.18.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.19.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.19.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.19.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.19.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.19.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "eb01bf15d6c4240bce4381ee76f78c30"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "fac9ebe56c5ba57a194105a9c8152e22"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "790b8be9aa561c62b5cd0448076937c7"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.19.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.19.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.19.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.19.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.19.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.2.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.2.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.2.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.2.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.2.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "1115eb0171c45f745d7d730c491bcb59"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e457c23395095d288baef9e900f0ba85"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "58e4c40c610429549fb223d380bfd8dc"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.2.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.2.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.2.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.2.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.2.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.20.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.20.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.20.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.20.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.20.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "185c35af76212c303cfd1a2dff6a60ce"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2f376fbb9ba66316fe6868bcecdbdb4d"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "7982450591a32cf4e4e00218a41ffe71"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.20.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.20.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.20.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.20.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.20.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.21.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.21.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.21.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.21.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.21.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "c578ae4e47488c81660b46dc9004fc7c"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f0626adeede79b2e37aaa148024a0cc2"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "7354925cae6a830c7783b08d3ca3167b"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.21.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.21.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.21.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.21.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.21.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.22.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.22.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.22.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.22.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.22.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "0528a25cce9f716a06a5929cc09712c4"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7c12819c21b9d0e7533aaa52c37996b6"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "ae3c695bac72e518b5cd7fc91127a888"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.22.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.22.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.22.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.22.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.22.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.23.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.23.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.23.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.23.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.23.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "e31358a0a1b6b859e1111258b18cf0f3"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.24.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "07d7f1b5c07eaf87eb1aeb204de1a6b3"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "a3c6dc276ace590d1f7fe4128d9092a8"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.23.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.23.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.23.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.23.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.23.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.23.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.24.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.24.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.24.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.24.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.24.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "13e32bf33f99989aa7ca3aac4b7d2258"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.25.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0c24d4d443cc45daeb63ecc421fabac4"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "c5dd9674727124364d8a23896d8467e7"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.24.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.24.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.24.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.24.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.24.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.25.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.25.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.25.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.25.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.25.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "c00d87d47ff50af6fa8a4e5759ec0e11"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.26.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d1066e325fc81d50de410fed74debc5c"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "778f17b0f46f77237e2941761928c35d"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.25.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.25.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.25.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.25.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.25.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.26.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.26.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.26.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.26.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.26.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "15f3d9f8c577ec3f8c8ee8565760c26a"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.27.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b66c030936a8801b6e2bbd46b88fbdf5"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "fd8cde921ac657192b4096b7fdbfbfaf"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.26.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.26.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.26.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.26.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.26.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.26.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.27.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.27.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.27.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.27.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.27.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "330f4579c7d941d57fcd35911a3a2509"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.28.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3b37be200d67623085d0e0daa4874ec3"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "88b39b5cd7e14ce3240d805c00c9e41d"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.27.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.27.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.27.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.27.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.27.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.28.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.28.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.28.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.28.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.28.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "26595893140c6211d670b9305d8e5015"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.29.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e247a94c104e47a5738281ae525bca65"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "6eef6153a06c6836ea052080f014baa5"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.28.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.28.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.28.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.28.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.28.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.28.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.29.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.29.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.29.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.29.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.29.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "3d9e3405c60ec93b8584c09486a69281"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d1065c490d4325e4966ebd46754d602a"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "d6f32f787af5af9b21f9d269ca4804fc"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.29.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.29.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.29.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.29.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.29.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.29.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.3.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.3.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.3.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.3.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.3.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "50ae4794801943c4adabd004505796b1"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.30.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f88991db23821475a99a7fcf027337a6"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "6c3d51fd27bcc660e4c5da4f5581d2ac"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.3.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.3.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.3.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.3.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.3.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.30.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.30.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.30.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.30.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.30.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "f58c94a387c290ae890b92f5efdfd0ef"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.31.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d6a03927f3e927299b50578858bd2ebd"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "588937f286b487349d08d734d9cd7d59"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.30.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.30.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.30.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.30.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.30.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.30.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.31.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.31.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.31.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.31.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.31.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "e37b1aae46c1016c3738bce469960ca7"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.32.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b66cdfee55cb09eb0a9140aa3f846a97"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "39049cb0da30d5ff50faf02f5101c9d5"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.31.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.31.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.31.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.31.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.31.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.31.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.32.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.32.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.32.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.32.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.32.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "421d332d4f32a3e4be2da5c12ae16054"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.33.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "26f124ebd05ac6e362de9694bde1e7db"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "c282630b2b146515b26683e8a7539555"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.32.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.32.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.32.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.32.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.32.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.32.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.32.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.33.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.33.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.33.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.33.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.33.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "c3222bd9f42b352ccb57970ef461ebdf"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.34.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "44ac3c929c8dfead50f6df28ac320294"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "ef2a720f30857ceeef512fa533670802"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.33.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.33.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.33.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.33.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.33.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.33.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.34.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.34.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.34.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.34.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.34.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "5217dcaaaec6cc1a119110f4ec7aea7a"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.35.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "12cb3e6c1c38cfc0dadbb051fd3d6344"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "232bc44233d1a8fafbef71bc99bdb767"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.34.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.34.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.34.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.34.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.34.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.34.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.35.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.35.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.35.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.35.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.35.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "9203e0e82d2f17b4d456c73e0c6c5c4c"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "eef4583b38397da6a30551e9ec66b8b2"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "d40da3f4c34f15c87d154676e02a1d3c"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.35.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.35.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.35.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.35.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.35.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.35.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.4.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.4.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.4.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.4.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.4.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "c84fb73ee4c9a3b52d6527c898e22618"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d98e2cfa9cecd87e24e73fb76d5a3deb"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "de7a514e535c33b48378420db47c349c"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.4.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.4.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.4.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.4.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.4.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.5.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.5.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.5.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.5.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.5.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "54b9a0bb8803eac67827925ca968f7a8"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b9ea49c37cff1afa452e4398722bdc72"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "7fcd5b011e60281af583b0bfa34961d3"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.5.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.5.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.5.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.5.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.5.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.5.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.6.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.6.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.6.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.6.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.6.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "571dcbfdf70e7a6e5ce238b010d10d71"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "aaf376f0cf5b6c9b4fbb8363679155d9"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "351102637ebed9b4a484db62aa8bea17"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.6.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.6.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.6.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.6.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.6.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.7.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.7.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.7.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.7.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.7.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "8d5096a1e6412f27f9e2fb3862564a11"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "06b2ebfcbe31df25c6f243d071b6ca9d"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "186b5edb1c234c01120de109610b4b72"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.7.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.7.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.7.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.7.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.7.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.8.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.8.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.8.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.8.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.8.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "e0dd964ebf995377639855d43392f8ef"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.mlp.down_proj.qweight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c8a32a2ddd4394d7a8049433f588c139"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.qweight",
"shape": [
4096,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "0cb8fc062a80f9444bec32464bb9c193"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 24756736,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.8.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.8.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.8.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.8.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.8.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
},
{
"name": "model.layers.9.mlp.down_proj.qzeros",
"shape": [
96,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21799168
},
{
"name": "model.layers.9.mlp.down_proj.scales",
"shape": [
96,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 21995776
},
{
"name": "model.layers.9.mlp.gate_up_proj.qzeros",
"shape": [
32,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22782208
},
{
"name": "model.layers.9.mlp.gate_up_proj.scales",
"shape": [
32,
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23175424
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24748288
},
{
"name": "model.layers.9.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24756480
}
],
"md5sum": "9acd66406bad5be1d44e4df09de3b7bf"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 21799168,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.qweight",
"shape": [
4096,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.c_attn.qzeros",
"shape": [
32,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 98304,
"byteOffset": 12582912
},
{
"name": "model.layers.9.self_attn.c_attn.scales",
"shape": [
32,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12681216
},
{
"name": "model.layers.9.self_attn.o_proj.qweight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 13074432
},
{
"name": "model.layers.9.self_attn.o_proj.qzeros",
"shape": [
32,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 21463040
},
{
"name": "model.layers.9.self_attn.o_proj.scales",
"shape": [
32,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21528576
},
{
"name": "model.layers.9.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21790720
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21790976
}
],
"md5sum": "79b240b6291e8230d0a54ae9e491c3ec"
}
]
}