| { | |
| "metadata": { | |
| "ParamSize": 579, | |
| "ParamBytes": 6098479104.0, | |
| "BitsPerParam": 22.439073187342018 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 1244659712, | |
| "records": [ | |
| { | |
| "name": "lm_head.weight", | |
| "shape": [ | |
| 151936, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1244659712, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d64dee60305ffd33a4da152f96235464" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 1244659712, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.weight", | |
| "shape": [ | |
| 151936, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1244659712, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e70db17c1e02ac3ecb17b0ba6cd71a97" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b8625aa7a95d91ad1e22264216bbba54" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 28131584, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 8192 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 25174016 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 25370624 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 26157056 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 26550272 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 28123136 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 28131328 | |
| } | |
| ], | |
| "md5sum": "86226d1d10a87e2b04fcb3d11c201bdb" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "46fac4d1497dc53504ca1412d33b7705" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "56991dfba9ecfb774f65e6e6a1c41429" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "79e01863661867de9bb52261ee3f8004" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0dde4c3016701682201715baf9d4a23c" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "255046eb8e3034635168f5c2d8d61a24" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "4f9dca364828ba865d30209755f0bf93" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8928ecc2e1ceb44535c2769007894f0f" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2f9aa35fb4b13c5314cd1d9ef4d49f08" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "1b5764c92d3b062e9fc43d10a2f8a15e" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7bfd424d8fdf5c8bdde8f9e41b10ffa6" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e078c787661e43bf92728af8c15a7ceb" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "81bfa2e5939b4c0eb7fd2e31562c0d79" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b1bf488012f3136734d673f70de65658" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e45a21e66f356c459aec8bf0a3093cb5" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "a1477efa32f1ea536ba281f1789cbfb6" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5e094003e969517105d51c9175cc4e45" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f04417e820e6585eac3056995fe261c3" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "75320aa079693ca30f2274eb5f076fdb" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0f2870ba0aaa4e5fca73ceb38bba811f" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d174aecb8f85dde3d2f564169b168aca" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "61c1ffd968556194349c2cb9056e009c" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "58fada0aca33ca74ca97c0cfe37de9bd" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6497ec0b075d94413a905dab4e1132ac" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "c58297f04113a22753688e3efee8d4f7" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "454c05d6923f51493518df944f2417a2" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2000b2249ecc5ac7023a2203c481815c" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "393d407bf389b3e2031ee78e9e612354" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8ddf88764771e69ae4e90b67029af1cc" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4cf021f6ac9ad4311d879fc35234a374" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "6959a3c4fb3461bfd113ed7908e57ff2" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e80a454db066954b8844d9b148b7804d" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f8dad8a18db560d0074432a82c4adf99" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "eb01bf15d6c4240bce4381ee76f78c30" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fac9ebe56c5ba57a194105a9c8152e22" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "790b8be9aa561c62b5cd0448076937c7" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "1115eb0171c45f745d7d730c491bcb59" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e457c23395095d288baef9e900f0ba85" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "58e4c40c610429549fb223d380bfd8dc" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "185c35af76212c303cfd1a2dff6a60ce" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2f376fbb9ba66316fe6868bcecdbdb4d" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7982450591a32cf4e4e00218a41ffe71" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "c578ae4e47488c81660b46dc9004fc7c" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f0626adeede79b2e37aaa148024a0cc2" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7354925cae6a830c7783b08d3ca3167b" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "0528a25cce9f716a06a5929cc09712c4" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7c12819c21b9d0e7533aaa52c37996b6" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ae3c695bac72e518b5cd7fc91127a888" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "e31358a0a1b6b859e1111258b18cf0f3" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "07d7f1b5c07eaf87eb1aeb204de1a6b3" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a3c6dc276ace590d1f7fe4128d9092a8" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.24.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "13e32bf33f99989aa7ca3aac4b7d2258" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0c24d4d443cc45daeb63ecc421fabac4" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c5dd9674727124364d8a23896d8467e7" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.25.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "c00d87d47ff50af6fa8a4e5759ec0e11" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d1066e325fc81d50de410fed74debc5c" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "778f17b0f46f77237e2941761928c35d" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.26.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "15f3d9f8c577ec3f8c8ee8565760c26a" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b66c030936a8801b6e2bbd46b88fbdf5" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fd8cde921ac657192b4096b7fdbfbfaf" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.27.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "330f4579c7d941d57fcd35911a3a2509" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3b37be200d67623085d0e0daa4874ec3" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "88b39b5cd7e14ce3240d805c00c9e41d" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.28.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.28.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.28.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.28.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "26595893140c6211d670b9305d8e5015" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e247a94c104e47a5738281ae525bca65" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6eef6153a06c6836ea052080f014baa5" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.29.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.29.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.29.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.29.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "3d9e3405c60ec93b8584c09486a69281" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d1065c490d4325e4966ebd46754d602a" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d6f32f787af5af9b21f9d269ca4804fc" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "50ae4794801943c4adabd004505796b1" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f88991db23821475a99a7fcf027337a6" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6c3d51fd27bcc660e4c5da4f5581d2ac" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.30.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.30.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.30.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.30.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.30.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.30.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "f58c94a387c290ae890b92f5efdfd0ef" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d6a03927f3e927299b50578858bd2ebd" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "588937f286b487349d08d734d9cd7d59" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.31.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.31.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.31.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.31.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.31.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.31.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "e37b1aae46c1016c3738bce469960ca7" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.32.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b66cdfee55cb09eb0a9140aa3f846a97" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.32.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "39049cb0da30d5ff50faf02f5101c9d5" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.32.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.32.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.32.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.32.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.32.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.32.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.32.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "421d332d4f32a3e4be2da5c12ae16054" | |
| }, | |
| { | |
| "dataPath": "params_shard_82.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.33.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "26f124ebd05ac6e362de9694bde1e7db" | |
| }, | |
| { | |
| "dataPath": "params_shard_83.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.33.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c282630b2b146515b26683e8a7539555" | |
| }, | |
| { | |
| "dataPath": "params_shard_84.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.32.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.32.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.32.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.32.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.32.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.32.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.32.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.33.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.33.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.33.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.33.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.33.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.33.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.33.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "c3222bd9f42b352ccb57970ef461ebdf" | |
| }, | |
| { | |
| "dataPath": "params_shard_85.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.34.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "44ac3c929c8dfead50f6df28ac320294" | |
| }, | |
| { | |
| "dataPath": "params_shard_86.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.34.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ef2a720f30857ceeef512fa533670802" | |
| }, | |
| { | |
| "dataPath": "params_shard_87.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.33.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.33.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.33.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.33.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.33.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.33.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.33.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.34.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.34.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.34.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.34.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.34.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.34.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.34.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "5217dcaaaec6cc1a119110f4ec7aea7a" | |
| }, | |
| { | |
| "dataPath": "params_shard_88.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.35.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "12cb3e6c1c38cfc0dadbb051fd3d6344" | |
| }, | |
| { | |
| "dataPath": "params_shard_89.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.35.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "232bc44233d1a8fafbef71bc99bdb767" | |
| }, | |
| { | |
| "dataPath": "params_shard_90.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.34.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.34.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.34.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.34.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.34.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.34.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.34.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.35.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.35.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.35.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.35.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.35.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.35.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.35.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "9203e0e82d2f17b4d456c73e0c6c5c4c" | |
| }, | |
| { | |
| "dataPath": "params_shard_91.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eef4583b38397da6a30551e9ec66b8b2" | |
| }, | |
| { | |
| "dataPath": "params_shard_92.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d40da3f4c34f15c87d154676e02a1d3c" | |
| }, | |
| { | |
| "dataPath": "params_shard_93.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.35.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.35.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.35.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.35.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.35.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.35.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.35.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "c84fb73ee4c9a3b52d6527c898e22618" | |
| }, | |
| { | |
| "dataPath": "params_shard_94.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d98e2cfa9cecd87e24e73fb76d5a3deb" | |
| }, | |
| { | |
| "dataPath": "params_shard_95.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "de7a514e535c33b48378420db47c349c" | |
| }, | |
| { | |
| "dataPath": "params_shard_96.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "54b9a0bb8803eac67827925ca968f7a8" | |
| }, | |
| { | |
| "dataPath": "params_shard_97.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b9ea49c37cff1afa452e4398722bdc72" | |
| }, | |
| { | |
| "dataPath": "params_shard_98.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7fcd5b011e60281af583b0bfa34961d3" | |
| }, | |
| { | |
| "dataPath": "params_shard_99.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "571dcbfdf70e7a6e5ce238b010d10d71" | |
| }, | |
| { | |
| "dataPath": "params_shard_100.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aaf376f0cf5b6c9b4fbb8363679155d9" | |
| }, | |
| { | |
| "dataPath": "params_shard_101.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "351102637ebed9b4a484db62aa8bea17" | |
| }, | |
| { | |
| "dataPath": "params_shard_102.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "8d5096a1e6412f27f9e2fb3862564a11" | |
| }, | |
| { | |
| "dataPath": "params_shard_103.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "06b2ebfcbe31df25c6f243d071b6ca9d" | |
| }, | |
| { | |
| "dataPath": "params_shard_104.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "186b5edb1c234c01120de109610b4b72" | |
| }, | |
| { | |
| "dataPath": "params_shard_105.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "e0dd964ebf995377639855d43392f8ef" | |
| }, | |
| { | |
| "dataPath": "params_shard_106.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.down_proj.qweight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c8a32a2ddd4394d7a8049433f588c139" | |
| }, | |
| { | |
| "dataPath": "params_shard_107.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0cb8fc062a80f9444bec32464bb9c193" | |
| }, | |
| { | |
| "dataPath": "params_shard_108.bin", | |
| "format": "raw-shard", | |
| "nbytes": 24756736, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.down_proj.qzeros", | |
| "shape": [ | |
| 96, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 196608, | |
| "byteOffset": 21799168 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.down_proj.scales", | |
| "shape": [ | |
| 96, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 786432, | |
| "byteOffset": 21995776 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 3072 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 22782208 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.scales", | |
| "shape": [ | |
| 32, | |
| 24576 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1572864, | |
| "byteOffset": 23175424 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 24748288 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 24756480 | |
| } | |
| ], | |
| "md5sum": "9acd66406bad5be1d44e4df09de3b7bf" | |
| }, | |
| { | |
| "dataPath": "params_shard_109.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21799168, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.qweight", | |
| "shape": [ | |
| 4096, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 12582912, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.qzeros", | |
| "shape": [ | |
| 32, | |
| 768 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 98304, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.scales", | |
| "shape": [ | |
| 32, | |
| 6144 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 393216, | |
| "byteOffset": 12681216 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.qweight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 13074432 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.qzeros", | |
| "shape": [ | |
| 32, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 65536, | |
| "byteOffset": 21463040 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.scales", | |
| "shape": [ | |
| 32, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 262144, | |
| "byteOffset": 21528576 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 21790720 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21790976 | |
| } | |
| ], | |
| "md5sum": "79b240b6291e8230d0a54ae9e491c3ec" | |
| } | |
| ] | |
| } |