diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,12343 @@ +{ + "metadata": { + "ParamSize": 805, + "ParamBytes": 31776318464.0, + "BitsPerParam": 3.04023285660184 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 420679680, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 128256, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 420679680, + "byteOffset": 0 + } + ], + "md5sum": "79cdbbd110de7ccddb1a2f1e20de7ac6" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 52584960, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 128256, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52584960, + "byteOffset": 0 + } + ], + "md5sum": "3b30e9c6343f50a2014db07dfbe274f1" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "a7b494a842b6e7d72a57b59255484045" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "5afaa909ee6b8ce2418a868954ce8424" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 23560192, + "records": [ + { + "name": "model.layers.78.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 0 + }, + { + "name": "model.layers.78.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 16384 + }, + { + "name": "model.layers.78.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.79.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23543808 + } + ], + "md5sum": "41d0b8400d88b59a867d034358fcd411" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "059a7d85ec21ea071c8381af2bed14ac" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "ef39d92b6f5455fa49778bd3963bb8de" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.79.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "697e37f857f64b4d54cf5c6545a58c7a" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "b40ffe847002075ab000d2a8cea5cba1" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 420679680, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 128256, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 420679680, + "byteOffset": 0 + } + ], + "md5sum": "8b8a02a0c14245a32993c53a4df286b0" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 52584960, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 128256, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52584960, + "byteOffset": 0 + } + ], + "md5sum": "8c3f7f6a3dadd8a3303d0c97575306d9" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "b82f2cdf9161f01f3a257210e6dea0bb" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "9021d99e6f10c7498a85b258b0c9db1c" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "7523c5ce188328e68cca18b82b72f266" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "3391a2b732afdb68726847e610d4a1fc" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.79.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.79.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.79.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.norm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19337216 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19353600 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "6f8397f19dcf553a905a285aa02a5c4c" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "7b32bb3187133e9811f7869bd1620210" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "5a9d015e8949959bac87ad362341081c" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "21d48abcf0a261bb44f041d194ee4b22" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "72574b86e684cce8814a63021574b434" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 3358720 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 26869760 + } + ], + "md5sum": "0b4b5789767ab69f93ab3bb991fd8d65" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "1d95b2b19abe8fade99cb5cf301e75d7" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "5089b37b21725132bf244cf93afc5874" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "2146dee05b38ccf4a6915cbb3ae7721b" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "6431c5131a115c69d8687c340763e2ed" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "33eb6e733bd215ffadc0639f47f85473" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "d2cbdaa4be2069ef4e112323ed9919e3" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3375104 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15122432 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 15155200 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26902528 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 26918912 + } + ], + "md5sum": "caac8ef607ba128f493ef871b3d6c3d9" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "7efda4188558a786382fd72c76eafcd1" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "d7021b32dac33c8a58f74f24a19fd983" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "7abcbb5242ba1ced66655bb9cdf9a265" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "77d07962f836339f528b07673fdb6539" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "2c76c1f794e309ac1614ddff620f7fd3" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "00287df312930aaff8bc0afce7c7d992" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "574d306ed44157352a8a6ac450e732fd" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "6cb374c34d43b3bda41a3bcc20e91615" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 30269440, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3375104 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15122432 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 15138816 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 19337216 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 22695936 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 26894336 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30253056 + } + ], + "md5sum": "7d11b5bf2c7dfa2ea80ed04612f0d019" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "fa6abf918e2cba5baa602c47b37727ec" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "162120f64eac538bdbc18ed9d164aa38" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "cf529e13d0a28768f15eb2d96a47be9a" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "673768d25d49f7cc3525e4e5f20bbb01" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "49dc3ac2600f045763b41e5b0451948a" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "a75e4683bfe12c348c4e8f8f0ea0c2cc" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "8e87607706352f9f0c658f2f30b1bd22" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "54a06b6517666da3d7f271e0044d61f3" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19337216 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31084544 + } + ], + "md5sum": "4f1050b26006ec257aecb49ade39f629" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "a4e0f6947b19f9b7219fa61272eea4ff" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "ab673a66b934a744f1d9efd50765507a" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "9b66573f29463afb9d8331a9705d1af2" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "d613aeeaa93405e664ffa2a3cc588257" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "a3d6ea4031e8e46f50fce6416dc207d8" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "35448ff6b1aa9513ed35e5b57dc0e19d" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "3822c3342588492b7b412d62851cfade" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 22712320, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 3358720 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 7557120 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 10915840 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 10932224 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 22679552 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 22695936 + } + ], + "md5sum": "6d7993aa2ea490cd2856b677bd7729b3" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "3942b36b592b53da4ebc8abb695f3db4" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "1ac979e0c2775869b9ca326522f9734e" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "4f60a10700340c096b4dead70d9b6268" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "53545cb46e48722d3567abc3a677df24" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "fa614df1d0324627f48c7f0c9fcb9687" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "48c9d5a3710bdcb9cf572e767d97710a" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "a366c667027f4fc409215aebc4efcc31" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "d25a823c473265e49522aab71be34c7a" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + } + ], + "md5sum": "c8ec37280ac85cc5c96be4dbca1d4b37" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "546dadc3565b34f60167de534790be83" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "9ecc7f3527689391882f6000e81c31b0" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "3e3e59e4a99e7a9806f655f704dc70c8" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "7d82284e2f0cf6c60406a6e02376bd1a" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "8b50484ccf2d837042b58a1eb259d90b" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "6c7b439e92d9e940919473f8ebe99753" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "bf119fb2787116fd582d9bba550bcd0b" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "3c4f41cb4f44a8267e3ca67f5ed62508" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11747328 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15945728 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19304448 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19337216 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19353600 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "2714eb8a7458f6b74aff9552f572b4fb" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "4c915342c44dbe5ee9a38ac7095db674" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "6ea2d5d6db9bf65d390a0276a98633c9" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "dc560a16cc7946e22de920a8f6de703f" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "2607cb04f50769c84154b4f7c45f6056" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "a5302ba67d3bbefa995039869e6a8f22" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "6b1ce68d4e9e4d85c7f4b38b5efe9ce8" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "44c0281c903b1712d7ae71dcf4947c98" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "cd01533c8b4721d8ff627059b7cc4b4b" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "bf1f7b509c2d2963ffac1487c37d3e34" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "a0c08e392b1bb60286378ca288a735c4" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "45bbaea2c1f015acc7cf1321faf11b21" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 30269440, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3375104 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15122432 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 15138816 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 19337216 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 22695936 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 26894336 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30253056 + } + ], + "md5sum": "99bce1b9114cde1414fb0ae3f1094c60" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "be83a617003dc96cbf2053fbd050e00d" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "67dfb500296a6a256548d2fd1c47be54" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "80c71deda6c8d9b3d6e0c20e254fe920" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "d0dd28446eb0199978718d3b3970857c" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "d76c78dd46d04d849e8d01e6b52f25e1" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "c3d3e83889eef688383ee2202ad00e07" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "1cb20e1716bad3289753847f06da6d29" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "8ea3e0a0e37899be5bab0bdf7cbc57ff" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "4aca62becbf6cc729d7e6aef0999a913" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "3e6f3f0e50b4d324495aab3160961083" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "4a2c70ef3a66bb61fc35f43a107c8d59" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "9b32f32e8b1d1fd719f5fadad7937ce4" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "fd1b9fce3abe778e9b654f015a91ef0f" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "6eac00b45db06328cafc7d6fcfafadd2" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 26894336, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 19320832 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 23519232 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26877952 + } + ], + "md5sum": "79893ca5618452bd49d6eb21e5c1a27b" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "3f0c693ce55ccdbf2c3d5ad2cb33a478" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "013a6ad49c2c89a770d0b71baaa5a202" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "918baa92d5735a0148edec7aa7ba8ac3" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "037c6f0beda4210e71295c6521d55dfc" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "6794aa484651ba1f514b8be9bf5f1704" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "875cbb2031a94ae1f853981809b8587f" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "ede4fc697012b793b478d8dbe77c4bf7" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "d91bc729e1a85bb7dc6a6230c762aaf1" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + } + ], + "md5sum": "07831c160677febc293de24ebeaee1f5" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "83e87195b485e1813b80721ccdedc4d3" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "4d909964566221a2a05ffb8e7fe01dc0" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "cb62e3a4a4fb14ce35120e60019b85b2" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "51ffaf0b31cab2039f1e3ef69b426cbc" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "a5da1369a747d0223c2245787565b600" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "068270946e8e0b94c743b1f570ad0456" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "054a2a63f28c370a626ed2d5fc54764c" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "97a643f062521abc831f7cacfcc67d67" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11747328 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15945728 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19304448 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19337216 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19353600 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "dabac5556e2d11b86e247eace4a23a77" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "4f6076424dfcbab6a1b97f4b9190eee4" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "d26049e021ad899f06f6b85ce25a0226" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "a7a5fe64d267f4fb48a5d21b2665b76c" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "25ce498101097e8054a3b21a698e4141" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "ae744aa77b778893fd6665ae437fd6c7" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "0a622fceb54960e5aa5fe935cf6352c9" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "574f2b7e69c232d661e0eece3fc0bb13" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "908d156e2e53561115ed30635cd3a1b2" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "4da05985a10aab57721bddf5596a4421" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "1ca84ba5dcde33f2d17b55da92835fd7" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "5b18be04adfcfdaa0d8d4dbbb166de4a" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 30269440, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3375104 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15122432 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 15138816 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 19337216 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 22695936 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 26894336 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30253056 + } + ], + "md5sum": "d7f13270e21c825c9e0f18874aaafb81" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "172be0424d1897c86e4dea8d668f961b" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "dbc94bcf8183d7d9609e3c37e37bb1ca" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "acf5655337f0c0e05adef76409060404" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "0d3cca735c48a57939ced788634bbd28" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "42c76d7d4eac5d3dac12f2042071d753" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "c4118a266c0cda1550d297d153749240" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "7b6e7d6eefcc9ea2818a9708b4f1da70" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "8f47b61f443463f7a0c27dc599d39b18" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "c44caceb4fe53ab301b92a19da3a386e" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "4e1b264d9e67a68c7235b774bab88ef5" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "518b11bc9b5e5f482864a06b4a40fc16" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "39409c4c628752ace29be02806776f8e" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "5007e02dff6e1ad4b78c33a5cd5c4c6b" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "36aa7935e6228ffcc35d90057d0160ab" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 26894336, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 19320832 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 23519232 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26877952 + } + ], + "md5sum": "f468ba4244fd3f70815035540e0510e7" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "1f56ff72f84dbd031f690b4160bb3f68" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "8cab4529afddd38601da177c4f4836bd" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "7b51246adf348ffd195531e806bcb534" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "4adc63be9ed1998130780a2864731a6b" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "10712d57bfc1ffa1dc4cfa4f64985ec7" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "d5b8efc7319653852fb002ea580d1843" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "6f6cfb266b551435acd874e25ffbaa1a" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "bafc49958ff513d1356ffcd631b91293" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + } + ], + "md5sum": "42979570be595430e4b23fd5281a70f2" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "c5303995cfed1bdfe7973a43d3ab1f9d" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "1c3de56b47ae0f62ea38b7502f12b84e" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "a71c322be37a9a7886e193387affd261" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "1794aa25f83b0fb9c5d819691b893ed1" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "1fea0f5c0b1c2b827f3c3e2a5ef5f758" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "4a9300e211ba14bb1e054b8c74d23730" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "9db2edfdbe4e0657fae707eb51f7e028" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "b07642c5024a2a657e97074ec458b8d8" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11747328 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15945728 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19304448 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19337216 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19353600 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "eabee2f9ab92842183e87dc223d90009" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "30b53461342fbc2c797adff85f9366fc" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "73149fafdf03eb08ca1c8fb8e61eea64" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "f0f4552979d86929e06df418e80ebd9c" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "b94d938d07caa89bb35894ea758630c3" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "af234ea2cbcea120b6837e1c12ca7092" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "eac5c2b117b8409e55076c9f1bdc74ce" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "c80b53aacb20c5676ef7ec612607fe16" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "f7b7da3d0f08043e70b99f315ad7b03f" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "fca06c40ae2a3f1ea892fe980b5a530b" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "5ef30eb53ef019e81caa284dc55ce9da" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "d5cd403e3e382705f37a22fb175fcd32" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 30269440, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3375104 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15122432 + }, + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 15138816 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 19337216 + }, + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 22695936 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 26894336 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30253056 + } + ], + "md5sum": "ce04c8ca2f38d4e1395c88dd9e5d0c3c" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "bc22a3549a56bf22fe17f2004121e938" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "538c33199e314389ad8211ca3d80140d" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "8930f7d891c246fc119ae97597faaf14" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "699754b50406b68c05d15b1ff4bfe4ba" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "a01143ead721a2a30686497e7c2afeba" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "50ff92ad68f01b3434bfcd4a12f57f56" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "e0329e0d6b2b14385247f977f4a28c7b" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "40f84e7ff2af9bf9196de4a33e4cf801" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "5591299adfe8c138137e2c052ec17b8e" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "c62225c3f4c474fe464b02dfa8fc6540" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "ff71307d37e26c96b4b952a876bb475a" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "876aeec6869ebbed6d90d0737a73fa12" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "2103c34abcad05aeed4e8d3c245b10eb" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "7218a22920352a90745ae74e4ca58da5" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 26894336, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 19320832 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 23519232 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26877952 + } + ], + "md5sum": "e97411b24eed3b70b47cbb31f536afb4" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "19fe866ac68693e567a5c4f0de6b68d0" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "e4ab2a94cc7c569fbd05e41468b319d4" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "83c65ba5244fea1ccea6451d10b5511e" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "d5ee69673f7d8e1886cc2374b83e769c" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "69c836efc877ad6e92fa9df264a9303e" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "c2abbd1f851192a726a10cd63b18c3e2" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "56332ae2c9c1e14610d1f83fc28a19d7" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "d401ad9e22a12d5f2abe86c5caa08cef" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + } + ], + "md5sum": "3b6b2358bce51c681365b2f4956c7c57" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "05ecf3beb0277c095808ef1a5943c136" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "241d92b247c245e7e0ac2da4f0d5bd62" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "fdecd51d38dc83044fc47cb0ae0cb435" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "49d3307a595f781a81f34a272c655f12" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "06e3dd351f6ecfd40c646059f16021a9" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "3b2e0909542eb62c028e64172f46e279" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "62f0a6b3ac812f7d9455f448ffcb0b6f" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "358b1193d40d7c4d3f70245b12aa2129" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11747328 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15945728 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19304448 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19337216 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19353600 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "4d6fafc77d655743d145ba393d881192" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "14d2e0bc76058a10dce186660e72e17e" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "c42829898f93df85944c0fedf30997cc" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "04394ed5e3865d539ef72795043d224a" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "5f5c2d0fdc01b6f5f544901104e08695" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.40.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "97de6a18cf242be2a183b1ff794a7d5a" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "520c87a2811aa53d65f32381b0237f37" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "ab016179053ac29bc00f73d45ff98898" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "2ebd840ac59090ff7da413d4cc3a84fa" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.41.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "5c7f6842cceeb439a08bbd4f77fc612f" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "6fa171fe6de3d2997c727a5fe88e1178" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "3fa3c72e836f68c5d40fccb61bc0cf14" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 30269440, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3375104 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15122432 + }, + { + "name": "model.layers.40.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 15138816 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 19337216 + }, + { + "name": "model.layers.41.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 22695936 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 26894336 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30253056 + } + ], + "md5sum": "8c4496bfc94552d61c0d182fc084409a" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "957e51f3ad0776015c785be0bf79da16" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "4ee1b4147bed836fcfd9bf492f345c68" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "f945bd179235defc00b62b5770f086fe" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "3ebb0c3a508d54fd0fb5044cf18c40da" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "0b98cde111da46f3a946380406313141" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "fc013304ac4ace481b14d7c49e2966a2" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "2639b0e379aeb19d979f2fc43e853609" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "5df595698a188d6ab3187c7f90236f72" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + } + ], + "md5sum": "71cc8b9df4656cf45616d9b750e23e4b" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "287e82dc565c2726d617afb009692160" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "44e54d00ef58276303e6bdbdb0375278" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "1e254cc58c2a200e17353789bcb4fffc" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "d43bd645d9afb081fdd7f39b343af674" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "236c110f2f2840e78210550645b17576" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "9087b2e8d805b71c42102bb4f351161a" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11747328 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15945728 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19304448 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19320832 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31068160 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31084544 + } + ], + "md5sum": "39a2c36063fc64ad5122c5034ec3e7f8" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "11f7de14aeb0c0579dd1bce561e0dd73" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "10f91ca3f51462a32795aacfdb8a3841" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.42.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "6194b469e0869bd15edbaa1e9c76ff19" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "95ba0bab1b63e8801a5ddf7ef3860617" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "fef775966e3548b7e7c5c7fbac9e6d1c" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "1a2f56bb4ebc0f8ffd1b175ddedb5473" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "21bea814718e73c0415edd2326a98784" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.43.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "522f98957f243ec7998a0cb9f08523bc" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.42.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19337216 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31084544 + } + ], + "md5sum": "d9f770610a52fecb8a46e22c5cc30c7a" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.43.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "d7e309fa76463d77dc8e3435308773f7" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.44.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "ea487b1d07da4adab3160179c7014ad7" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "79d2389ff5c4124d2b9c2ae162513924" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "d48aefee9f7db4a9d198d96100fe85d9" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "63d23d1e98b618b29495c16ecf6288ee" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "74080dad057426f845e457bb34a7e5d9" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "955e06b79eebff3d644f0d7d3c27ba83" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 22712320, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.44.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 3358720 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 7557120 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 10915840 + }, + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 10932224 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 22679552 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 22695936 + } + ], + "md5sum": "2697843ee12f3473efac4131fc9b1e53" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "f8dab8040bfd516e013a4683601e3f23" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "31f59ce0e1e3dd3c622f93279f6cb666" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.45.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "608552569e685977ae7f97a2e07ded78" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "7bc55de92185fc310ef82c6f93c2ae61" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "4436711989be1fceee06fba9b82412e7" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "17ae88f77e0743ce409bcb4b1ab8b3d2" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "5f591c9b73d52af3b6ed049aff80ee23" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.46.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "4d461a1d95e1f66ce720e7c49ab9fb3f" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.45.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19320832 + } + ], + "md5sum": "4d6cd28c9beeca0af8aa39606f21b8ef" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.46.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "d906f0718b295a0f77c4a1523e8c0e94" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "afa50c16d1a6086a5091008fb1892295" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "165c4ded8825dff48392a6345b80a570" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "1685d8e93bbf9a8176fe9eb6c0f0e850" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.47.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "8195047858a3e08d93b79b626a7e15b0" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "9219787f636fc35fc28569063f0fda82" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "5fa06e749aa3b4ce3f7c91e3b4c85dc9" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 22745088, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3375104 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3391488 + }, + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3407872 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15155200 + }, + { + "name": "model.layers.47.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 15171584 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 19369984 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 22728704 + } + ], + "md5sum": "0f542d99cd9b653d4b0bf82a6b004f37" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "94fc05aea36b09d469becb3f13860218" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "84ce2d9c88615f93bdc206222cd3d5b2" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.48.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "4d14fc90894dcae4e327bd2af70f762a" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "911716952cd15efcd81fa6057960e0d8" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "7ef2cc2c8a3594af1aeb7b48128caeec" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "bdc63c6c35351c670810b5271c95d6cc" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.49.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "6904cbc7443e52e04a194a954de7725a" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "70385f3a6ee3c547b8d76c5aa591a83d" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "3ea65371824ccd24d321a104ea87cac3" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 26894336, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.48.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.49.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 19320832 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 23519232 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26877952 + } + ], + "md5sum": "cc021b1c69ef95f85ee3ba5f4e3ea2a8" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "543572eec04507cdb18a8db520f08dbc" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "11c32cce2adc64001006f4ac242863fb" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "f7e59f9a27acefe6643d22ac6a3936b9" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.50.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "1f92881b093cbb5ce56ca19b30e426c8" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "5cad703ba954c1e923829d564d09ebf1" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "b211388796819b029ee29dcc23a98da4" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.50.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "788fb4a4a47432952f9e8dd713e687cb" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "ebc273ee4a6c850fcf4bbab13f7bedbc" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "6b9bf4c6296bee6705ed9edaf3290f82" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.51.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "cf77dc8e8b511f833c53ed09bd17c169" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "5c56984c66bba9ddce651a320787ae78" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.52.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "536d57e7c7b61009db0e62417efbb3a4" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "5a70f4ee59a22575ce0a203832cdabb9" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "579c9e05885e158ff88e9cd870b1962f" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 26894336, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.51.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.52.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 19320832 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 23519232 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26877952 + } + ], + "md5sum": "783c9338fa120e3f36a8be6d8d2dfc8a" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "cf159c5d60489a4d0db03a0330dd06a4" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "679abefffac9bb8e2eeda2695931c039" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "de51b0921287e8627e9737aca0852d14" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "babf2ac4eba0ebda0b3191bd24dd97a2" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "f2698420f460893a1b14234263feb000" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.53.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "cd8ea1ee642120f1f89d6427a974fa35" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "c49e69e9764786ce6ca9ec99aa222466" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "061c1e2c3e818091113b6a398159dd9a" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.53.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + } + ], + "md5sum": "517c78d92d193228f06361c2704b1d64" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "c7e0f8a48e3bfcc9af344746f1bec5a3" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "ab628697a59bb432d004bf83c99e30de" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.54.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "63c5db62a180d667c71bf4a6634d2b69" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "1d5b4b9c67bab6aad9f6338d3d69e531" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "f0652c9d425680f694f8d11b13e40816" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "14de629b5da0b34389fe2cadbea949c0" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "7165cebe5c7ed88ee883df3ebaf4ed1e" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.55.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "b4e35b505385ae5daa99c28ceb6f6492" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.54.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11747328 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15945728 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19304448 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19337216 + }, + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19353600 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "1aec315da1ffe96dbc4e95ae99c921c2" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.55.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "112e6607a463fb0e4d1d7ea378d889eb" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "63d8118088309b8ff7b50ca43193c60a" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "51980f9f99479fa3d1991e9d6e249201" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "4e06aab8439948471b6e9702c817868c" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.56.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "0be825e789de71459c9d38fa8fac65bb" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "73eea1166bcb6376f5e12abc44551361" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "b63095e6f1509b2929c4792cd6d9a925" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "7b4a667fa3eed9f56e95d98bcc3a6bd0" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.57.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "f8149f51eddd259d173242c02237a751" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "1afebf0312fb1503d9bb7d4f35ed572c" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "8840d363d1fe9ec152c0c3c884678586" + }, + { + "dataPath": "params_shard_323.bin", + "format": "raw-shard", + "nbytes": 30269440, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3375104 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15122432 + }, + { + "name": "model.layers.56.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 15138816 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 19337216 + }, + { + "name": "model.layers.57.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 22695936 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 26894336 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30253056 + } + ], + "md5sum": "56a253f5052fe183c6cb287c7f991e81" + }, + { + "dataPath": "params_shard_324.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "757b668328ba63deb853d87cb0ffb311" + }, + { + "dataPath": "params_shard_325.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "8a5da53d1c1c18edb84b5556c8447824" + }, + { + "dataPath": "params_shard_326.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "2639f2132d4be9946c2885ae1ab63c7b" + }, + { + "dataPath": "params_shard_327.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.58.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "eee87b51a4b468bf1fd02464fe09e411" + }, + { + "dataPath": "params_shard_328.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "0063c32518767ed5b1d95fd5367ebe77" + }, + { + "dataPath": "params_shard_329.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "e5e0d0bee5a2d331524cc568c2e94a86" + }, + { + "dataPath": "params_shard_330.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.58.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "888bfac2bfb8eaca6c9c2bbace82537b" + }, + { + "dataPath": "params_shard_331.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "5d51b6693dbdbecdf0109ece08125f6c" + }, + { + "dataPath": "params_shard_332.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "06fc54f807880bb21eec5b5be909c4f4" + }, + { + "dataPath": "params_shard_333.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.59.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "fed3599a9e0cd58a5a0ff652c019015c" + }, + { + "dataPath": "params_shard_334.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "4ca9de0ac832a539cbc11a474f510d3e" + }, + { + "dataPath": "params_shard_335.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.60.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "e8783ff87570b118e00f7577d4821320" + }, + { + "dataPath": "params_shard_336.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "e8bab125bac7293dfa3829f76ca9ac83" + }, + { + "dataPath": "params_shard_337.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "5ac99e7af6c163ee9fd1c908fd5222e1" + }, + { + "dataPath": "params_shard_338.bin", + "format": "raw-shard", + "nbytes": 26927104, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.59.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.60.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 19320832 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 23519232 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26877952 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26894336 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26910720 + } + ], + "md5sum": "312dd9a20f74870378f85c9a691f266b" + }, + { + "dataPath": "params_shard_339.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "745de801519d0beb5f85e9c906464e90" + }, + { + "dataPath": "params_shard_340.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "2147cab5b36ff2efacc6628e380fe105" + }, + { + "dataPath": "params_shard_341.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "6c636b78f31152713800ce78637e7dfc" + }, + { + "dataPath": "params_shard_342.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "35c9d44d80887961910f32484ff44247" + }, + { + "dataPath": "params_shard_343.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "ba77d65afb818284e72c34a53a2e9c66" + }, + { + "dataPath": "params_shard_344.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "7f3bba8acf23ba3d324797d3015a78df" + }, + { + "dataPath": "params_shard_345.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "0cfc916f4d513ccd4930cead36daaea4" + }, + { + "dataPath": "params_shard_346.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "cba0f9afe96dad1ead23599725c38837" + }, + { + "dataPath": "params_shard_347.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19337216 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31084544 + } + ], + "md5sum": "a527a3a89ed1619865e88e9afbf38896" + }, + { + "dataPath": "params_shard_348.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "ca1a13cec5750139aa001d05069d9361" + }, + { + "dataPath": "params_shard_349.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "e73492c4c6c7ad5a24205e4125b42923" + }, + { + "dataPath": "params_shard_350.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "1e1511af563f8a4d20e4d4a1d4bc8630" + }, + { + "dataPath": "params_shard_351.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "23a1bee3401d40537b395fb673738802" + }, + { + "dataPath": "params_shard_352.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "6aa2f2d8cfdbd82b4776b6068f84f1a7" + }, + { + "dataPath": "params_shard_353.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "8aeead059ec0bc46d1d4382eb2721804" + }, + { + "dataPath": "params_shard_354.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "7c8f2d25bfab6313cdf34a7bd91fc4cb" + }, + { + "dataPath": "params_shard_355.bin", + "format": "raw-shard", + "nbytes": 22712320, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 3358720 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 7557120 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 10915840 + }, + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 10932224 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 22679552 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 22695936 + } + ], + "md5sum": "f036caf5164b8124ecf21205a13fb8f2" + }, + { + "dataPath": "params_shard_356.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "567e22d32bfc6d2c68bddcd9e2cc415c" + }, + { + "dataPath": "params_shard_357.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "8f3a708d56b035f193e6701848448a4c" + }, + { + "dataPath": "params_shard_358.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.61.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "d0f06a13ca193e5686fa378293c3279e" + }, + { + "dataPath": "params_shard_359.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "66b3075a348c192c23c4e0241149af02" + }, + { + "dataPath": "params_shard_360.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "2d6077133a92d11f3b4b51527c13b0b5" + }, + { + "dataPath": "params_shard_361.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "17c1403ca9d36c4a1e2fc82db9af3009" + }, + { + "dataPath": "params_shard_362.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "21673c0591dc34e515c14995f7c84734" + }, + { + "dataPath": "params_shard_363.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.62.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "e5946d1d0bc1c73cbc045963f04f4d74" + }, + { + "dataPath": "params_shard_364.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.61.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19320832 + } + ], + "md5sum": "d75bbf2ffa6fcf72d7162b2140512bc6" + }, + { + "dataPath": "params_shard_365.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.62.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "48d9bea1426872f792fd1cc8c051caad" + }, + { + "dataPath": "params_shard_366.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "23b4fcd1b7b99a184812687e93ed81d8" + }, + { + "dataPath": "params_shard_367.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "f60a3350343f307f84c847a4b98916f8" + }, + { + "dataPath": "params_shard_368.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "549a917f37aa1fba5be5827eab355487" + }, + { + "dataPath": "params_shard_369.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.63.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "9c18363da8b41f9c513028429c651c23" + }, + { + "dataPath": "params_shard_370.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "eef554a964357b58b5b416cb3fa20449" + }, + { + "dataPath": "params_shard_371.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "30f0c74583d34cf5275e21a328d3230f" + }, + { + "dataPath": "params_shard_372.bin", + "format": "raw-shard", + "nbytes": 22745088, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3375104 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3391488 + }, + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3407872 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15155200 + }, + { + "name": "model.layers.63.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 15171584 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 19369984 + }, + { + "name": "model.layers.64.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 22728704 + } + ], + "md5sum": "e32dda050e123b799ea8a56c13d50e19" + }, + { + "dataPath": "params_shard_373.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "d43f98754798afa035c4ed1fc7269d85" + }, + { + "dataPath": "params_shard_374.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "c1da5ad42d1b271ea327c06b87227738" + }, + { + "dataPath": "params_shard_375.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.64.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "b65c1de0b7691c9cd9da137b142ca02e" + }, + { + "dataPath": "params_shard_376.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.64.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "28651aee1ee8091c18e7284bb2e16e66" + }, + { + "dataPath": "params_shard_377.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "5da05ac00e46adb6b499554d02f85355" + }, + { + "dataPath": "params_shard_378.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "4f4687e51c1bb229a9183002d757a865" + }, + { + "dataPath": "params_shard_379.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.65.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "977ffbf711f0a7f1d67a422f23a05db5" + }, + { + "dataPath": "params_shard_380.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.65.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "01f6eaad3bda217d13720dcee9a5b87b" + }, + { + "dataPath": "params_shard_381.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "9932fdec007234c931eb2935836a1f3b" + }, + { + "dataPath": "params_shard_382.bin", + "format": "raw-shard", + "nbytes": 26894336, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.64.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.64.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.64.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.65.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 19320832 + }, + { + "name": "model.layers.65.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 23519232 + }, + { + "name": "model.layers.65.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26877952 + } + ], + "md5sum": "728c8eeb7eaffea8ecb137f822c81e08" + }, + { + "dataPath": "params_shard_383.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "1a788b7acb15cbec07145042f27b2fba" + }, + { + "dataPath": "params_shard_384.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "ede815323e680576a8d729ffd48b993d" + }, + { + "dataPath": "params_shard_385.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "73343b40700331490a292d7e7d3c8035" + }, + { + "dataPath": "params_shard_386.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.66.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "f341d90460f8448a3723b6306e530f1a" + }, + { + "dataPath": "params_shard_387.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.66.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "15ba85d227658d4aee4817f478b577e5" + }, + { + "dataPath": "params_shard_388.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "c8bd54a4f25343c79da5049089bc7dad" + }, + { + "dataPath": "params_shard_389.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.65.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.66.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.66.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.66.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.66.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.66.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + }, + { + "name": "model.layers.67.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "15776278ddf20a23c83a86bbc7486b6c" + }, + { + "dataPath": "params_shard_390.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "e62c4b8d697f8873ac94f17658b1323a" + }, + { + "dataPath": "params_shard_391.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "5485b495a33a9356ea476068f9f795d6" + }, + { + "dataPath": "params_shard_392.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.67.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "5b9359e0a54e466b5909f31542f7f036" + }, + { + "dataPath": "params_shard_393.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.67.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "2846db55feb473e0dd945b74de3fd239" + }, + { + "dataPath": "params_shard_394.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.68.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "11cfc34fd57b477db7c2cdb45ad215b0" + }, + { + "dataPath": "params_shard_395.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "f16321c71f20019d772892cbaed106a1" + }, + { + "dataPath": "params_shard_396.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "78b8b00443ad4f31f67f997577e34fa5" + }, + { + "dataPath": "params_shard_397.bin", + "format": "raw-shard", + "nbytes": 26894336, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.67.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.67.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.67.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.68.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 19320832 + }, + { + "name": "model.layers.68.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 23519232 + }, + { + "name": "model.layers.68.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26877952 + } + ], + "md5sum": "cb755c8a9873d47a2bc2f0ec15f645d8" + }, + { + "dataPath": "params_shard_398.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "2b4dd5448dea8b14fe85a6b0945ddaff" + }, + { + "dataPath": "params_shard_399.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "9e96fda54e0654811127719cfc172a51" + }, + { + "dataPath": "params_shard_400.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.69.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "32851c03f419fb8f7a0c008c0787df68" + }, + { + "dataPath": "params_shard_401.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "85defc1b92290a22e98f3d0197e9cc5e" + }, + { + "dataPath": "params_shard_402.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "759432737f374e1a5f269696fd86f726" + }, + { + "dataPath": "params_shard_403.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.69.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "17c23e216f3e5b0ec782c6cec59b2f43" + }, + { + "dataPath": "params_shard_404.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "0030e1ec7cd4958bd2b9ddeb5c2e02dd" + }, + { + "dataPath": "params_shard_405.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "60e05c79cddecc3a39e72adbd8888d42" + }, + { + "dataPath": "params_shard_406.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.68.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.69.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.69.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.69.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.69.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.69.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + } + ], + "md5sum": "9bcbc7b2095532fc61af1b4fbf08b2da" + }, + { + "dataPath": "params_shard_407.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "47fd824b3de6eed43c6ad44a6ae6e85a" + }, + { + "dataPath": "params_shard_408.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "463402ad51c3cf01550169915021e10c" + }, + { + "dataPath": "params_shard_409.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.70.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "58d1f8220a4cb612f5883bc523a6c8fb" + }, + { + "dataPath": "params_shard_410.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "bf5db19ac9e7d12d022f2c7f5cafc36e" + }, + { + "dataPath": "params_shard_411.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "98f6d04e2edd7c37d8695e3fe253621b" + }, + { + "dataPath": "params_shard_412.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "7f065ea5725ba560e84934340a2b0a25" + }, + { + "dataPath": "params_shard_413.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "1f155c42228cfb49b985edbcb1d0da85" + }, + { + "dataPath": "params_shard_414.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.71.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "da782ab956192657acfee4aa18822252" + }, + { + "dataPath": "params_shard_415.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.70.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11747328 + }, + { + "name": "model.layers.70.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15945728 + }, + { + "name": "model.layers.70.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19304448 + }, + { + "name": "model.layers.70.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19320832 + }, + { + "name": "model.layers.71.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19337216 + }, + { + "name": "model.layers.71.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 19353600 + }, + { + "name": "model.layers.71.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "a4e6af3b3e9f89b67fa10cf6a8587d0e" + }, + { + "dataPath": "params_shard_416.bin", + "format": "raw-shard", + "nbytes": 31068160, + "records": [ + { + "name": "model.layers.71.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 0 + }, + { + "name": "model.layers.71.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 4198400 + } + ], + "md5sum": "2221d5851b96036ef00d092be251995c" + }, + { + "dataPath": "params_shard_417.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.72.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "c27aafdcb1fd510cd92400c88cf2b51f" + }, + { + "dataPath": "params_shard_418.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "63886943a345bc134f9022ea336d796c" + }, + { + "dataPath": "params_shard_419.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "f44b52cb74fa76468da62cec012e395b" + }, + { + "dataPath": "params_shard_420.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.72.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "f3530ed03f31e068a89db1db2f95e3a1" + }, + { + "dataPath": "params_shard_421.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "1a83e6700efabe396d339a91c79d5856" + }, + { + "dataPath": "params_shard_422.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "9b85f3a5f0e27093370681a394111ae0" + }, + { + "dataPath": "params_shard_423.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "12a0fd0cf6303d6c2b093636ef107f6e" + }, + { + "dataPath": "params_shard_424.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.73.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "2374a16faac055035792e859ec5d83fc" + }, + { + "dataPath": "params_shard_425.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.73.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "d2ccebd7916bcbe6c281ff38a01dc25d" + }, + { + "dataPath": "params_shard_426.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "a4d75e38e466bac0bd61d08ec8fe7cf2" + }, + { + "dataPath": "params_shard_427.bin", + "format": "raw-shard", + "nbytes": 30269440, + "records": [ + { + "name": "model.layers.71.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 0 + }, + { + "name": "model.layers.72.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3358720 + }, + { + "name": "model.layers.72.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 3375104 + }, + { + "name": "model.layers.72.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15122432 + }, + { + "name": "model.layers.72.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 15138816 + }, + { + "name": "model.layers.72.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 19337216 + }, + { + "name": "model.layers.73.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 22695936 + }, + { + "name": "model.layers.73.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 26894336 + }, + { + "name": "model.layers.73.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30253056 + } + ], + "md5sum": "e8466b1ee854925f3f47edc4f69d1ec1" + }, + { + "dataPath": "params_shard_428.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "7fce10fd2b99ee65baf900c9cf245edf" + }, + { + "dataPath": "params_shard_429.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "5bee1023b5f7b65e4f1f88abd1649041" + }, + { + "dataPath": "params_shard_430.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "704744da29c5075546d90229eee36f92" + }, + { + "dataPath": "params_shard_431.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.74.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "bd8feed5ee06472ac8ed0703bc4f061e" + }, + { + "dataPath": "params_shard_432.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.74.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "00a5d074b63a65296875e14ea6844d9c" + }, + { + "dataPath": "params_shard_433.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "dc087afb2c283f6ad74f548c5cf0e8d8" + }, + { + "dataPath": "params_shard_434.bin", + "format": "raw-shard", + "nbytes": 31117312, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.73.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.74.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.74.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.74.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.74.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.74.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + }, + { + "name": "model.layers.75.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31100928 + } + ], + "md5sum": "2d20721605716c031658bca06d557721" + }, + { + "dataPath": "params_shard_435.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "0ad90a190f98943f7708505b5523d319" + }, + { + "dataPath": "params_shard_436.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "bb1b528a70b68292f3f22265765a2284" + }, + { + "dataPath": "params_shard_437.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.75.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "d81c21ea696779f6cf5e7583c3490086" + }, + { + "dataPath": "params_shard_438.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "73fe87191405725d19feac281c70fdb3" + }, + { + "dataPath": "params_shard_439.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.76.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "869e55962861fb8abfe9f23285ba353b" + }, + { + "dataPath": "params_shard_440.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.76.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "09e4b49f22e8a7b91fee83ebdc61608f" + }, + { + "dataPath": "params_shard_441.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "b594fd883e16dfe18df811d2307bb876" + }, + { + "dataPath": "params_shard_442.bin", + "format": "raw-shard", + "nbytes": 26894336, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.75.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.75.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11763712 + }, + { + "name": "model.layers.75.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15962112 + }, + { + "name": "model.layers.76.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 19320832 + }, + { + "name": "model.layers.76.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 23519232 + }, + { + "name": "model.layers.76.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 26877952 + } + ], + "md5sum": "6bdadf102248847734ba597bfe8c41b4" + }, + { + "dataPath": "params_shard_443.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "e1e4ec48b412f31c1fd0f74a92b7d3f0" + }, + { + "dataPath": "params_shard_444.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "909761c5b71d9b3eae6a056ce710483b" + }, + { + "dataPath": "params_shard_445.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "72dfdacf07339de7217de2512609e7dd" + }, + { + "dataPath": "params_shard_446.bin", + "format": "raw-shard", + "nbytes": 188088320, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 188088320, + "byteOffset": 0 + } + ], + "md5sum": "f547514098e4448601da1545ee4abcbc" + }, + { + "dataPath": "params_shard_447.bin", + "format": "raw-shard", + "nbytes": 23511040, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 23511040, + "byteOffset": 0 + } + ], + "md5sum": "248431a4d54737d3054fa3eb04ac1d81" + }, + { + "dataPath": "params_shard_448.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.77.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "c5a1f3e7b594932d22d3d0d9bc15cd5f" + }, + { + "dataPath": "params_shard_449.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "b11ab96c461449a5bef8f81cf4895d6c" + }, + { + "dataPath": "params_shard_450.bin", + "format": "raw-shard", + "nbytes": 93978624, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.q_weight", + "shape": [ + 8192, + 2868 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 93978624, + "byteOffset": 0 + } + ], + "md5sum": "7fb381d24a0d0a94eae345c1dc793fef" + }, + { + "dataPath": "params_shard_451.bin", + "format": "raw-shard", + "nbytes": 31100928, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.76.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11747328 + }, + { + "name": "model.layers.77.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 11763712 + }, + { + "name": "model.layers.77.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 11780096 + }, + { + "name": "model.layers.77.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 23527424 + }, + { + "name": "model.layers.77.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 23543808 + }, + { + "name": "model.layers.77.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 27742208 + } + ], + "md5sum": "0275bb6c6530aa37dd71952e4a224be5" + }, + { + "dataPath": "params_shard_452.bin", + "format": "raw-shard", + "nbytes": 33587200, + "records": [ + { + "name": "model.layers.78.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33587200, + "byteOffset": 0 + } + ], + "md5sum": "4992695c89fe0c277765c4d787fcfb4c" + }, + { + "dataPath": "params_shard_453.bin", + "format": "raw-shard", + "nbytes": 26869760, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26869760, + "byteOffset": 0 + } + ], + "md5sum": "11aeafea7270233b3856538aba8478c6" + }, + { + "dataPath": "params_shard_454.bin", + "format": "raw-shard", + "nbytes": 19304448, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.q_scale", + "shape": [ + 8192, + 717 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 11747328, + "byteOffset": 0 + }, + { + "name": "model.layers.78.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4198400, + "byteOffset": 11747328 + }, + { + "name": "model.layers.78.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3358720, + "byteOffset": 15945728 + } + ], + "md5sum": "1fedb58739022a8929ff28d88dce4c6e" + } + ] +} \ No newline at end of file