diff --git "a/ndarray-cache-b16.json" "b/ndarray-cache-b16.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache-b16.json" @@ -0,0 +1,10505 @@ +{ + "metadata": { + "ParamSize": 867, + "ParamBytes": 7356355584.0, + "BitsPerParam": 4.8288565767176745 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 503439360, + "records": [ + { + "name": "language_model.model.embed_tokens.q_weight", + "shape": [ + 262208, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 503439360, + "byteOffset": 0 + } + ], + "md5sum": "6dbeddc336f544ac10a102bedbacda41" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 62929920, + "records": [ + { + "name": "language_model.model.embed_tokens.q_scale", + "shape": [ + 262208, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 62929920, + "byteOffset": 0 + } + ], + "md5sum": "53d948378b1e9a4703d25e934e460904" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "8b849bc9d4e86dd71fc5e30a1ea57124" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 33185280, + "records": [ + { + "name": "language_model.model.layers.0.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 7680 + }, + { + "name": "language_model.model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 29498880 + } + ], + "md5sum": "9ac79053e29b289f645eb5c2d931db73" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33447424, + "records": [ + { + "name": "language_model.model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.0.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.0.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7380480 + }, + { + "name": "language_model.model.layers.0.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7388160 + }, + { + "name": "language_model.model.layers.0.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 7395840 + }, + { + "name": "language_model.model.layers.0.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7396352 + }, + { + "name": "language_model.model.layers.0.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 11328512 + }, + { + "name": "language_model.model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11820032 + }, + { + "name": "language_model.model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 19684352 + }, + { + "name": "language_model.model.layers.0.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 20667392 + }, + { + "name": "language_model.model.layers.0.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20667904 + }, + { + "name": "language_model.model.layers.0.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 28532224 + }, + { + "name": "language_model.model.layers.0.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29515264 + } + ], + "md5sum": "860ce7cb8ffe27c500b5f2e3f6ba21aa" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 29990400, + "records": [ + { + "name": "language_model.model.layers.0.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.1.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 499200 + } + ], + "md5sum": "8eee40d625c19ea5b8845fa81d4288ff" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "38ffb781fbcbe6d4f34ca95df84a240d" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 33201664, + "records": [ + { + "name": "language_model.model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.1.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11059200 + }, + { + "name": "language_model.model.layers.1.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11066880 + }, + { + "name": "language_model.model.layers.1.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11074560 + }, + { + "name": "language_model.model.layers.1.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 11082240 + }, + { + "name": "language_model.model.layers.1.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11082752 + }, + { + "name": "language_model.model.layers.1.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 15014912 + }, + { + "name": "language_model.model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 15506432 + }, + { + "name": "language_model.model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 23370752 + }, + { + "name": "language_model.model.layers.1.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 24353792 + }, + { + "name": "language_model.model.layers.1.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24354304 + }, + { + "name": "language_model.model.layers.1.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 32218624 + } + ], + "md5sum": "e5a9662be4822c9853231058f721df39" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "94451d7d6b68ac88027dfadf266e7e9f" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "b1a80a976ab0df366ddb543990aa7ed4" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 28785664, + "records": [ + { + "name": "language_model.model.layers.1.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.1.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.2.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 4431360 + }, + { + "name": "language_model.model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 8117760 + }, + { + "name": "language_model.model.layers.2.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15490560 + }, + { + "name": "language_model.model.layers.2.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15498240 + }, + { + "name": "language_model.model.layers.2.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15505920 + }, + { + "name": "language_model.model.layers.2.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 15513600 + }, + { + "name": "language_model.model.layers.2.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 15514112 + }, + { + "name": "language_model.model.layers.2.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 19446272 + }, + { + "name": "language_model.model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 19937792 + }, + { + "name": "language_model.model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 27802112 + }, + { + "name": "language_model.model.layers.2.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 28785152 + } + ], + "md5sum": "70b265686a575ba1fee13f4557d840a6" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "1361ff8a4d36f6ae603b82fd22f4e946" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "2ba674bef456540ed53433db674f8206" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 28785152, + "records": [ + { + "name": "language_model.model.layers.2.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.2.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.2.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.2.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 12779520 + }, + { + "name": "language_model.model.layers.3.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 13278720 + }, + { + "name": "language_model.model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 16965120 + }, + { + "name": "language_model.model.layers.3.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24337920 + }, + { + "name": "language_model.model.layers.3.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24345600 + }, + { + "name": "language_model.model.layers.3.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24353280 + }, + { + "name": "language_model.model.layers.3.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 24360960 + }, + { + "name": "language_model.model.layers.3.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 24361472 + }, + { + "name": "language_model.model.layers.3.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 28293632 + } + ], + "md5sum": "4aeeec1b43a32d39d837cffc47273fab" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "36a678f44f3a75eea9f4ef63c27ac419" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 33424384, + "records": [ + { + "name": "language_model.model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.3.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.3.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.3.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 16712192 + }, + { + "name": "language_model.model.layers.3.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 17695232 + }, + { + "name": "language_model.model.layers.3.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 21627392 + }, + { + "name": "language_model.model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.4.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 29491712 + }, + { + "name": "language_model.model.layers.4.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29492224 + } + ], + "md5sum": "3b61998fd7d82350ac66729ec050ca1f" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "54d51272e3dfb0d5ee79b1b28dc4c8a4" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "99bad2b20c526457ec9b841704051f02" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 26304512, + "records": [ + { + "name": "language_model.model.layers.4.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 8355840 + }, + { + "name": "language_model.model.layers.4.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 9338880 + }, + { + "name": "language_model.model.layers.4.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 9339392 + }, + { + "name": "language_model.model.layers.4.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 17203712 + }, + { + "name": "language_model.model.layers.4.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 18186752 + }, + { + "name": "language_model.model.layers.4.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.10.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 22610432 + }, + { + "name": "language_model.model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 22618112 + } + ], + "md5sum": "cbd605aa321fdf8618921d91c0538500" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 33447424, + "records": [ + { + "name": "language_model.model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.10.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.10.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7380480 + }, + { + "name": "language_model.model.layers.10.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7388160 + }, + { + "name": "language_model.model.layers.10.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 7395840 + }, + { + "name": "language_model.model.layers.10.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7396352 + }, + { + "name": "language_model.model.layers.10.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 11328512 + }, + { + "name": "language_model.model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11820032 + }, + { + "name": "language_model.model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 19684352 + }, + { + "name": "language_model.model.layers.10.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 20667392 + }, + { + "name": "language_model.model.layers.10.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20667904 + }, + { + "name": "language_model.model.layers.10.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 28532224 + }, + { + "name": "language_model.model.layers.10.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29515264 + } + ], + "md5sum": "2be5375a0eb4af0a0b79b3aaad9e8324" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 29990400, + "records": [ + { + "name": "language_model.model.layers.10.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.11.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 499200 + } + ], + "md5sum": "ea785075167716c2473b3663d33df1ef" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "667b6830a875ffa9a5e1ebe9eeebae1a" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 33201664, + "records": [ + { + "name": "language_model.model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.11.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11059200 + }, + { + "name": "language_model.model.layers.11.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11066880 + }, + { + "name": "language_model.model.layers.11.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11074560 + }, + { + "name": "language_model.model.layers.11.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 11082240 + }, + { + "name": "language_model.model.layers.11.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11082752 + }, + { + "name": "language_model.model.layers.11.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 15014912 + }, + { + "name": "language_model.model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 15506432 + }, + { + "name": "language_model.model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 23370752 + }, + { + "name": "language_model.model.layers.11.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 24353792 + }, + { + "name": "language_model.model.layers.11.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24354304 + }, + { + "name": "language_model.model.layers.11.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 32218624 + } + ], + "md5sum": "210420566ea0d449953360589a186556" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "8d5596cb4eda8d075dd37e5f9a7ef6c3" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "f55e6af76b10aa3c1594111801fa2020" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 28785664, + "records": [ + { + "name": "language_model.model.layers.11.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.11.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.12.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 4431360 + }, + { + "name": "language_model.model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 8117760 + }, + { + "name": "language_model.model.layers.12.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15490560 + }, + { + "name": "language_model.model.layers.12.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15498240 + }, + { + "name": "language_model.model.layers.12.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15505920 + }, + { + "name": "language_model.model.layers.12.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 15513600 + }, + { + "name": "language_model.model.layers.12.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 15514112 + }, + { + "name": "language_model.model.layers.12.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 19446272 + }, + { + "name": "language_model.model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 19937792 + }, + { + "name": "language_model.model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 27802112 + }, + { + "name": "language_model.model.layers.12.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 28785152 + } + ], + "md5sum": "a9ac59418c0ae7d203cd6d3c0bac904d" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "e940156992eaa8665a17478dcd6c390a" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "7914d10d9a230de9605e8fffe8dc3960" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 28785152, + "records": [ + { + "name": "language_model.model.layers.12.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.12.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.12.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.12.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 12779520 + }, + { + "name": "language_model.model.layers.13.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 13278720 + }, + { + "name": "language_model.model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 16965120 + }, + { + "name": "language_model.model.layers.13.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24337920 + }, + { + "name": "language_model.model.layers.13.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24345600 + }, + { + "name": "language_model.model.layers.13.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24353280 + }, + { + "name": "language_model.model.layers.13.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 24360960 + }, + { + "name": "language_model.model.layers.13.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 24361472 + }, + { + "name": "language_model.model.layers.13.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 28293632 + } + ], + "md5sum": "8c2c11cd123e3a086dec63a68600c018" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "ed53c67aaa13cd244fc71bf4280978db" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "e4fc6262db8830276d43b20c4e5b0235" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 33209344, + "records": [ + { + "name": "language_model.model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.13.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.13.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.13.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 16712192 + }, + { + "name": "language_model.model.layers.13.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 17695232 + }, + { + "name": "language_model.model.layers.13.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 21627392 + }, + { + "name": "language_model.model.layers.14.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 22126592 + }, + { + "name": "language_model.model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 25812992 + }, + { + "name": "language_model.model.layers.14.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33185792 + }, + { + "name": "language_model.model.layers.14.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33193472 + }, + { + "name": "language_model.model.layers.14.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33201152 + }, + { + "name": "language_model.model.layers.14.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 33208832 + } + ], + "md5sum": "8ac5947343b58b04e22b274c98bbba06" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "606d2fd4cb0d48d9a50d573977613bb2" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 26542592, + "records": [ + { + "name": "language_model.model.layers.14.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.14.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 12288000 + }, + { + "name": "language_model.model.layers.14.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.14.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13271552 + }, + { + "name": "language_model.model.layers.14.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 21135872 + }, + { + "name": "language_model.model.layers.14.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.14.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 26051072 + } + ], + "md5sum": "a4a6e7f7dfff6b473635d874e0bede15" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 33424384, + "records": [ + { + "name": "language_model.model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.15.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.15.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7373312 + }, + { + "name": "language_model.model.layers.15.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 11305472 + }, + { + "name": "language_model.model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11796992 + }, + { + "name": "language_model.model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 19661312 + }, + { + "name": "language_model.model.layers.15.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 20644352 + }, + { + "name": "language_model.model.layers.15.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20644864 + }, + { + "name": "language_model.model.layers.15.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 28509184 + }, + { + "name": "language_model.model.layers.15.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29492224 + } + ], + "md5sum": "9b9d0ff4081bdb29175d43f02eb0a083" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 29990400, + "records": [ + { + "name": "language_model.model.layers.15.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.4.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 499200 + } + ], + "md5sum": "cb3f860c2e114548b27e0d010d6dc3f1" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 33208320, + "records": [ + { + "name": "language_model.model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.4.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.4.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 3694080 + }, + { + "name": "language_model.model.layers.4.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 3701760 + }, + { + "name": "language_model.model.layers.5.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 3709440 + }, + { + "name": "language_model.model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 3717120 + } + ], + "md5sum": "ede724a7154de98e0b4904338c4989e3" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "ed724b874061249f4ae34cb2ddbd08fb" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 33201664, + "records": [ + { + "name": "language_model.model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.5.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11059200 + }, + { + "name": "language_model.model.layers.5.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11066880 + }, + { + "name": "language_model.model.layers.5.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11074560 + }, + { + "name": "language_model.model.layers.5.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 11082240 + }, + { + "name": "language_model.model.layers.5.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11082752 + }, + { + "name": "language_model.model.layers.5.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 15014912 + }, + { + "name": "language_model.model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 15506432 + }, + { + "name": "language_model.model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 23370752 + }, + { + "name": "language_model.model.layers.5.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 24353792 + }, + { + "name": "language_model.model.layers.5.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24354304 + }, + { + "name": "language_model.model.layers.5.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 32218624 + } + ], + "md5sum": "9d6629f94b02938d12d951b03680d186" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "559003734cf7a725fc4664a17817616a" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "3dc34ae1602f6c7e8b04dbfc6707707c" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 28785664, + "records": [ + { + "name": "language_model.model.layers.5.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.5.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.6.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 4431360 + }, + { + "name": "language_model.model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 8117760 + }, + { + "name": "language_model.model.layers.6.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15490560 + }, + { + "name": "language_model.model.layers.6.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15498240 + }, + { + "name": "language_model.model.layers.6.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15505920 + }, + { + "name": "language_model.model.layers.6.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 15513600 + }, + { + "name": "language_model.model.layers.6.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 15514112 + }, + { + "name": "language_model.model.layers.6.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 19446272 + }, + { + "name": "language_model.model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 19937792 + }, + { + "name": "language_model.model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 27802112 + }, + { + "name": "language_model.model.layers.6.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 28785152 + } + ], + "md5sum": "da003975acd626173d732cb623aa760a" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "eb4c699feae3e3885a400f14b5fbade0" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "f3ab52e1c9f6d35301d386a8d07254ec" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 28785152, + "records": [ + { + "name": "language_model.model.layers.6.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.6.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.6.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.6.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 12779520 + }, + { + "name": "language_model.model.layers.7.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 13278720 + }, + { + "name": "language_model.model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 16965120 + }, + { + "name": "language_model.model.layers.7.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24337920 + }, + { + "name": "language_model.model.layers.7.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24345600 + }, + { + "name": "language_model.model.layers.7.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24353280 + }, + { + "name": "language_model.model.layers.7.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 24360960 + }, + { + "name": "language_model.model.layers.7.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 24361472 + }, + { + "name": "language_model.model.layers.7.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 28293632 + } + ], + "md5sum": "b2803bcb7b5922029a23d66b21927c78" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "8678938de72f94449254546b35ff016f" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "7d35d34b55a8cb63beb120f5aa03b578" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 33209344, + "records": [ + { + "name": "language_model.model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.7.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.7.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.7.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 16712192 + }, + { + "name": "language_model.model.layers.7.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 17695232 + }, + { + "name": "language_model.model.layers.7.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 21627392 + }, + { + "name": "language_model.model.layers.8.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 22126592 + }, + { + "name": "language_model.model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 25812992 + }, + { + "name": "language_model.model.layers.8.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33185792 + }, + { + "name": "language_model.model.layers.8.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33193472 + }, + { + "name": "language_model.model.layers.8.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33201152 + }, + { + "name": "language_model.model.layers.8.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 33208832 + } + ], + "md5sum": "bd96398a21ade34db86249f8e9341d5b" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "7656e4fb2084c1a7f1a3240105309833" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "3e9222df6b72185df98dd7a26d095e6e" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 30236672, + "records": [ + { + "name": "language_model.model.layers.8.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.8.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 12288000 + }, + { + "name": "language_model.model.layers.8.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.8.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13271552 + }, + { + "name": "language_model.model.layers.8.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 21135872 + }, + { + "name": "language_model.model.layers.8.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.8.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 26051072 + }, + { + "name": "language_model.model.layers.9.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 26542592 + }, + { + "name": "language_model.model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 26550272 + } + ], + "md5sum": "9b35e5d7c994d1383d3902c6f2af1087" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 33447424, + "records": [ + { + "name": "language_model.model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.9.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.9.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7380480 + }, + { + "name": "language_model.model.layers.9.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7388160 + }, + { + "name": "language_model.model.layers.9.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 7395840 + }, + { + "name": "language_model.model.layers.9.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7396352 + }, + { + "name": "language_model.model.layers.9.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 11328512 + }, + { + "name": "language_model.model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11820032 + }, + { + "name": "language_model.model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 19684352 + }, + { + "name": "language_model.model.layers.9.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 20667392 + }, + { + "name": "language_model.model.layers.9.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20667904 + }, + { + "name": "language_model.model.layers.9.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 28532224 + }, + { + "name": "language_model.model.layers.9.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29515264 + } + ], + "md5sum": "3e9c401d4c998da65c2a098a907463b9" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 29990400, + "records": [ + { + "name": "language_model.model.layers.9.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.15.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 499200 + } + ], + "md5sum": "a07d5fd945a68c39371b3dc46f606da4" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 33208320, + "records": [ + { + "name": "language_model.model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.15.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.15.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 3694080 + }, + { + "name": "language_model.model.layers.15.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 3701760 + }, + { + "name": "language_model.model.layers.16.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 3709440 + }, + { + "name": "language_model.model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 3717120 + } + ], + "md5sum": "be6ab2342e57421a2f220c3c1adc4a7f" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "434fe96a2df0a9481a42f3904181b3f1" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 33201664, + "records": [ + { + "name": "language_model.model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.16.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11059200 + }, + { + "name": "language_model.model.layers.16.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11066880 + }, + { + "name": "language_model.model.layers.16.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11074560 + }, + { + "name": "language_model.model.layers.16.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 11082240 + }, + { + "name": "language_model.model.layers.16.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11082752 + }, + { + "name": "language_model.model.layers.16.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 15014912 + }, + { + "name": "language_model.model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 15506432 + }, + { + "name": "language_model.model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 23370752 + }, + { + "name": "language_model.model.layers.16.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 24353792 + }, + { + "name": "language_model.model.layers.16.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24354304 + }, + { + "name": "language_model.model.layers.16.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 32218624 + } + ], + "md5sum": "a85e2d8f80babf2cecda4f8cb521c81e" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "493a013e83e7db654facc19a21436ad3" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "8da1fd2c75d96960c1977ef8ad1a5958" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 28785664, + "records": [ + { + "name": "language_model.model.layers.16.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.16.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.17.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 4431360 + }, + { + "name": "language_model.model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 8117760 + }, + { + "name": "language_model.model.layers.17.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15490560 + }, + { + "name": "language_model.model.layers.17.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15498240 + }, + { + "name": "language_model.model.layers.17.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15505920 + }, + { + "name": "language_model.model.layers.17.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 15513600 + }, + { + "name": "language_model.model.layers.17.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 15514112 + }, + { + "name": "language_model.model.layers.17.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 19446272 + }, + { + "name": "language_model.model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 19937792 + }, + { + "name": "language_model.model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 27802112 + }, + { + "name": "language_model.model.layers.17.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 28785152 + } + ], + "md5sum": "72749c5b6ff5b8a4d5c20b52c89cac6a" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "b44a39e3c6c1de060cce883a40e8fa8b" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "f46568f59a628cd69732934e949a578f" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 28785152, + "records": [ + { + "name": "language_model.model.layers.17.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.17.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.17.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.17.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 12779520 + }, + { + "name": "language_model.model.layers.18.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 13278720 + }, + { + "name": "language_model.model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 16965120 + }, + { + "name": "language_model.model.layers.18.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24337920 + }, + { + "name": "language_model.model.layers.18.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24345600 + }, + { + "name": "language_model.model.layers.18.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24353280 + }, + { + "name": "language_model.model.layers.18.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 24360960 + }, + { + "name": "language_model.model.layers.18.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 24361472 + }, + { + "name": "language_model.model.layers.18.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 28293632 + } + ], + "md5sum": "cbd1864929b2d01ebf1645095d243658" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "306898e198580361d7ac6a3680d975bd" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "ac8678ca8e5fd2cc35e54ef33d2a761d" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 33209344, + "records": [ + { + "name": "language_model.model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.18.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.18.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.18.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 16712192 + }, + { + "name": "language_model.model.layers.18.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 17695232 + }, + { + "name": "language_model.model.layers.18.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 21627392 + }, + { + "name": "language_model.model.layers.19.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 22126592 + }, + { + "name": "language_model.model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 25812992 + }, + { + "name": "language_model.model.layers.19.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33185792 + }, + { + "name": "language_model.model.layers.19.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33193472 + }, + { + "name": "language_model.model.layers.19.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33201152 + }, + { + "name": "language_model.model.layers.19.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 33208832 + } + ], + "md5sum": "bdcbbfac25bc18a9ddab603d3bd6bd3d" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "eb866b63a1678ffefa976c8678cad86c" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "8abfbf616e335066db53ce6f51d9ee30" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 30236672, + "records": [ + { + "name": "language_model.model.layers.19.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.19.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 12288000 + }, + { + "name": "language_model.model.layers.19.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.19.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13271552 + }, + { + "name": "language_model.model.layers.19.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 21135872 + }, + { + "name": "language_model.model.layers.19.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.19.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 26051072 + }, + { + "name": "language_model.model.layers.20.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 26542592 + }, + { + "name": "language_model.model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 26550272 + } + ], + "md5sum": "aeea8482c8e5a1a6980458b1d8f69740" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 33447424, + "records": [ + { + "name": "language_model.model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.20.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.20.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7380480 + }, + { + "name": "language_model.model.layers.20.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7388160 + }, + { + "name": "language_model.model.layers.20.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 7395840 + }, + { + "name": "language_model.model.layers.20.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7396352 + }, + { + "name": "language_model.model.layers.20.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 11328512 + }, + { + "name": "language_model.model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11820032 + }, + { + "name": "language_model.model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 19684352 + }, + { + "name": "language_model.model.layers.20.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 20667392 + }, + { + "name": "language_model.model.layers.20.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20667904 + }, + { + "name": "language_model.model.layers.20.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 28532224 + }, + { + "name": "language_model.model.layers.20.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29515264 + } + ], + "md5sum": "d109fbd8ca68e5ce92aab466636c0d1d" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 29990400, + "records": [ + { + "name": "language_model.model.layers.20.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.21.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 499200 + } + ], + "md5sum": "5141f6934d5a92dcb53f75dce36cee76" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "b1e53948dca61a3c442c91ccb8f114d8" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 33201664, + "records": [ + { + "name": "language_model.model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.21.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11059200 + }, + { + "name": "language_model.model.layers.21.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11066880 + }, + { + "name": "language_model.model.layers.21.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11074560 + }, + { + "name": "language_model.model.layers.21.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 11082240 + }, + { + "name": "language_model.model.layers.21.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11082752 + }, + { + "name": "language_model.model.layers.21.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 15014912 + }, + { + "name": "language_model.model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 15506432 + }, + { + "name": "language_model.model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 23370752 + }, + { + "name": "language_model.model.layers.21.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 24353792 + }, + { + "name": "language_model.model.layers.21.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24354304 + }, + { + "name": "language_model.model.layers.21.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 32218624 + } + ], + "md5sum": "c7a257473d8ab6847e1387b3f03a2936" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "3a175f7caee2987ba0390b3e5984e70a" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "f0e4a14d81e0a73c2dfc6bd7e731939e" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 28785664, + "records": [ + { + "name": "language_model.model.layers.21.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.21.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.22.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 4431360 + }, + { + "name": "language_model.model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 8117760 + }, + { + "name": "language_model.model.layers.22.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15490560 + }, + { + "name": "language_model.model.layers.22.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15498240 + }, + { + "name": "language_model.model.layers.22.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15505920 + }, + { + "name": "language_model.model.layers.22.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 15513600 + }, + { + "name": "language_model.model.layers.22.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 15514112 + }, + { + "name": "language_model.model.layers.22.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 19446272 + }, + { + "name": "language_model.model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 19937792 + }, + { + "name": "language_model.model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 27802112 + }, + { + "name": "language_model.model.layers.22.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 28785152 + } + ], + "md5sum": "dec6723dc71104aa5cb8e1b23df96450" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "70fbfc68560bf1c5bdf080f13e8f8012" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "46d2c272f72bdfcc3dec210c5dde5676" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 28785152, + "records": [ + { + "name": "language_model.model.layers.22.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.22.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.22.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.22.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 12779520 + }, + { + "name": "language_model.model.layers.23.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 13278720 + }, + { + "name": "language_model.model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 16965120 + }, + { + "name": "language_model.model.layers.23.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24337920 + }, + { + "name": "language_model.model.layers.23.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24345600 + }, + { + "name": "language_model.model.layers.23.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24353280 + }, + { + "name": "language_model.model.layers.23.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 24360960 + }, + { + "name": "language_model.model.layers.23.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 24361472 + }, + { + "name": "language_model.model.layers.23.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 28293632 + } + ], + "md5sum": "1459805a42872100b63f5c1b4172f3fa" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "523685039f7e8c5e2eb4440d71916699" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "f6216cd790a26c272db0d18ba3a34048" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 33209344, + "records": [ + { + "name": "language_model.model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.23.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.23.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.23.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 16712192 + }, + { + "name": "language_model.model.layers.23.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 17695232 + }, + { + "name": "language_model.model.layers.23.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 21627392 + }, + { + "name": "language_model.model.layers.24.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 22126592 + }, + { + "name": "language_model.model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 25812992 + }, + { + "name": "language_model.model.layers.24.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33185792 + }, + { + "name": "language_model.model.layers.24.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33193472 + }, + { + "name": "language_model.model.layers.24.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33201152 + }, + { + "name": "language_model.model.layers.24.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 33208832 + } + ], + "md5sum": "249b1941adaad9e04ed72958e0c7e5b6" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "739993fd0bf847dbe65f40b42d5f62b5" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "7fa578ac1ba26b57be34d844915ff8b1" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 30236672, + "records": [ + { + "name": "language_model.model.layers.24.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.24.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 12288000 + }, + { + "name": "language_model.model.layers.24.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.24.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13271552 + }, + { + "name": "language_model.model.layers.24.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 21135872 + }, + { + "name": "language_model.model.layers.24.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.24.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 26051072 + }, + { + "name": "language_model.model.layers.25.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 26542592 + }, + { + "name": "language_model.model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 26550272 + } + ], + "md5sum": "7336676ca7aa9d201d409bd5cae165e5" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 33447424, + "records": [ + { + "name": "language_model.model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.25.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.25.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7380480 + }, + { + "name": "language_model.model.layers.25.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7388160 + }, + { + "name": "language_model.model.layers.25.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 7395840 + }, + { + "name": "language_model.model.layers.25.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7396352 + }, + { + "name": "language_model.model.layers.25.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 11328512 + }, + { + "name": "language_model.model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11820032 + }, + { + "name": "language_model.model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 19684352 + }, + { + "name": "language_model.model.layers.25.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 20667392 + }, + { + "name": "language_model.model.layers.25.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20667904 + }, + { + "name": "language_model.model.layers.25.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 28532224 + }, + { + "name": "language_model.model.layers.25.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29515264 + } + ], + "md5sum": "61d5ee69bdd310f87e76cee5ece671a2" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "cd612a55cfaaf10d2f4b53dd5a127edb" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 29983744, + "records": [ + { + "name": "language_model.model.layers.25.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.26.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.26.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7864832 + }, + { + "name": "language_model.model.layers.26.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 11796992 + }, + { + "name": "language_model.model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 12288512 + }, + { + "name": "language_model.model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 20152832 + }, + { + "name": "language_model.model.layers.26.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 21135872 + }, + { + "name": "language_model.model.layers.26.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 21136384 + }, + { + "name": "language_model.model.layers.26.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 29000704 + } + ], + "md5sum": "4bc690313f36171766d94b0e47f763d6" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "a2746c4d2e989b5160015e7fb894f240" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "13128040602293957ecd71b4ea1434a9" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "c15766d2ebc02ef680e788d0dbadbc8c" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 32502784, + "records": [ + { + "name": "language_model.model.layers.26.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.26.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.26.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 4431360 + }, + { + "name": "language_model.model.layers.26.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 8117760 + }, + { + "name": "language_model.model.layers.26.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 8125440 + }, + { + "name": "language_model.model.layers.26.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 8133120 + }, + { + "name": "language_model.model.layers.27.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 8140800 + }, + { + "name": "language_model.model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 8148480 + }, + { + "name": "language_model.model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 11834880 + }, + { + "name": "language_model.model.layers.27.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 19207680 + }, + { + "name": "language_model.model.layers.27.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 19215360 + }, + { + "name": "language_model.model.layers.27.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 19223040 + }, + { + "name": "language_model.model.layers.27.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 19230720 + }, + { + "name": "language_model.model.layers.27.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 19231232 + }, + { + "name": "language_model.model.layers.27.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 23163392 + }, + { + "name": "language_model.model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 23654912 + }, + { + "name": "language_model.model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 31519232 + }, + { + "name": "language_model.model.layers.27.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 32502272 + } + ], + "md5sum": "2eb419b878cd04c935049e60b36fe349" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "87bfa1ffe11459584c1d433984d50bf4" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "fa01cbc4193d8237963e2c390a37b1ec" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 28785152, + "records": [ + { + "name": "language_model.model.layers.27.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.27.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.27.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.27.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 12779520 + }, + { + "name": "language_model.model.layers.28.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 13278720 + }, + { + "name": "language_model.model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 16965120 + }, + { + "name": "language_model.model.layers.28.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24337920 + }, + { + "name": "language_model.model.layers.28.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24345600 + }, + { + "name": "language_model.model.layers.28.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24353280 + }, + { + "name": "language_model.model.layers.28.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 24360960 + }, + { + "name": "language_model.model.layers.28.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 24361472 + }, + { + "name": "language_model.model.layers.28.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 28293632 + } + ], + "md5sum": "38b89c09fe38dd0a8aad504b57e80dbd" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "e3886178d9992a9842921cf51b4abcb1" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "b3bab7bcbea9d329e9e301edd9dbf9cb" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 33209344, + "records": [ + { + "name": "language_model.model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.28.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.28.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.28.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 16712192 + }, + { + "name": "language_model.model.layers.28.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 17695232 + }, + { + "name": "language_model.model.layers.28.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 21627392 + }, + { + "name": "language_model.model.layers.29.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 22126592 + }, + { + "name": "language_model.model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 25812992 + }, + { + "name": "language_model.model.layers.29.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33185792 + }, + { + "name": "language_model.model.layers.29.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33193472 + }, + { + "name": "language_model.model.layers.29.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33201152 + }, + { + "name": "language_model.model.layers.29.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 33208832 + } + ], + "md5sum": "96ba5bc6586f3110887569d842980f48" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "61a08a0db610e13b95877e45332821fb" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "cfe210bed8f8dabca881e0a1799177d4" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 30236672, + "records": [ + { + "name": "language_model.model.layers.29.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.29.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 12288000 + }, + { + "name": "language_model.model.layers.29.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.29.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13271552 + }, + { + "name": "language_model.model.layers.29.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 21135872 + }, + { + "name": "language_model.model.layers.29.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.29.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 26051072 + }, + { + "name": "language_model.model.layers.30.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 26542592 + }, + { + "name": "language_model.model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 26550272 + } + ], + "md5sum": "75ffd371f19e4bae177b194a2669a037" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 33447424, + "records": [ + { + "name": "language_model.model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.30.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.30.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7380480 + }, + { + "name": "language_model.model.layers.30.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7388160 + }, + { + "name": "language_model.model.layers.30.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 7395840 + }, + { + "name": "language_model.model.layers.30.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7396352 + }, + { + "name": "language_model.model.layers.30.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 11328512 + }, + { + "name": "language_model.model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11820032 + }, + { + "name": "language_model.model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 19684352 + }, + { + "name": "language_model.model.layers.30.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 20667392 + }, + { + "name": "language_model.model.layers.30.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20667904 + }, + { + "name": "language_model.model.layers.30.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 28532224 + }, + { + "name": "language_model.model.layers.30.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29515264 + } + ], + "md5sum": "d95fecc743541d6838bf0175427330a2" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 29990400, + "records": [ + { + "name": "language_model.model.layers.30.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.31.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 499200 + } + ], + "md5sum": "b38ef500f75c26a81bb7def542c0e7ba" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "ca854788b990a57eb92926e7e2ac2b40" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 33201664, + "records": [ + { + "name": "language_model.model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.31.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11059200 + }, + { + "name": "language_model.model.layers.31.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11066880 + }, + { + "name": "language_model.model.layers.31.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11074560 + }, + { + "name": "language_model.model.layers.31.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 11082240 + }, + { + "name": "language_model.model.layers.31.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11082752 + }, + { + "name": "language_model.model.layers.31.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 15014912 + }, + { + "name": "language_model.model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 15506432 + }, + { + "name": "language_model.model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 23370752 + }, + { + "name": "language_model.model.layers.31.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 24353792 + }, + { + "name": "language_model.model.layers.31.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24354304 + }, + { + "name": "language_model.model.layers.31.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 32218624 + } + ], + "md5sum": "2c0aa81c7da5452fccdfe5927fcb9e58" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "7b7cbf56aa31c8b773a401a61029c8fe" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "0ed1bb2811056a6017e70bc619971892" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 28785664, + "records": [ + { + "name": "language_model.model.layers.31.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.31.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.32.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 4431360 + }, + { + "name": "language_model.model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 8117760 + }, + { + "name": "language_model.model.layers.32.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15490560 + }, + { + "name": "language_model.model.layers.32.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15498240 + }, + { + "name": "language_model.model.layers.32.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15505920 + }, + { + "name": "language_model.model.layers.32.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 15513600 + }, + { + "name": "language_model.model.layers.32.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 15514112 + }, + { + "name": "language_model.model.layers.32.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 19446272 + }, + { + "name": "language_model.model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 19937792 + }, + { + "name": "language_model.model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 27802112 + }, + { + "name": "language_model.model.layers.32.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 28785152 + } + ], + "md5sum": "0c90e482a6536666ddaa620aeb967024" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "3e1b6121ba279409c9a39a818c17bdea" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "e2b77d8b4931257b53118d7b31e29178" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 28785152, + "records": [ + { + "name": "language_model.model.layers.32.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.32.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.32.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.32.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 12779520 + }, + { + "name": "language_model.model.layers.33.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 13278720 + }, + { + "name": "language_model.model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 16965120 + }, + { + "name": "language_model.model.layers.33.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24337920 + }, + { + "name": "language_model.model.layers.33.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24345600 + }, + { + "name": "language_model.model.layers.33.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24353280 + }, + { + "name": "language_model.model.layers.33.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 24360960 + }, + { + "name": "language_model.model.layers.33.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 24361472 + }, + { + "name": "language_model.model.layers.33.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 28293632 + } + ], + "md5sum": "c1cd09747b7b96e4f5e7f4cb81d62252" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "aa1c5829ef7aafeca614c2cec1144e08" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "62fd8c86e8afad9fefbe6f9944dde6b8" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 33209344, + "records": [ + { + "name": "language_model.model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.33.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.33.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.33.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 16712192 + }, + { + "name": "language_model.model.layers.33.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 17695232 + }, + { + "name": "language_model.model.layers.33.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 21627392 + }, + { + "name": "language_model.model.layers.34.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 22126592 + }, + { + "name": "language_model.model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 25812992 + }, + { + "name": "language_model.model.layers.34.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33185792 + }, + { + "name": "language_model.model.layers.34.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33193472 + }, + { + "name": "language_model.model.layers.34.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33201152 + }, + { + "name": "language_model.model.layers.34.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 33208832 + } + ], + "md5sum": "9cb7f2f1bc079b7ca04f690c5e78de8a" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "5e35d938968786423bab54a138e78c5c" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "14a4329438ec3de65920e765c11b470c" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 30236672, + "records": [ + { + "name": "language_model.model.layers.34.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.34.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 12288000 + }, + { + "name": "language_model.model.layers.34.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.34.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13271552 + }, + { + "name": "language_model.model.layers.34.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 21135872 + }, + { + "name": "language_model.model.layers.34.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.34.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 26051072 + }, + { + "name": "language_model.model.layers.35.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 26542592 + }, + { + "name": "language_model.model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 26550272 + } + ], + "md5sum": "e2189eb79ea79f1bf634190088c5cad2" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 33447424, + "records": [ + { + "name": "language_model.model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.35.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.35.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7380480 + }, + { + "name": "language_model.model.layers.35.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7388160 + }, + { + "name": "language_model.model.layers.35.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 7395840 + }, + { + "name": "language_model.model.layers.35.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7396352 + }, + { + "name": "language_model.model.layers.35.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 11328512 + }, + { + "name": "language_model.model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11820032 + }, + { + "name": "language_model.model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 19684352 + }, + { + "name": "language_model.model.layers.35.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 20667392 + }, + { + "name": "language_model.model.layers.35.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20667904 + }, + { + "name": "language_model.model.layers.35.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 28532224 + }, + { + "name": "language_model.model.layers.35.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29515264 + } + ], + "md5sum": "bd487d987af4ebddd43ec98051048fdd" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 29990400, + "records": [ + { + "name": "language_model.model.layers.35.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.36.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 499200 + } + ], + "md5sum": "21feae465150c464542f396da1a08a8b" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "25b3f89e9b5055c7f9fda9ef4490820a" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 33201664, + "records": [ + { + "name": "language_model.model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.36.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11059200 + }, + { + "name": "language_model.model.layers.36.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11066880 + }, + { + "name": "language_model.model.layers.36.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11074560 + }, + { + "name": "language_model.model.layers.36.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 11082240 + }, + { + "name": "language_model.model.layers.36.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11082752 + }, + { + "name": "language_model.model.layers.36.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 15014912 + }, + { + "name": "language_model.model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 15506432 + }, + { + "name": "language_model.model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 23370752 + }, + { + "name": "language_model.model.layers.36.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 24353792 + }, + { + "name": "language_model.model.layers.36.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24354304 + }, + { + "name": "language_model.model.layers.36.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 32218624 + } + ], + "md5sum": "f19833c518dcff380bf82029964b824f" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "17067a4496ce776c74a3b05869f614be" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 32932864, + "records": [ + { + "name": "language_model.model.layers.36.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.36.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.37.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 11796480 + }, + { + "name": "language_model.model.layers.37.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11796992 + }, + { + "name": "language_model.model.layers.37.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 15729152 + }, + { + "name": "language_model.model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 16220672 + }, + { + "name": "language_model.model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 24084992 + }, + { + "name": "language_model.model.layers.37.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 25068032 + }, + { + "name": "language_model.model.layers.37.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 25068544 + } + ], + "md5sum": "85da918b97583116bb928bffb6b5655a" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "0f9b8a452a6a98ac4b6420078e7df8a4" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "d3fcd74507d833489ec3d150a6858b1d" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "ffcf81df7d50819154358706b551ec96" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 33485824, + "records": [ + { + "name": "language_model.model.layers.37.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.37.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 983040 + }, + { + "name": "language_model.model.layers.37.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 4915200 + }, + { + "name": "language_model.model.layers.37.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 5406720 + }, + { + "name": "language_model.model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 5414400 + }, + { + "name": "language_model.model.layers.37.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 9100800 + }, + { + "name": "language_model.model.layers.37.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 9108480 + }, + { + "name": "language_model.model.layers.37.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 9116160 + }, + { + "name": "language_model.model.layers.38.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 9123840 + }, + { + "name": "language_model.model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 9131520 + }, + { + "name": "language_model.model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 12817920 + }, + { + "name": "language_model.model.layers.38.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 20190720 + }, + { + "name": "language_model.model.layers.38.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 20198400 + }, + { + "name": "language_model.model.layers.38.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 20206080 + }, + { + "name": "language_model.model.layers.38.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 20213760 + }, + { + "name": "language_model.model.layers.38.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 20214272 + }, + { + "name": "language_model.model.layers.38.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 24146432 + }, + { + "name": "language_model.model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24637952 + }, + { + "name": "language_model.model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 32502272 + }, + { + "name": "language_model.model.layers.38.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 33485312 + } + ], + "md5sum": "e1d4d2f173fc974fbcd727d4cf10f17f" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "bfd8946ab15c8434b2b21319062c115d" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "efa9f846698fb9306b3f12b8e4ca4cdb" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 28785152, + "records": [ + { + "name": "language_model.model.layers.38.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.38.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.38.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.38.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 12779520 + }, + { + "name": "language_model.model.layers.39.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 13278720 + }, + { + "name": "language_model.model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 16965120 + }, + { + "name": "language_model.model.layers.39.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24337920 + }, + { + "name": "language_model.model.layers.39.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24345600 + }, + { + "name": "language_model.model.layers.39.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24353280 + }, + { + "name": "language_model.model.layers.39.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 24360960 + }, + { + "name": "language_model.model.layers.39.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 24361472 + }, + { + "name": "language_model.model.layers.39.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 28293632 + } + ], + "md5sum": "a54c8cfeb4e90276110c06d8f3d007ba" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "c793769420a7219026b4cfc9b466703f" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "ceb5b17527b69c13d9b1af53f7ae6705" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 33209344, + "records": [ + { + "name": "language_model.model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.39.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.39.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.39.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 16712192 + }, + { + "name": "language_model.model.layers.39.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 17695232 + }, + { + "name": "language_model.model.layers.39.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 21627392 + }, + { + "name": "language_model.model.layers.40.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 22126592 + }, + { + "name": "language_model.model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 25812992 + }, + { + "name": "language_model.model.layers.40.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33185792 + }, + { + "name": "language_model.model.layers.40.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33193472 + }, + { + "name": "language_model.model.layers.40.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33201152 + }, + { + "name": "language_model.model.layers.40.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 33208832 + } + ], + "md5sum": "0ca9bf508dc02ae2bffd0fab8bd0cadf" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "a7b7b1e72d1a17517f127e74c9f4500c" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "d7258a1e3a23baa32ef9c8fdc7f9026e" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 30236672, + "records": [ + { + "name": "language_model.model.layers.40.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.40.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 12288000 + }, + { + "name": "language_model.model.layers.40.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.40.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13271552 + }, + { + "name": "language_model.model.layers.40.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 21135872 + }, + { + "name": "language_model.model.layers.40.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.40.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 26051072 + }, + { + "name": "language_model.model.layers.41.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 26542592 + }, + { + "name": "language_model.model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 26550272 + } + ], + "md5sum": "65bbd67fbc5ed26a7733e1ce11578c27" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 33447424, + "records": [ + { + "name": "language_model.model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.41.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.41.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7380480 + }, + { + "name": "language_model.model.layers.41.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7388160 + }, + { + "name": "language_model.model.layers.41.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 7395840 + }, + { + "name": "language_model.model.layers.41.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7396352 + }, + { + "name": "language_model.model.layers.41.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 11328512 + }, + { + "name": "language_model.model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11820032 + }, + { + "name": "language_model.model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 19684352 + }, + { + "name": "language_model.model.layers.41.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 20667392 + }, + { + "name": "language_model.model.layers.41.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20667904 + }, + { + "name": "language_model.model.layers.41.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 28532224 + }, + { + "name": "language_model.model.layers.41.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29515264 + } + ], + "md5sum": "ecd550ab18809ccad564e6113e641a0b" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 29990400, + "records": [ + { + "name": "language_model.model.layers.41.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.42.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 499200 + } + ], + "md5sum": "37a22d794be80191a2ddff3b9832b071" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "fa1689f506f604a23fed535c5efc9c8d" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 33201664, + "records": [ + { + "name": "language_model.model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.42.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11059200 + }, + { + "name": "language_model.model.layers.42.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11066880 + }, + { + "name": "language_model.model.layers.42.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11074560 + }, + { + "name": "language_model.model.layers.42.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 11082240 + }, + { + "name": "language_model.model.layers.42.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11082752 + }, + { + "name": "language_model.model.layers.42.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 15014912 + }, + { + "name": "language_model.model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 15506432 + }, + { + "name": "language_model.model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 23370752 + }, + { + "name": "language_model.model.layers.42.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 24353792 + }, + { + "name": "language_model.model.layers.42.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24354304 + }, + { + "name": "language_model.model.layers.42.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 32218624 + } + ], + "md5sum": "3fa3464c4f1c42228dcbfabb2e029e3a" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "5d57d7181a1cde3ebc1ca1bffae8e041" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "cc71cb222dd06f810ee3391b2581c582" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 28785664, + "records": [ + { + "name": "language_model.model.layers.42.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.42.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.43.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 4431360 + }, + { + "name": "language_model.model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 8117760 + }, + { + "name": "language_model.model.layers.43.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15490560 + }, + { + "name": "language_model.model.layers.43.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15498240 + }, + { + "name": "language_model.model.layers.43.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 15505920 + }, + { + "name": "language_model.model.layers.43.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 15513600 + }, + { + "name": "language_model.model.layers.43.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 15514112 + }, + { + "name": "language_model.model.layers.43.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 19446272 + }, + { + "name": "language_model.model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 19937792 + }, + { + "name": "language_model.model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 27802112 + }, + { + "name": "language_model.model.layers.43.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 28785152 + } + ], + "md5sum": "806d96bd5dbd9b53bb57697b522b0ce7" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "ef695d60aad31380c9589d37d673e980" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "84ae3b74d1140d6d2e3b44983007c686" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 28785152, + "records": [ + { + "name": "language_model.model.layers.43.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.43.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.43.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.43.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 12779520 + }, + { + "name": "language_model.model.layers.44.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 13278720 + }, + { + "name": "language_model.model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 16965120 + }, + { + "name": "language_model.model.layers.44.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24337920 + }, + { + "name": "language_model.model.layers.44.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24345600 + }, + { + "name": "language_model.model.layers.44.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 24353280 + }, + { + "name": "language_model.model.layers.44.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 24360960 + }, + { + "name": "language_model.model.layers.44.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 24361472 + }, + { + "name": "language_model.model.layers.44.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 28293632 + } + ], + "md5sum": "364ce215f4f4ed59c5dc7c37b581db23" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "4ea962ba2750e22ad7e2143d98205585" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "30f232f10b8df2c51b82f837c1eccef7" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 33209344, + "records": [ + { + "name": "language_model.model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 7864320 + }, + { + "name": "language_model.model.layers.44.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 8847360 + }, + { + "name": "language_model.model.layers.44.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.44.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 16712192 + }, + { + "name": "language_model.model.layers.44.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 17695232 + }, + { + "name": "language_model.model.layers.44.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 21627392 + }, + { + "name": "language_model.model.layers.45.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 22126592 + }, + { + "name": "language_model.model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 25812992 + }, + { + "name": "language_model.model.layers.45.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33185792 + }, + { + "name": "language_model.model.layers.45.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33193472 + }, + { + "name": "language_model.model.layers.45.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 33201152 + }, + { + "name": "language_model.model.layers.45.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 33208832 + } + ], + "md5sum": "568e79a9855f6529b185cac9443df8fd" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 29491200, + "records": [ + { + "name": "language_model.model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 0 + } + ], + "md5sum": "42dbefd55f8a07570b032d405cbde8f0" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "c23d88e92998881a6075944b0d81c997" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 30236672, + "records": [ + { + "name": "language_model.model.layers.45.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.45.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 12288000 + }, + { + "name": "language_model.model.layers.45.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 13271040 + }, + { + "name": "language_model.model.layers.45.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13271552 + }, + { + "name": "language_model.model.layers.45.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 21135872 + }, + { + "name": "language_model.model.layers.45.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 22118912 + }, + { + "name": "language_model.model.layers.45.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 26051072 + }, + { + "name": "language_model.model.layers.46.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 26542592 + }, + { + "name": "language_model.model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 26550272 + } + ], + "md5sum": "8f97ee54aae96f8d05850913e0619dea" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 33447424, + "records": [ + { + "name": "language_model.model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.46.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7372800 + }, + { + "name": "language_model.model.layers.46.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7380480 + }, + { + "name": "language_model.model.layers.46.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 7388160 + }, + { + "name": "language_model.model.layers.46.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 7395840 + }, + { + "name": "language_model.model.layers.46.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 7396352 + }, + { + "name": "language_model.model.layers.46.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 11328512 + }, + { + "name": "language_model.model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 11820032 + }, + { + "name": "language_model.model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 19684352 + }, + { + "name": "language_model.model.layers.46.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 20667392 + }, + { + "name": "language_model.model.layers.46.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 20667904 + }, + { + "name": "language_model.model.layers.46.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 28532224 + }, + { + "name": "language_model.model.layers.46.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 29515264 + } + ], + "md5sum": "8a955ce1b16df71f19decd3f9618220a" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 29990400, + "records": [ + { + "name": "language_model.model.layers.46.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.47.input_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 491520 + }, + { + "name": "language_model.model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 3840, + 1920 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29491200, + "byteOffset": 499200 + } + ], + "md5sum": "5a6c4c02c997c3b1a0272bf93f1fe19b" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 58982400, + "records": [ + { + "name": "language_model.model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 30720, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58982400, + "byteOffset": 0 + } + ], + "md5sum": "21c0e63d3e259641ffeecbe941cbb2d0" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 33201664, + "records": [ + { + "name": "language_model.model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 3840, + 480 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3686400, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 30720, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7372800, + "byteOffset": 3686400 + }, + { + "name": "language_model.model.layers.47.post_attention_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11059200 + }, + { + "name": "language_model.model.layers.47.post_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11066880 + }, + { + "name": "language_model.model.layers.47.pre_feedforward_layernorm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 11074560 + }, + { + "name": "language_model.model.layers.47.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 11082240 + }, + { + "name": "language_model.model.layers.47.self_attn.k_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 11082752 + }, + { + "name": "language_model.model.layers.47.self_attn.k_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 15014912 + }, + { + "name": "language_model.model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 3840, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 15506432 + }, + { + "name": "language_model.model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 3840, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 23370752 + }, + { + "name": "language_model.model.layers.47.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 512, + "byteOffset": 24353792 + }, + { + "name": "language_model.model.layers.47.self_attn.q_proj.q_weight", + "shape": [ + 4096, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 24354304 + }, + { + "name": "language_model.model.layers.47.self_attn.q_proj.q_scale", + "shape": [ + 4096, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 983040, + "byteOffset": 32218624 + } + ], + "md5sum": "530f4448c87ede1b7f5605ec31681e95" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 4431360, + "records": [ + { + "name": "language_model.model.layers.47.self_attn.v_proj.q_weight", + "shape": [ + 2048, + 480 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3932160, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.47.self_attn.v_proj.q_scale", + "shape": [ + 2048, + 120 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 491520, + "byteOffset": 3932160 + }, + { + "name": "language_model.model.norm.weight", + "shape": [ + 3840 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7680, + "byteOffset": 4423680 + } + ], + "md5sum": "9e45b6bd190ffc65608464456da6ae17" + } + ] +} \ No newline at end of file