{ "metadata": { "ParamSize": 867, "ParamBytes": 7356355584.0, "BitsPerParam": 4.8288565767176745 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 503439360, "records": [ { "name": "language_model.model.embed_tokens.q_weight", "shape": [ 262208, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 503439360, "byteOffset": 0 } ], "md5sum": "6dbeddc336f544ac10a102bedbacda41" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 62929920, "records": [ { "name": "language_model.model.embed_tokens.q_scale", "shape": [ 262208, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 62929920, "byteOffset": 0 } ], "md5sum": "53d948378b1e9a4703d25e934e460904" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "8b849bc9d4e86dd71fc5e30a1ea57124" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33185280, "records": [ { "name": "language_model.model.layers.0.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 0 }, { "name": "language_model.model.layers.0.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 7680 }, { "name": "language_model.model.layers.0.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 29498880 } ], "md5sum": "9ac79053e29b289f645eb5c2d931db73" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33447424, "records": [ { "name": "language_model.model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.0.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7372800 }, { "name": "language_model.model.layers.0.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7380480 }, { "name": "language_model.model.layers.0.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7388160 }, { "name": "language_model.model.layers.0.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 7395840 }, { "name": "language_model.model.layers.0.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7396352 }, { "name": "language_model.model.layers.0.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 11328512 }, { "name": "language_model.model.layers.0.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11820032 }, { "name": "language_model.model.layers.0.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 19684352 }, { "name": "language_model.model.layers.0.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 20667392 }, { "name": "language_model.model.layers.0.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20667904 }, { "name": "language_model.model.layers.0.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 28532224 }, { "name": "language_model.model.layers.0.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29515264 } ], "md5sum": "860ce7cb8ffe27c500b5f2e3f6ba21aa" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 29990400, "records": [ { "name": "language_model.model.layers.0.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.1.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 491520 }, { "name": "language_model.model.layers.1.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 499200 } ], "md5sum": "8eee40d625c19ea5b8845fa81d4288ff" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "38ffb781fbcbe6d4f34ca95df84a240d" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33201664, "records": [ { "name": "language_model.model.layers.1.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 3686400 }, { "name": "language_model.model.layers.1.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11059200 }, { "name": "language_model.model.layers.1.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11066880 }, { "name": "language_model.model.layers.1.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11074560 }, { "name": "language_model.model.layers.1.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 11082240 }, { "name": "language_model.model.layers.1.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11082752 }, { "name": "language_model.model.layers.1.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 15014912 }, { "name": "language_model.model.layers.1.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 15506432 }, { "name": "language_model.model.layers.1.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 23370752 }, { "name": "language_model.model.layers.1.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 24353792 }, { "name": "language_model.model.layers.1.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24354304 }, { "name": "language_model.model.layers.1.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 32218624 } ], "md5sum": "e5a9662be4822c9853231058f721df39" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.2.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "94451d7d6b68ac88027dfadf266e7e9f" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "b1a80a976ab0df366ddb543990aa7ed4" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 28785664, "records": [ { "name": "language_model.model.layers.1.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.1.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.2.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 4423680 }, { "name": "language_model.model.layers.2.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 4431360 }, { "name": "language_model.model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 8117760 }, { "name": "language_model.model.layers.2.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15490560 }, { "name": "language_model.model.layers.2.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15498240 }, { "name": "language_model.model.layers.2.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15505920 }, { "name": "language_model.model.layers.2.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 15513600 }, { "name": "language_model.model.layers.2.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 15514112 }, { "name": "language_model.model.layers.2.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 19446272 }, { "name": "language_model.model.layers.2.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 19937792 }, { "name": "language_model.model.layers.2.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 27802112 }, { "name": "language_model.model.layers.2.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 28785152 } ], "md5sum": "70b265686a575ba1fee13f4557d840a6" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.3.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "1361ff8a4d36f6ae603b82fd22f4e946" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "2ba674bef456540ed53433db674f8206" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 28785152, "records": [ { "name": "language_model.model.layers.2.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.2.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.2.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 8847360 }, { "name": "language_model.model.layers.2.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 12779520 }, { "name": "language_model.model.layers.3.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 13271040 }, { "name": "language_model.model.layers.3.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 13278720 }, { "name": "language_model.model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 16965120 }, { "name": "language_model.model.layers.3.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24337920 }, { "name": "language_model.model.layers.3.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24345600 }, { "name": "language_model.model.layers.3.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24353280 }, { "name": "language_model.model.layers.3.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 24360960 }, { "name": "language_model.model.layers.3.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 24361472 }, { "name": "language_model.model.layers.3.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 28293632 } ], "md5sum": "4aeeec1b43a32d39d837cffc47273fab" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "36a678f44f3a75eea9f4ef63c27ac419" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33424384, "records": [ { "name": "language_model.model.layers.3.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.3.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.3.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 8847360 }, { "name": "language_model.model.layers.3.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 8847872 }, { "name": "language_model.model.layers.3.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 16712192 }, { "name": "language_model.model.layers.3.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 17695232 }, { "name": "language_model.model.layers.3.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 21627392 }, { "name": "language_model.model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 22118912 }, { "name": "language_model.model.layers.4.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 29491712 }, { "name": "language_model.model.layers.4.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29492224 } ], "md5sum": "3b61998fd7d82350ac66729ec050ca1f" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.10.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "54d51272e3dfb0d5ee79b1b28dc4c8a4" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "99bad2b20c526457ec9b841704051f02" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 26304512, "records": [ { "name": "language_model.model.layers.4.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.4.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 491520 }, { "name": "language_model.model.layers.4.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 8355840 }, { "name": "language_model.model.layers.4.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 9338880 }, { "name": "language_model.model.layers.4.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 9339392 }, { "name": "language_model.model.layers.4.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 17203712 }, { "name": "language_model.model.layers.4.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 18186752 }, { "name": "language_model.model.layers.4.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 22118912 }, { "name": "language_model.model.layers.10.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 22610432 }, { "name": "language_model.model.layers.10.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 22618112 } ], "md5sum": "cbd605aa321fdf8618921d91c0538500" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 33447424, "records": [ { "name": "language_model.model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.10.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7372800 }, { "name": "language_model.model.layers.10.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7380480 }, { "name": "language_model.model.layers.10.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7388160 }, { "name": "language_model.model.layers.10.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 7395840 }, { "name": "language_model.model.layers.10.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7396352 }, { "name": "language_model.model.layers.10.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 11328512 }, { "name": "language_model.model.layers.10.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11820032 }, { "name": "language_model.model.layers.10.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 19684352 }, { "name": "language_model.model.layers.10.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 20667392 }, { "name": "language_model.model.layers.10.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20667904 }, { "name": "language_model.model.layers.10.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 28532224 }, { "name": "language_model.model.layers.10.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29515264 } ], "md5sum": "2be5375a0eb4af0a0b79b3aaad9e8324" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 29990400, "records": [ { "name": "language_model.model.layers.10.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.11.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 491520 }, { "name": "language_model.model.layers.11.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 499200 } ], "md5sum": "ea785075167716c2473b3663d33df1ef" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "667b6830a875ffa9a5e1ebe9eeebae1a" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 33201664, "records": [ { "name": "language_model.model.layers.11.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 3686400 }, { "name": "language_model.model.layers.11.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11059200 }, { "name": "language_model.model.layers.11.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11066880 }, { "name": "language_model.model.layers.11.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11074560 }, { "name": "language_model.model.layers.11.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 11082240 }, { "name": "language_model.model.layers.11.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11082752 }, { "name": "language_model.model.layers.11.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 15014912 }, { "name": "language_model.model.layers.11.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 15506432 }, { "name": "language_model.model.layers.11.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 23370752 }, { "name": "language_model.model.layers.11.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 24353792 }, { "name": "language_model.model.layers.11.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24354304 }, { "name": "language_model.model.layers.11.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 32218624 } ], "md5sum": "210420566ea0d449953360589a186556" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.12.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "8d5596cb4eda8d075dd37e5f9a7ef6c3" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "f55e6af76b10aa3c1594111801fa2020" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 28785664, "records": [ { "name": "language_model.model.layers.11.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.11.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.12.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 4423680 }, { "name": "language_model.model.layers.12.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 4431360 }, { "name": "language_model.model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 8117760 }, { "name": "language_model.model.layers.12.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15490560 }, { "name": "language_model.model.layers.12.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15498240 }, { "name": "language_model.model.layers.12.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15505920 }, { "name": "language_model.model.layers.12.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 15513600 }, { "name": "language_model.model.layers.12.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 15514112 }, { "name": "language_model.model.layers.12.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 19446272 }, { "name": "language_model.model.layers.12.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 19937792 }, { "name": "language_model.model.layers.12.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 27802112 }, { "name": "language_model.model.layers.12.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 28785152 } ], "md5sum": "a9ac59418c0ae7d203cd6d3c0bac904d" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.13.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "e940156992eaa8665a17478dcd6c390a" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "7914d10d9a230de9605e8fffe8dc3960" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 28785152, "records": [ { "name": "language_model.model.layers.12.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.12.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.12.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 8847360 }, { "name": "language_model.model.layers.12.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 12779520 }, { "name": "language_model.model.layers.13.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 13271040 }, { "name": "language_model.model.layers.13.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 13278720 }, { "name": "language_model.model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 16965120 }, { "name": "language_model.model.layers.13.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24337920 }, { "name": "language_model.model.layers.13.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24345600 }, { "name": "language_model.model.layers.13.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24353280 }, { "name": "language_model.model.layers.13.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 24360960 }, { "name": "language_model.model.layers.13.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 24361472 }, { "name": "language_model.model.layers.13.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 28293632 } ], "md5sum": "8c2c11cd123e3a086dec63a68600c018" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.14.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "ed53c67aaa13cd244fc71bf4280978db" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "e4fc6262db8830276d43b20c4e5b0235" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 33209344, "records": [ { "name": "language_model.model.layers.13.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.13.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.13.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 8847360 }, { "name": "language_model.model.layers.13.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 8847872 }, { "name": "language_model.model.layers.13.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 16712192 }, { "name": "language_model.model.layers.13.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 17695232 }, { "name": "language_model.model.layers.13.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 21627392 }, { "name": "language_model.model.layers.14.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 22118912 }, { "name": "language_model.model.layers.14.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 22126592 }, { "name": "language_model.model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 25812992 }, { "name": "language_model.model.layers.14.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33185792 }, { "name": "language_model.model.layers.14.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33193472 }, { "name": "language_model.model.layers.14.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33201152 }, { "name": "language_model.model.layers.14.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 33208832 } ], "md5sum": "8ac5947343b58b04e22b274c98bbba06" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "606d2fd4cb0d48d9a50d573977613bb2" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 26542592, "records": [ { "name": "language_model.model.layers.14.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.14.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.14.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 4423680 }, { "name": "language_model.model.layers.14.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 12288000 }, { "name": "language_model.model.layers.14.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 13271040 }, { "name": "language_model.model.layers.14.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13271552 }, { "name": "language_model.model.layers.14.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 21135872 }, { "name": "language_model.model.layers.14.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22118912 }, { "name": "language_model.model.layers.14.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 26051072 } ], "md5sum": "a4a6e7f7dfff6b473635d874e0bede15" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 33424384, "records": [ { "name": "language_model.model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.15.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 7372800 }, { "name": "language_model.model.layers.15.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7373312 }, { "name": "language_model.model.layers.15.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 11305472 }, { "name": "language_model.model.layers.15.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11796992 }, { "name": "language_model.model.layers.15.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 19661312 }, { "name": "language_model.model.layers.15.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 20644352 }, { "name": "language_model.model.layers.15.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20644864 }, { "name": "language_model.model.layers.15.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 28509184 }, { "name": "language_model.model.layers.15.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29492224 } ], "md5sum": "9b9d0ff4081bdb29175d43f02eb0a083" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 29990400, "records": [ { "name": "language_model.model.layers.15.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.4.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 491520 }, { "name": "language_model.model.layers.4.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 499200 } ], "md5sum": "cb3f860c2e114548b27e0d010d6dc3f1" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33208320, "records": [ { "name": "language_model.model.layers.4.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.4.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 3686400 }, { "name": "language_model.model.layers.4.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 3694080 }, { "name": "language_model.model.layers.4.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 3701760 }, { "name": "language_model.model.layers.5.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 3709440 }, { "name": "language_model.model.layers.5.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 3717120 } ], "md5sum": "ede724a7154de98e0b4904338c4989e3" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "ed724b874061249f4ae34cb2ddbd08fb" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33201664, "records": [ { "name": "language_model.model.layers.5.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 3686400 }, { "name": "language_model.model.layers.5.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11059200 }, { "name": "language_model.model.layers.5.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11066880 }, { "name": "language_model.model.layers.5.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11074560 }, { "name": "language_model.model.layers.5.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 11082240 }, { "name": "language_model.model.layers.5.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11082752 }, { "name": "language_model.model.layers.5.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 15014912 }, { "name": "language_model.model.layers.5.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 15506432 }, { "name": "language_model.model.layers.5.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 23370752 }, { "name": "language_model.model.layers.5.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 24353792 }, { "name": "language_model.model.layers.5.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24354304 }, { "name": "language_model.model.layers.5.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 32218624 } ], "md5sum": "9d6629f94b02938d12d951b03680d186" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.6.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "559003734cf7a725fc4664a17817616a" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "3dc34ae1602f6c7e8b04dbfc6707707c" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 28785664, "records": [ { "name": "language_model.model.layers.5.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.5.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.6.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 4423680 }, { "name": "language_model.model.layers.6.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 4431360 }, { "name": "language_model.model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 8117760 }, { "name": "language_model.model.layers.6.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15490560 }, { "name": "language_model.model.layers.6.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15498240 }, { "name": "language_model.model.layers.6.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15505920 }, { "name": "language_model.model.layers.6.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 15513600 }, { "name": "language_model.model.layers.6.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 15514112 }, { "name": "language_model.model.layers.6.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 19446272 }, { "name": "language_model.model.layers.6.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 19937792 }, { "name": "language_model.model.layers.6.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 27802112 }, { "name": "language_model.model.layers.6.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 28785152 } ], "md5sum": "da003975acd626173d732cb623aa760a" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.7.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "eb4c699feae3e3885a400f14b5fbade0" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "f3ab52e1c9f6d35301d386a8d07254ec" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 28785152, "records": [ { "name": "language_model.model.layers.6.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.6.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.6.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 8847360 }, { "name": "language_model.model.layers.6.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 12779520 }, { "name": "language_model.model.layers.7.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 13271040 }, { "name": "language_model.model.layers.7.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 13278720 }, { "name": "language_model.model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 16965120 }, { "name": "language_model.model.layers.7.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24337920 }, { "name": "language_model.model.layers.7.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24345600 }, { "name": "language_model.model.layers.7.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24353280 }, { "name": "language_model.model.layers.7.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 24360960 }, { "name": "language_model.model.layers.7.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 24361472 }, { "name": "language_model.model.layers.7.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 28293632 } ], "md5sum": "b2803bcb7b5922029a23d66b21927c78" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.8.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "8678938de72f94449254546b35ff016f" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "7d35d34b55a8cb63beb120f5aa03b578" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 33209344, "records": [ { "name": "language_model.model.layers.7.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.7.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.7.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 8847360 }, { "name": "language_model.model.layers.7.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 8847872 }, { "name": "language_model.model.layers.7.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 16712192 }, { "name": "language_model.model.layers.7.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 17695232 }, { "name": "language_model.model.layers.7.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 21627392 }, { "name": "language_model.model.layers.8.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 22118912 }, { "name": "language_model.model.layers.8.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 22126592 }, { "name": "language_model.model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 25812992 }, { "name": "language_model.model.layers.8.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33185792 }, { "name": "language_model.model.layers.8.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33193472 }, { "name": "language_model.model.layers.8.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33201152 }, { "name": "language_model.model.layers.8.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 33208832 } ], "md5sum": "bd96398a21ade34db86249f8e9341d5b" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.9.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "7656e4fb2084c1a7f1a3240105309833" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "3e9222df6b72185df98dd7a26d095e6e" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 30236672, "records": [ { "name": "language_model.model.layers.8.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.8.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.8.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 4423680 }, { "name": "language_model.model.layers.8.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 12288000 }, { "name": "language_model.model.layers.8.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 13271040 }, { "name": "language_model.model.layers.8.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13271552 }, { "name": "language_model.model.layers.8.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 21135872 }, { "name": "language_model.model.layers.8.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22118912 }, { "name": "language_model.model.layers.8.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 26051072 }, { "name": "language_model.model.layers.9.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 26542592 }, { "name": "language_model.model.layers.9.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 26550272 } ], "md5sum": "9b35e5d7c994d1383d3902c6f2af1087" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 33447424, "records": [ { "name": "language_model.model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.9.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7372800 }, { "name": "language_model.model.layers.9.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7380480 }, { "name": "language_model.model.layers.9.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7388160 }, { "name": "language_model.model.layers.9.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 7395840 }, { "name": "language_model.model.layers.9.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7396352 }, { "name": "language_model.model.layers.9.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 11328512 }, { "name": "language_model.model.layers.9.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11820032 }, { "name": "language_model.model.layers.9.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 19684352 }, { "name": "language_model.model.layers.9.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 20667392 }, { "name": "language_model.model.layers.9.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20667904 }, { "name": "language_model.model.layers.9.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 28532224 }, { "name": "language_model.model.layers.9.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29515264 } ], "md5sum": "3e9c401d4c998da65c2a098a907463b9" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 29990400, "records": [ { "name": "language_model.model.layers.9.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.15.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 491520 }, { "name": "language_model.model.layers.15.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 499200 } ], "md5sum": "a07d5fd945a68c39371b3dc46f606da4" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 33208320, "records": [ { "name": "language_model.model.layers.15.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.15.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 3686400 }, { "name": "language_model.model.layers.15.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 3694080 }, { "name": "language_model.model.layers.15.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 3701760 }, { "name": "language_model.model.layers.16.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 3709440 }, { "name": "language_model.model.layers.16.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 3717120 } ], "md5sum": "be6ab2342e57421a2f220c3c1adc4a7f" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "434fe96a2df0a9481a42f3904181b3f1" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 33201664, "records": [ { "name": "language_model.model.layers.16.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 3686400 }, { "name": "language_model.model.layers.16.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11059200 }, { "name": "language_model.model.layers.16.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11066880 }, { "name": "language_model.model.layers.16.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11074560 }, { "name": "language_model.model.layers.16.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 11082240 }, { "name": "language_model.model.layers.16.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11082752 }, { "name": "language_model.model.layers.16.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 15014912 }, { "name": "language_model.model.layers.16.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 15506432 }, { "name": "language_model.model.layers.16.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 23370752 }, { "name": "language_model.model.layers.16.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 24353792 }, { "name": "language_model.model.layers.16.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24354304 }, { "name": "language_model.model.layers.16.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 32218624 } ], "md5sum": "a85e2d8f80babf2cecda4f8cb521c81e" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.17.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "493a013e83e7db654facc19a21436ad3" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "8da1fd2c75d96960c1977ef8ad1a5958" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 28785664, "records": [ { "name": "language_model.model.layers.16.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.16.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.17.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 4423680 }, { "name": "language_model.model.layers.17.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 4431360 }, { "name": "language_model.model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 8117760 }, { "name": "language_model.model.layers.17.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15490560 }, { "name": "language_model.model.layers.17.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15498240 }, { "name": "language_model.model.layers.17.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15505920 }, { "name": "language_model.model.layers.17.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 15513600 }, { "name": "language_model.model.layers.17.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 15514112 }, { "name": "language_model.model.layers.17.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 19446272 }, { "name": "language_model.model.layers.17.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 19937792 }, { "name": "language_model.model.layers.17.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 27802112 }, { "name": "language_model.model.layers.17.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 28785152 } ], "md5sum": "72749c5b6ff5b8a4d5c20b52c89cac6a" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.18.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "b44a39e3c6c1de060cce883a40e8fa8b" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "f46568f59a628cd69732934e949a578f" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 28785152, "records": [ { "name": "language_model.model.layers.17.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.17.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.17.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 8847360 }, { "name": "language_model.model.layers.17.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 12779520 }, { "name": "language_model.model.layers.18.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 13271040 }, { "name": "language_model.model.layers.18.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 13278720 }, { "name": "language_model.model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 16965120 }, { "name": "language_model.model.layers.18.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24337920 }, { "name": "language_model.model.layers.18.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24345600 }, { "name": "language_model.model.layers.18.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24353280 }, { "name": "language_model.model.layers.18.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 24360960 }, { "name": "language_model.model.layers.18.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 24361472 }, { "name": "language_model.model.layers.18.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 28293632 } ], "md5sum": "cbd1864929b2d01ebf1645095d243658" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.19.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "306898e198580361d7ac6a3680d975bd" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "ac8678ca8e5fd2cc35e54ef33d2a761d" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 33209344, "records": [ { "name": "language_model.model.layers.18.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.18.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.18.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 8847360 }, { "name": "language_model.model.layers.18.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 8847872 }, { "name": "language_model.model.layers.18.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 16712192 }, { "name": "language_model.model.layers.18.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 17695232 }, { "name": "language_model.model.layers.18.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 21627392 }, { "name": "language_model.model.layers.19.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 22118912 }, { "name": "language_model.model.layers.19.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 22126592 }, { "name": "language_model.model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 25812992 }, { "name": "language_model.model.layers.19.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33185792 }, { "name": "language_model.model.layers.19.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33193472 }, { "name": "language_model.model.layers.19.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33201152 }, { "name": "language_model.model.layers.19.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 33208832 } ], "md5sum": "bdcbbfac25bc18a9ddab603d3bd6bd3d" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.20.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "eb866b63a1678ffefa976c8678cad86c" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "8abfbf616e335066db53ce6f51d9ee30" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 30236672, "records": [ { "name": "language_model.model.layers.19.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.19.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.19.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 4423680 }, { "name": "language_model.model.layers.19.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 12288000 }, { "name": "language_model.model.layers.19.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 13271040 }, { "name": "language_model.model.layers.19.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13271552 }, { "name": "language_model.model.layers.19.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 21135872 }, { "name": "language_model.model.layers.19.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22118912 }, { "name": "language_model.model.layers.19.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 26051072 }, { "name": "language_model.model.layers.20.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 26542592 }, { "name": "language_model.model.layers.20.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 26550272 } ], "md5sum": "aeea8482c8e5a1a6980458b1d8f69740" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 33447424, "records": [ { "name": "language_model.model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.20.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7372800 }, { "name": "language_model.model.layers.20.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7380480 }, { "name": "language_model.model.layers.20.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7388160 }, { "name": "language_model.model.layers.20.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 7395840 }, { "name": "language_model.model.layers.20.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7396352 }, { "name": "language_model.model.layers.20.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 11328512 }, { "name": "language_model.model.layers.20.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11820032 }, { "name": "language_model.model.layers.20.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 19684352 }, { "name": "language_model.model.layers.20.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 20667392 }, { "name": "language_model.model.layers.20.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20667904 }, { "name": "language_model.model.layers.20.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 28532224 }, { "name": "language_model.model.layers.20.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29515264 } ], "md5sum": "d109fbd8ca68e5ce92aab466636c0d1d" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 29990400, "records": [ { "name": "language_model.model.layers.20.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.21.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 491520 }, { "name": "language_model.model.layers.21.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 499200 } ], "md5sum": "5141f6934d5a92dcb53f75dce36cee76" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "b1e53948dca61a3c442c91ccb8f114d8" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 33201664, "records": [ { "name": "language_model.model.layers.21.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 3686400 }, { "name": "language_model.model.layers.21.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11059200 }, { "name": "language_model.model.layers.21.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11066880 }, { "name": "language_model.model.layers.21.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11074560 }, { "name": "language_model.model.layers.21.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 11082240 }, { "name": "language_model.model.layers.21.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11082752 }, { "name": "language_model.model.layers.21.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 15014912 }, { "name": "language_model.model.layers.21.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 15506432 }, { "name": "language_model.model.layers.21.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 23370752 }, { "name": "language_model.model.layers.21.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 24353792 }, { "name": "language_model.model.layers.21.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24354304 }, { "name": "language_model.model.layers.21.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 32218624 } ], "md5sum": "c7a257473d8ab6847e1387b3f03a2936" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.22.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "3a175f7caee2987ba0390b3e5984e70a" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "f0e4a14d81e0a73c2dfc6bd7e731939e" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 28785664, "records": [ { "name": "language_model.model.layers.21.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.21.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.22.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 4423680 }, { "name": "language_model.model.layers.22.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 4431360 }, { "name": "language_model.model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 8117760 }, { "name": "language_model.model.layers.22.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15490560 }, { "name": "language_model.model.layers.22.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15498240 }, { "name": "language_model.model.layers.22.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15505920 }, { "name": "language_model.model.layers.22.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 15513600 }, { "name": "language_model.model.layers.22.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 15514112 }, { "name": "language_model.model.layers.22.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 19446272 }, { "name": "language_model.model.layers.22.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 19937792 }, { "name": "language_model.model.layers.22.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 27802112 }, { "name": "language_model.model.layers.22.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 28785152 } ], "md5sum": "dec6723dc71104aa5cb8e1b23df96450" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.23.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "70fbfc68560bf1c5bdf080f13e8f8012" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "46d2c272f72bdfcc3dec210c5dde5676" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 28785152, "records": [ { "name": "language_model.model.layers.22.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.22.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.22.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 8847360 }, { "name": "language_model.model.layers.22.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 12779520 }, { "name": "language_model.model.layers.23.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 13271040 }, { "name": "language_model.model.layers.23.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 13278720 }, { "name": "language_model.model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 16965120 }, { "name": "language_model.model.layers.23.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24337920 }, { "name": "language_model.model.layers.23.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24345600 }, { "name": "language_model.model.layers.23.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24353280 }, { "name": "language_model.model.layers.23.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 24360960 }, { "name": "language_model.model.layers.23.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 24361472 }, { "name": "language_model.model.layers.23.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 28293632 } ], "md5sum": "1459805a42872100b63f5c1b4172f3fa" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.24.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "523685039f7e8c5e2eb4440d71916699" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "f6216cd790a26c272db0d18ba3a34048" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 33209344, "records": [ { "name": "language_model.model.layers.23.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.23.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.23.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 8847360 }, { "name": "language_model.model.layers.23.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 8847872 }, { "name": "language_model.model.layers.23.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 16712192 }, { "name": "language_model.model.layers.23.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 17695232 }, { "name": "language_model.model.layers.23.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 21627392 }, { "name": "language_model.model.layers.24.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 22118912 }, { "name": "language_model.model.layers.24.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 22126592 }, { "name": "language_model.model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 25812992 }, { "name": "language_model.model.layers.24.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33185792 }, { "name": "language_model.model.layers.24.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33193472 }, { "name": "language_model.model.layers.24.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33201152 }, { "name": "language_model.model.layers.24.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 33208832 } ], "md5sum": "249b1941adaad9e04ed72958e0c7e5b6" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.25.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "739993fd0bf847dbe65f40b42d5f62b5" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "7fa578ac1ba26b57be34d844915ff8b1" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 30236672, "records": [ { "name": "language_model.model.layers.24.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.24.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.24.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 4423680 }, { "name": "language_model.model.layers.24.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 12288000 }, { "name": "language_model.model.layers.24.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 13271040 }, { "name": "language_model.model.layers.24.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13271552 }, { "name": "language_model.model.layers.24.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 21135872 }, { "name": "language_model.model.layers.24.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22118912 }, { "name": "language_model.model.layers.24.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 26051072 }, { "name": "language_model.model.layers.25.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 26542592 }, { "name": "language_model.model.layers.25.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 26550272 } ], "md5sum": "7336676ca7aa9d201d409bd5cae165e5" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 33447424, "records": [ { "name": "language_model.model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.25.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7372800 }, { "name": "language_model.model.layers.25.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7380480 }, { "name": "language_model.model.layers.25.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7388160 }, { "name": "language_model.model.layers.25.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 7395840 }, { "name": "language_model.model.layers.25.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7396352 }, { "name": "language_model.model.layers.25.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 11328512 }, { "name": "language_model.model.layers.25.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11820032 }, { "name": "language_model.model.layers.25.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 19684352 }, { "name": "language_model.model.layers.25.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 20667392 }, { "name": "language_model.model.layers.25.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20667904 }, { "name": "language_model.model.layers.25.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 28532224 }, { "name": "language_model.model.layers.25.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29515264 } ], "md5sum": "61d5ee69bdd310f87e76cee5ece671a2" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "cd612a55cfaaf10d2f4b53dd5a127edb" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 29983744, "records": [ { "name": "language_model.model.layers.25.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 491520 }, { "name": "language_model.model.layers.26.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 7864320 }, { "name": "language_model.model.layers.26.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7864832 }, { "name": "language_model.model.layers.26.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 11796992 }, { "name": "language_model.model.layers.26.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 12288512 }, { "name": "language_model.model.layers.26.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 20152832 }, { "name": "language_model.model.layers.26.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 21135872 }, { "name": "language_model.model.layers.26.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 21136384 }, { "name": "language_model.model.layers.26.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 29000704 } ], "md5sum": "4bc690313f36171766d94b0e47f763d6" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.26.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "a2746c4d2e989b5160015e7fb894f240" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.27.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "13128040602293957ecd71b4ea1434a9" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "c15766d2ebc02ef680e788d0dbadbc8c" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 32502784, "records": [ { "name": "language_model.model.layers.26.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.26.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.26.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 4423680 }, { "name": "language_model.model.layers.26.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 4431360 }, { "name": "language_model.model.layers.26.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 8117760 }, { "name": "language_model.model.layers.26.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 8125440 }, { "name": "language_model.model.layers.26.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 8133120 }, { "name": "language_model.model.layers.27.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 8140800 }, { "name": "language_model.model.layers.27.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 8148480 }, { "name": "language_model.model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 11834880 }, { "name": "language_model.model.layers.27.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 19207680 }, { "name": "language_model.model.layers.27.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 19215360 }, { "name": "language_model.model.layers.27.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 19223040 }, { "name": "language_model.model.layers.27.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 19230720 }, { "name": "language_model.model.layers.27.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 19231232 }, { "name": "language_model.model.layers.27.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 23163392 }, { "name": "language_model.model.layers.27.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 23654912 }, { "name": "language_model.model.layers.27.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 31519232 }, { "name": "language_model.model.layers.27.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 32502272 } ], "md5sum": "2eb419b878cd04c935049e60b36fe349" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.28.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "87bfa1ffe11459584c1d433984d50bf4" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "fa01cbc4193d8237963e2c390a37b1ec" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 28785152, "records": [ { "name": "language_model.model.layers.27.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.27.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.27.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 8847360 }, { "name": "language_model.model.layers.27.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 12779520 }, { "name": "language_model.model.layers.28.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 13271040 }, { "name": "language_model.model.layers.28.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 13278720 }, { "name": "language_model.model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 16965120 }, { "name": "language_model.model.layers.28.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24337920 }, { "name": "language_model.model.layers.28.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24345600 }, { "name": "language_model.model.layers.28.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24353280 }, { "name": "language_model.model.layers.28.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 24360960 }, { "name": "language_model.model.layers.28.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 24361472 }, { "name": "language_model.model.layers.28.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 28293632 } ], "md5sum": "38b89c09fe38dd0a8aad504b57e80dbd" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.29.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "e3886178d9992a9842921cf51b4abcb1" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "b3bab7bcbea9d329e9e301edd9dbf9cb" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 33209344, "records": [ { "name": "language_model.model.layers.28.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.28.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.28.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 8847360 }, { "name": "language_model.model.layers.28.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 8847872 }, { "name": "language_model.model.layers.28.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 16712192 }, { "name": "language_model.model.layers.28.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 17695232 }, { "name": "language_model.model.layers.28.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 21627392 }, { "name": "language_model.model.layers.29.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 22118912 }, { "name": "language_model.model.layers.29.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 22126592 }, { "name": "language_model.model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 25812992 }, { "name": "language_model.model.layers.29.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33185792 }, { "name": "language_model.model.layers.29.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33193472 }, { "name": "language_model.model.layers.29.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33201152 }, { "name": "language_model.model.layers.29.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 33208832 } ], "md5sum": "96ba5bc6586f3110887569d842980f48" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.30.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "61a08a0db610e13b95877e45332821fb" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "cfe210bed8f8dabca881e0a1799177d4" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 30236672, "records": [ { "name": "language_model.model.layers.29.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.29.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.29.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 4423680 }, { "name": "language_model.model.layers.29.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 12288000 }, { "name": "language_model.model.layers.29.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 13271040 }, { "name": "language_model.model.layers.29.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13271552 }, { "name": "language_model.model.layers.29.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 21135872 }, { "name": "language_model.model.layers.29.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22118912 }, { "name": "language_model.model.layers.29.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 26051072 }, { "name": "language_model.model.layers.30.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 26542592 }, { "name": "language_model.model.layers.30.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 26550272 } ], "md5sum": "75ffd371f19e4bae177b194a2669a037" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 33447424, "records": [ { "name": "language_model.model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.30.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7372800 }, { "name": "language_model.model.layers.30.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7380480 }, { "name": "language_model.model.layers.30.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7388160 }, { "name": "language_model.model.layers.30.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 7395840 }, { "name": "language_model.model.layers.30.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7396352 }, { "name": "language_model.model.layers.30.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 11328512 }, { "name": "language_model.model.layers.30.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11820032 }, { "name": "language_model.model.layers.30.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 19684352 }, { "name": "language_model.model.layers.30.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 20667392 }, { "name": "language_model.model.layers.30.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20667904 }, { "name": "language_model.model.layers.30.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 28532224 }, { "name": "language_model.model.layers.30.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29515264 } ], "md5sum": "d95fecc743541d6838bf0175427330a2" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 29990400, "records": [ { "name": "language_model.model.layers.30.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.31.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 491520 }, { "name": "language_model.model.layers.31.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 499200 } ], "md5sum": "b38ef500f75c26a81bb7def542c0e7ba" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "ca854788b990a57eb92926e7e2ac2b40" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 33201664, "records": [ { "name": "language_model.model.layers.31.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 3686400 }, { "name": "language_model.model.layers.31.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11059200 }, { "name": "language_model.model.layers.31.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11066880 }, { "name": "language_model.model.layers.31.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11074560 }, { "name": "language_model.model.layers.31.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 11082240 }, { "name": "language_model.model.layers.31.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11082752 }, { "name": "language_model.model.layers.31.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 15014912 }, { "name": "language_model.model.layers.31.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 15506432 }, { "name": "language_model.model.layers.31.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 23370752 }, { "name": "language_model.model.layers.31.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 24353792 }, { "name": "language_model.model.layers.31.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24354304 }, { "name": "language_model.model.layers.31.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 32218624 } ], "md5sum": "2c0aa81c7da5452fccdfe5927fcb9e58" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.32.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "7b7cbf56aa31c8b773a401a61029c8fe" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "0ed1bb2811056a6017e70bc619971892" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 28785664, "records": [ { "name": "language_model.model.layers.31.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.31.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.32.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 4423680 }, { "name": "language_model.model.layers.32.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 4431360 }, { "name": "language_model.model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 8117760 }, { "name": "language_model.model.layers.32.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15490560 }, { "name": "language_model.model.layers.32.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15498240 }, { "name": "language_model.model.layers.32.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15505920 }, { "name": "language_model.model.layers.32.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 15513600 }, { "name": "language_model.model.layers.32.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 15514112 }, { "name": "language_model.model.layers.32.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 19446272 }, { "name": "language_model.model.layers.32.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 19937792 }, { "name": "language_model.model.layers.32.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 27802112 }, { "name": "language_model.model.layers.32.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 28785152 } ], "md5sum": "0c90e482a6536666ddaa620aeb967024" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.33.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "3e1b6121ba279409c9a39a818c17bdea" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "e2b77d8b4931257b53118d7b31e29178" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 28785152, "records": [ { "name": "language_model.model.layers.32.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.32.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.32.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 8847360 }, { "name": "language_model.model.layers.32.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 12779520 }, { "name": "language_model.model.layers.33.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 13271040 }, { "name": "language_model.model.layers.33.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 13278720 }, { "name": "language_model.model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 16965120 }, { "name": "language_model.model.layers.33.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24337920 }, { "name": "language_model.model.layers.33.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24345600 }, { "name": "language_model.model.layers.33.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24353280 }, { "name": "language_model.model.layers.33.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 24360960 }, { "name": "language_model.model.layers.33.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 24361472 }, { "name": "language_model.model.layers.33.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 28293632 } ], "md5sum": "c1cd09747b7b96e4f5e7f4cb81d62252" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.34.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "aa1c5829ef7aafeca614c2cec1144e08" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "62fd8c86e8afad9fefbe6f9944dde6b8" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 33209344, "records": [ { "name": "language_model.model.layers.33.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.33.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.33.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 8847360 }, { "name": "language_model.model.layers.33.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 8847872 }, { "name": "language_model.model.layers.33.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 16712192 }, { "name": "language_model.model.layers.33.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 17695232 }, { "name": "language_model.model.layers.33.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 21627392 }, { "name": "language_model.model.layers.34.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 22118912 }, { "name": "language_model.model.layers.34.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 22126592 }, { "name": "language_model.model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 25812992 }, { "name": "language_model.model.layers.34.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33185792 }, { "name": "language_model.model.layers.34.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33193472 }, { "name": "language_model.model.layers.34.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33201152 }, { "name": "language_model.model.layers.34.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 33208832 } ], "md5sum": "9cb7f2f1bc079b7ca04f690c5e78de8a" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.35.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "5e35d938968786423bab54a138e78c5c" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "14a4329438ec3de65920e765c11b470c" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 30236672, "records": [ { "name": "language_model.model.layers.34.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.34.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.34.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 4423680 }, { "name": "language_model.model.layers.34.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 12288000 }, { "name": "language_model.model.layers.34.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 13271040 }, { "name": "language_model.model.layers.34.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13271552 }, { "name": "language_model.model.layers.34.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 21135872 }, { "name": "language_model.model.layers.34.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22118912 }, { "name": "language_model.model.layers.34.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 26051072 }, { "name": "language_model.model.layers.35.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 26542592 }, { "name": "language_model.model.layers.35.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 26550272 } ], "md5sum": "e2189eb79ea79f1bf634190088c5cad2" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 33447424, "records": [ { "name": "language_model.model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.35.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7372800 }, { "name": "language_model.model.layers.35.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7380480 }, { "name": "language_model.model.layers.35.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7388160 }, { "name": "language_model.model.layers.35.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 7395840 }, { "name": "language_model.model.layers.35.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7396352 }, { "name": "language_model.model.layers.35.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 11328512 }, { "name": "language_model.model.layers.35.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11820032 }, { "name": "language_model.model.layers.35.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 19684352 }, { "name": "language_model.model.layers.35.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 20667392 }, { "name": "language_model.model.layers.35.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20667904 }, { "name": "language_model.model.layers.35.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 28532224 }, { "name": "language_model.model.layers.35.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29515264 } ], "md5sum": "bd487d987af4ebddd43ec98051048fdd" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 29990400, "records": [ { "name": "language_model.model.layers.35.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.36.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 491520 }, { "name": "language_model.model.layers.36.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 499200 } ], "md5sum": "21feae465150c464542f396da1a08a8b" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "25b3f89e9b5055c7f9fda9ef4490820a" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 33201664, "records": [ { "name": "language_model.model.layers.36.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 3686400 }, { "name": "language_model.model.layers.36.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11059200 }, { "name": "language_model.model.layers.36.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11066880 }, { "name": "language_model.model.layers.36.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11074560 }, { "name": "language_model.model.layers.36.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 11082240 }, { "name": "language_model.model.layers.36.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11082752 }, { "name": "language_model.model.layers.36.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 15014912 }, { "name": "language_model.model.layers.36.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 15506432 }, { "name": "language_model.model.layers.36.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 23370752 }, { "name": "language_model.model.layers.36.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 24353792 }, { "name": "language_model.model.layers.36.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24354304 }, { "name": "language_model.model.layers.36.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 32218624 } ], "md5sum": "f19833c518dcff380bf82029964b824f" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "17067a4496ce776c74a3b05869f614be" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 32932864, "records": [ { "name": "language_model.model.layers.36.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.36.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 4423680 }, { "name": "language_model.model.layers.37.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 11796480 }, { "name": "language_model.model.layers.37.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11796992 }, { "name": "language_model.model.layers.37.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 15729152 }, { "name": "language_model.model.layers.37.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 16220672 }, { "name": "language_model.model.layers.37.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 24084992 }, { "name": "language_model.model.layers.37.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 25068032 }, { "name": "language_model.model.layers.37.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 25068544 } ], "md5sum": "85da918b97583116bb928bffb6b5655a" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.37.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "0f9b8a452a6a98ac4b6420078e7df8a4" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.38.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "d3fcd74507d833489ec3d150a6858b1d" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "ffcf81df7d50819154358706b551ec96" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 33485824, "records": [ { "name": "language_model.model.layers.37.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 0 }, { "name": "language_model.model.layers.37.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 983040 }, { "name": "language_model.model.layers.37.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 4915200 }, { "name": "language_model.model.layers.37.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 5406720 }, { "name": "language_model.model.layers.37.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 5414400 }, { "name": "language_model.model.layers.37.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 9100800 }, { "name": "language_model.model.layers.37.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 9108480 }, { "name": "language_model.model.layers.37.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 9116160 }, { "name": "language_model.model.layers.38.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 9123840 }, { "name": "language_model.model.layers.38.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 9131520 }, { "name": "language_model.model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 12817920 }, { "name": "language_model.model.layers.38.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 20190720 }, { "name": "language_model.model.layers.38.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 20198400 }, { "name": "language_model.model.layers.38.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 20206080 }, { "name": "language_model.model.layers.38.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 20213760 }, { "name": "language_model.model.layers.38.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 20214272 }, { "name": "language_model.model.layers.38.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 24146432 }, { "name": "language_model.model.layers.38.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24637952 }, { "name": "language_model.model.layers.38.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 32502272 }, { "name": "language_model.model.layers.38.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 33485312 } ], "md5sum": "e1d4d2f173fc974fbcd727d4cf10f17f" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.39.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "bfd8946ab15c8434b2b21319062c115d" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "efa9f846698fb9306b3f12b8e4ca4cdb" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 28785152, "records": [ { "name": "language_model.model.layers.38.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.38.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.38.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 8847360 }, { "name": "language_model.model.layers.38.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 12779520 }, { "name": "language_model.model.layers.39.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 13271040 }, { "name": "language_model.model.layers.39.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 13278720 }, { "name": "language_model.model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 16965120 }, { "name": "language_model.model.layers.39.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24337920 }, { "name": "language_model.model.layers.39.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24345600 }, { "name": "language_model.model.layers.39.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24353280 }, { "name": "language_model.model.layers.39.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 24360960 }, { "name": "language_model.model.layers.39.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 24361472 }, { "name": "language_model.model.layers.39.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 28293632 } ], "md5sum": "a54c8cfeb4e90276110c06d8f3d007ba" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.40.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "c793769420a7219026b4cfc9b466703f" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "ceb5b17527b69c13d9b1af53f7ae6705" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 33209344, "records": [ { "name": "language_model.model.layers.39.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.39.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.39.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 8847360 }, { "name": "language_model.model.layers.39.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 8847872 }, { "name": "language_model.model.layers.39.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 16712192 }, { "name": "language_model.model.layers.39.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 17695232 }, { "name": "language_model.model.layers.39.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 21627392 }, { "name": "language_model.model.layers.40.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 22118912 }, { "name": "language_model.model.layers.40.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 22126592 }, { "name": "language_model.model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 25812992 }, { "name": "language_model.model.layers.40.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33185792 }, { "name": "language_model.model.layers.40.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33193472 }, { "name": "language_model.model.layers.40.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33201152 }, { "name": "language_model.model.layers.40.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 33208832 } ], "md5sum": "0ca9bf508dc02ae2bffd0fab8bd0cadf" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.41.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "a7b7b1e72d1a17517f127e74c9f4500c" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "d7258a1e3a23baa32ef9c8fdc7f9026e" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 30236672, "records": [ { "name": "language_model.model.layers.40.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.40.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.40.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 4423680 }, { "name": "language_model.model.layers.40.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 12288000 }, { "name": "language_model.model.layers.40.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 13271040 }, { "name": "language_model.model.layers.40.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13271552 }, { "name": "language_model.model.layers.40.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 21135872 }, { "name": "language_model.model.layers.40.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22118912 }, { "name": "language_model.model.layers.40.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 26051072 }, { "name": "language_model.model.layers.41.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 26542592 }, { "name": "language_model.model.layers.41.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 26550272 } ], "md5sum": "65bbd67fbc5ed26a7733e1ce11578c27" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 33447424, "records": [ { "name": "language_model.model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.41.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7372800 }, { "name": "language_model.model.layers.41.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7380480 }, { "name": "language_model.model.layers.41.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7388160 }, { "name": "language_model.model.layers.41.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 7395840 }, { "name": "language_model.model.layers.41.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7396352 }, { "name": "language_model.model.layers.41.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 11328512 }, { "name": "language_model.model.layers.41.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11820032 }, { "name": "language_model.model.layers.41.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 19684352 }, { "name": "language_model.model.layers.41.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 20667392 }, { "name": "language_model.model.layers.41.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20667904 }, { "name": "language_model.model.layers.41.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 28532224 }, { "name": "language_model.model.layers.41.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29515264 } ], "md5sum": "ecd550ab18809ccad564e6113e641a0b" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 29990400, "records": [ { "name": "language_model.model.layers.41.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.42.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 491520 }, { "name": "language_model.model.layers.42.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 499200 } ], "md5sum": "37a22d794be80191a2ddff3b9832b071" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "fa1689f506f604a23fed535c5efc9c8d" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 33201664, "records": [ { "name": "language_model.model.layers.42.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 3686400 }, { "name": "language_model.model.layers.42.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11059200 }, { "name": "language_model.model.layers.42.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11066880 }, { "name": "language_model.model.layers.42.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11074560 }, { "name": "language_model.model.layers.42.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 11082240 }, { "name": "language_model.model.layers.42.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11082752 }, { "name": "language_model.model.layers.42.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 15014912 }, { "name": "language_model.model.layers.42.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 15506432 }, { "name": "language_model.model.layers.42.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 23370752 }, { "name": "language_model.model.layers.42.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 24353792 }, { "name": "language_model.model.layers.42.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24354304 }, { "name": "language_model.model.layers.42.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 32218624 } ], "md5sum": "3fa3464c4f1c42228dcbfabb2e029e3a" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.43.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "5d57d7181a1cde3ebc1ca1bffae8e041" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "cc71cb222dd06f810ee3391b2581c582" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 28785664, "records": [ { "name": "language_model.model.layers.42.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.42.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.43.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 4423680 }, { "name": "language_model.model.layers.43.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 4431360 }, { "name": "language_model.model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 8117760 }, { "name": "language_model.model.layers.43.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15490560 }, { "name": "language_model.model.layers.43.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15498240 }, { "name": "language_model.model.layers.43.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 15505920 }, { "name": "language_model.model.layers.43.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 15513600 }, { "name": "language_model.model.layers.43.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 15514112 }, { "name": "language_model.model.layers.43.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 19446272 }, { "name": "language_model.model.layers.43.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 19937792 }, { "name": "language_model.model.layers.43.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 27802112 }, { "name": "language_model.model.layers.43.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 28785152 } ], "md5sum": "806d96bd5dbd9b53bb57697b522b0ce7" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.44.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "ef695d60aad31380c9589d37d673e980" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "84ae3b74d1140d6d2e3b44983007c686" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 28785152, "records": [ { "name": "language_model.model.layers.43.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.43.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.43.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 8847360 }, { "name": "language_model.model.layers.43.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 12779520 }, { "name": "language_model.model.layers.44.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 13271040 }, { "name": "language_model.model.layers.44.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 13278720 }, { "name": "language_model.model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 16965120 }, { "name": "language_model.model.layers.44.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24337920 }, { "name": "language_model.model.layers.44.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24345600 }, { "name": "language_model.model.layers.44.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 24353280 }, { "name": "language_model.model.layers.44.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 24360960 }, { "name": "language_model.model.layers.44.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 24361472 }, { "name": "language_model.model.layers.44.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 28293632 } ], "md5sum": "364ce215f4f4ed59c5dc7c37b581db23" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.45.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "4ea962ba2750e22ad7e2143d98205585" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "30f232f10b8df2c51b82f837c1eccef7" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 33209344, "records": [ { "name": "language_model.model.layers.44.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "language_model.model.layers.44.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 7864320 }, { "name": "language_model.model.layers.44.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 8847360 }, { "name": "language_model.model.layers.44.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 8847872 }, { "name": "language_model.model.layers.44.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 16712192 }, { "name": "language_model.model.layers.44.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 17695232 }, { "name": "language_model.model.layers.44.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 21627392 }, { "name": "language_model.model.layers.45.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 22118912 }, { "name": "language_model.model.layers.45.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 22126592 }, { "name": "language_model.model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 25812992 }, { "name": "language_model.model.layers.45.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33185792 }, { "name": "language_model.model.layers.45.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33193472 }, { "name": "language_model.model.layers.45.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 33201152 }, { "name": "language_model.model.layers.45.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 33208832 } ], "md5sum": "568e79a9855f6529b185cac9443df8fd" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "language_model.model.layers.46.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 0 } ], "md5sum": "42dbefd55f8a07570b032d405cbde8f0" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "c23d88e92998881a6075944b0d81c997" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 30236672, "records": [ { "name": "language_model.model.layers.45.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.45.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.layers.45.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 4423680 }, { "name": "language_model.model.layers.45.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 12288000 }, { "name": "language_model.model.layers.45.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 13271040 }, { "name": "language_model.model.layers.45.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13271552 }, { "name": "language_model.model.layers.45.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 21135872 }, { "name": "language_model.model.layers.45.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 22118912 }, { "name": "language_model.model.layers.45.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 26051072 }, { "name": "language_model.model.layers.46.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 26542592 }, { "name": "language_model.model.layers.46.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 26550272 } ], "md5sum": "8f97ee54aae96f8d05850913e0619dea" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 33447424, "records": [ { "name": "language_model.model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 0 }, { "name": "language_model.model.layers.46.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7372800 }, { "name": "language_model.model.layers.46.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7380480 }, { "name": "language_model.model.layers.46.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 7388160 }, { "name": "language_model.model.layers.46.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 7395840 }, { "name": "language_model.model.layers.46.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 7396352 }, { "name": "language_model.model.layers.46.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 11328512 }, { "name": "language_model.model.layers.46.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 11820032 }, { "name": "language_model.model.layers.46.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 19684352 }, { "name": "language_model.model.layers.46.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 20667392 }, { "name": "language_model.model.layers.46.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 20667904 }, { "name": "language_model.model.layers.46.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 28532224 }, { "name": "language_model.model.layers.46.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 29515264 } ], "md5sum": "8a955ce1b16df71f19decd3f9618220a" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 29990400, "records": [ { "name": "language_model.model.layers.46.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 0 }, { "name": "language_model.model.layers.47.input_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 491520 }, { "name": "language_model.model.layers.47.mlp.down_proj.q_weight", "shape": [ 3840, 1920 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29491200, "byteOffset": 499200 } ], "md5sum": "5a6c4c02c997c3b1a0272bf93f1fe19b" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 58982400, "records": [ { "name": "language_model.model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 30720, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58982400, "byteOffset": 0 } ], "md5sum": "21c0e63d3e259641ffeecbe941cbb2d0" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 33201664, "records": [ { "name": "language_model.model.layers.47.mlp.down_proj.q_scale", "shape": [ 3840, 480 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3686400, "byteOffset": 0 }, { "name": "language_model.model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 30720, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7372800, "byteOffset": 3686400 }, { "name": "language_model.model.layers.47.post_attention_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11059200 }, { "name": "language_model.model.layers.47.post_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11066880 }, { "name": "language_model.model.layers.47.pre_feedforward_layernorm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 11074560 }, { "name": "language_model.model.layers.47.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 11082240 }, { "name": "language_model.model.layers.47.self_attn.k_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 11082752 }, { "name": "language_model.model.layers.47.self_attn.k_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 15014912 }, { "name": "language_model.model.layers.47.self_attn.o_proj.q_weight", "shape": [ 3840, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 15506432 }, { "name": "language_model.model.layers.47.self_attn.o_proj.q_scale", "shape": [ 3840, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 23370752 }, { "name": "language_model.model.layers.47.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 24353792 }, { "name": "language_model.model.layers.47.self_attn.q_proj.q_weight", "shape": [ 4096, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 24354304 }, { "name": "language_model.model.layers.47.self_attn.q_proj.q_scale", "shape": [ 4096, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 983040, "byteOffset": 32218624 } ], "md5sum": "530f4448c87ede1b7f5605ec31681e95" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 4431360, "records": [ { "name": "language_model.model.layers.47.self_attn.v_proj.q_weight", "shape": [ 2048, 480 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3932160, "byteOffset": 0 }, { "name": "language_model.model.layers.47.self_attn.v_proj.q_scale", "shape": [ 2048, 120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 491520, "byteOffset": 3932160 }, { "name": "language_model.model.norm.weight", "shape": [ 3840 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7680, "byteOffset": 4423680 } ], "md5sum": "9e45b6bd190ffc65608464456da6ae17" } ] }