michaelfeil committed on
Commit
49f9b5b
·
verified ·
1 Parent(s): aa85f24

Add files using upload-large-folder tool

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Llama4ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_chunk_size": 8192,
7
+ "attention_dropout": 0.0,
8
+ "attn_scale": 0.1,
9
+ "attn_temperature_tuning": 4,
10
+ "bos_token_id": 200000,
11
+ "cache_implementation": "hybrid",
12
+ "eos_token_id": [
13
+ 200001,
14
+ 200007,
15
+ 200008
16
+ ],
17
+ "floor_scale": 8192,
18
+ "for_llm_compressor": false,
19
+ "head_dim": 128,
20
+ "hidden_act": "silu",
21
+ "hidden_size": 5120,
22
+ "initializer_range": 0.02,
23
+ "interleave_moe_layer_step": 1,
24
+ "intermediate_size": 8192,
25
+ "intermediate_size_mlp": 16384,
26
+ "max_position_embeddings": 262144,
27
+ "model_type": "llama4_text",
28
+ "moe_layers": [
29
+ 0,
30
+ 1,
31
+ 2,
32
+ 3,
33
+ 4,
34
+ 5,
35
+ 6,
36
+ 7,
37
+ 8,
38
+ 9,
39
+ 10,
40
+ 11,
41
+ 12,
42
+ 13,
43
+ 14,
44
+ 15,
45
+ 16,
46
+ 17,
47
+ 18,
48
+ 19,
49
+ 20,
50
+ 21,
51
+ 22,
52
+ 23,
53
+ 24,
54
+ 25,
55
+ 26,
56
+ 27,
57
+ 28,
58
+ 29,
59
+ 30,
60
+ 31,
61
+ 32,
62
+ 33,
63
+ 34,
64
+ 35,
65
+ 36,
66
+ 37,
67
+ 38,
68
+ 39,
69
+ 40,
70
+ 41,
71
+ 42,
72
+ 43,
73
+ 44,
74
+ 45,
75
+ 46,
76
+ 47
77
+ ],
78
+ "no_rope_layers": [
79
+ 1,
80
+ 1,
81
+ 1,
82
+ 0,
83
+ 1,
84
+ 1,
85
+ 1,
86
+ 0,
87
+ 1,
88
+ 1,
89
+ 1,
90
+ 0,
91
+ 1,
92
+ 1,
93
+ 1,
94
+ 0,
95
+ 1,
96
+ 1,
97
+ 1,
98
+ 0,
99
+ 1,
100
+ 1,
101
+ 1,
102
+ 0,
103
+ 1,
104
+ 1,
105
+ 1,
106
+ 0,
107
+ 1,
108
+ 1,
109
+ 1,
110
+ 0,
111
+ 1,
112
+ 1,
113
+ 1,
114
+ 0,
115
+ 1,
116
+ 1,
117
+ 1,
118
+ 0,
119
+ 1,
120
+ 1,
121
+ 1,
122
+ 0,
123
+ 1,
124
+ 1,
125
+ 1,
126
+ 0
127
+ ],
128
+ "num_attention_heads": 40,
129
+ "num_experts_per_tok": 1,
130
+ "num_hidden_layers": 48,
131
+ "num_key_value_heads": 8,
132
+ "num_local_experts": 16,
133
+ "output_router_logits": false,
134
+ "pad_token_id": 200018,
135
+ "rms_norm_eps": 1e-05,
136
+ "rope_scaling": {
137
+ "factor": 16.0,
138
+ "high_freq_factor": 1.0,
139
+ "low_freq_factor": 1.0,
140
+ "original_max_position_embeddings": 8192,
141
+ "rope_type": "llama3"
142
+ },
143
+ "rope_theta": 500000.0,
144
+ "router_aux_loss_coef": 0.001,
145
+ "router_jitter_noise": 0.0,
146
+ "tie_word_embeddings": false,
147
+ "torch_dtype": "bfloat16",
148
+ "transformers_version": "4.51.3",
149
+ "use_cache": true,
150
+ "use_qk_norm": true,
151
+ "vocab_size": 202048
152
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 200000,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 200001,
6
+ 200007,
7
+ 200008
8
+ ],
9
+ "pad_token_id": 200018,
10
+ "temperature": 0.6,
11
+ "top_p": 0.9,
12
+ "transformers_version": "4.51.3"
13
+ }
hf_quant_config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "producer": {
3
+ "name": "modelopt",
4
+ "version": "0.29.0"
5
+ },
6
+ "quantization": {
7
+ "quant_algo": "FP8",
8
+ "kv_cache_quant_algo": "FP8",
9
+ "exclude_modules": [
10
+ "model.layers.0.feed_forward.router",
11
+ "model.layers.1.feed_forward.router",
12
+ "model.layers.10.feed_forward.router",
13
+ "model.layers.11.feed_forward.router",
14
+ "model.layers.12.feed_forward.router",
15
+ "model.layers.13.feed_forward.router",
16
+ "model.layers.14.feed_forward.router",
17
+ "model.layers.15.feed_forward.router",
18
+ "model.layers.16.feed_forward.router",
19
+ "model.layers.17.feed_forward.router",
20
+ "model.layers.18.feed_forward.router",
21
+ "model.layers.19.feed_forward.router",
22
+ "model.layers.2.feed_forward.router",
23
+ "model.layers.20.feed_forward.router",
24
+ "model.layers.21.feed_forward.router",
25
+ "model.layers.22.feed_forward.router",
26
+ "model.layers.23.feed_forward.router",
27
+ "model.layers.24.feed_forward.router",
28
+ "model.layers.25.feed_forward.router",
29
+ "model.layers.26.feed_forward.router",
30
+ "model.layers.27.feed_forward.router",
31
+ "model.layers.28.feed_forward.router",
32
+ "model.layers.29.feed_forward.router",
33
+ "model.layers.3.feed_forward.router",
34
+ "model.layers.30.feed_forward.router",
35
+ "model.layers.31.feed_forward.router",
36
+ "model.layers.32.feed_forward.router",
37
+ "model.layers.33.feed_forward.router",
38
+ "model.layers.34.feed_forward.router",
39
+ "model.layers.35.feed_forward.router",
40
+ "model.layers.36.feed_forward.router",
41
+ "model.layers.37.feed_forward.router",
42
+ "model.layers.38.feed_forward.router",
43
+ "model.layers.39.feed_forward.router",
44
+ "model.layers.4.feed_forward.router",
45
+ "model.layers.40.feed_forward.router",
46
+ "model.layers.41.feed_forward.router",
47
+ "model.layers.42.feed_forward.router",
48
+ "model.layers.43.feed_forward.router",
49
+ "model.layers.44.feed_forward.router",
50
+ "model.layers.45.feed_forward.router",
51
+ "model.layers.46.feed_forward.router",
52
+ "model.layers.47.feed_forward.router",
53
+ "model.layers.5.feed_forward.router",
54
+ "model.layers.6.feed_forward.router",
55
+ "model.layers.7.feed_forward.router",
56
+ "model.layers.8.feed_forward.router",
57
+ "model.layers.9.feed_forward.router",
58
+ "lm_head"
59
+ ]
60
+ }
61
+ }
model-00001-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:152b48b9ee8e4666c0ed45cb91dace72a8256b0a1ad74c6bea91ae2acc42a889
3
+ size 4334085312
model-00002-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2151f2e7845285807f5fd6943be4db5118cf2e4a767091cac5479b400f6164b
3
+ size 4404395232
model-00003-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87b0f774059e829186b2ece36ee1aa4d394766a731f9effec0b667e057ef4544
3
+ size 4404395232
model-00004-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0db195c4a36d9d50623fcd7b400fa34a15501f2ef3799c618e4ff5a1307c66db
3
+ size 4404395232
model-00005-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed6b045ab2eb420921483cba27c39c0fc3d7c729c2ba1e464e5304c78f5c5538
3
+ size 4404395232
model-00006-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bc198304d32739063c085248b5af79acdae28ef31d02dc42604a85d8267342e
3
+ size 4404395280
model-00007-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aca290b2311f55fc38fe273b87f6874a7b8e7d8785d8ce703f5b217b4df6843
3
+ size 4404395296
model-00008-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46c6acef6607b363f497414e6c07bb50cdbbbcbb3f36bac65ea2c504b549e695
3
+ size 4404395296
model-00009-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e6d402ac8058c3ba087257e93c1dec17d44376068c3cbdbab297327e4bca3a6
3
+ size 4404395296
model-00010-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a46804ae6760ad06eeab0ff2f9102c239eaf8c6d6ef7ac21e945643a49614a32
3
+ size 4404395296
model-00011-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:240cddfb441882e8dd693fafc59d7a14464b03aae0878aee7610cf8e9d877820
3
+ size 4404395296
model-00012-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c0e48554d21299c7832c2be93a991c4267b8e4608d37becbf5feaa3fee3074c
3
+ size 4404395296
model-00013-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5a3e887c9f88f50fee2b90e641257165a50314c8f1a52b3eda896ca1035afca
3
+ size 4404395296
model-00014-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3a69cfc75afa5d54ff80dfefc23284f8ad85fe8f04db560a20b6f4a82c511c8
3
+ size 4404395296
model-00015-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:617d5e5388da84cda08502533fbc78cdd4b16cc73be73da96ac00774c746484a
3
+ size 4404395296
model-00016-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20a73360676e5424ffc885591d107cb8330f6a36b71b8d4e1588af443279f5e1
3
+ size 4404395296
model-00017-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8845172a32bfb8cc5b69e26325083021c98f204447e362619f5a17650155465
3
+ size 4404395296
model-00018-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bc70452ea2a2a22f562d32adf67924949294b280d5f11ba6c4c4bbef5ccd166
3
+ size 4404395296
model-00019-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:802eb0e331b381042638886a7d46374da2f4f597906c535b874e57deea123836
3
+ size 4404395296
model-00020-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aacdc0192b6ba396fa800fbf92f9dcbbf6fd39378a0171dff73a4c5731f87379
3
+ size 4404395296
model-00021-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef588087936f83cc3c635d4267f3f1616e22ef201335a7ce9faaa2ae435fa942
3
+ size 4404395296
model-00022-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15a03d5ef161903806a05c3f07deb34d954f14a7afd46a8ffa5c12ff9d12c784
3
+ size 4404395296
model-00023-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2192d84c33bb4a9b45069c2d2bfdec6ae60057f01d88275502c83c95d06345b1
3
+ size 4404395296
model-00024-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ace96779dca0e8e5c0f331a672855e11297e550a96fb8a3be74df09c742df5e0
3
+ size 4404395296
model-00025-of-00025.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b51d28e1e2911dff7d737315ac8ec4b509ff7a4118093614bc2c9d94e86e6849
3
+ size 4208263568
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|begin_of_text|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|eot|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|eot|>"
17
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:172c9eb4beafc72601690da3ccfcede5c2e6806a8d5ec1fca33e22acea8023a4
3
+ size 27948578
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff