imagick committed
Commit 6cd87d1 · verified · 1 Parent(s): 0fb1cb1

Upload folder using huggingface_hub

config.json CHANGED
@@ -1,4 +1,5 @@
 {
+  "add_cross_attention": false,
   "architectures": [
     "KimiVLForConditionalGeneration"
   ],
@@ -7,9 +8,56 @@
     "AutoModel": "modeling_kimi_vl.KimiVLForConditionalGeneration",
     "AutoModelForCausalLM": "modeling_kimi_vl.KimiVLForConditionalGeneration"
   },
+  "bad_words_ids": null,
+  "begin_suppress_tokens": null,
+  "bos_token_id": null,
+  "chunk_size_feed_forward": 0,
+  "cross_attention_hidden_size": null,
+  "decoder_start_token_id": null,
+  "diversity_penalty": 0.0,
+  "do_sample": false,
+  "early_stopping": false,
+  "encoder_no_repeat_ngram_size": 0,
+  "eos_token_id": null,
+  "exponential_decay_length_penalty": null,
+  "finetuning_task": null,
+  "forced_bos_token_id": null,
+  "forced_eos_token_id": null,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1"
+  },
   "ignore_index": -100,
+  "is_decoder": false,
+  "is_encoder_decoder": false,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1
+  },
+  "length_penalty": 1.0,
+  "max_length": 20,
   "media_placeholder_token_id": 163605,
+  "min_length": 0,
   "model_type": "kimi_vl",
+  "no_repeat_ngram_size": 0,
+  "num_beam_groups": 1,
+  "num_beams": 1,
+  "num_return_sequences": 1,
+  "output_attentions": false,
+  "output_hidden_states": false,
+  "output_scores": false,
+  "pad_token_id": 0,
+  "prefix": null,
+  "problem_type": null,
+  "pruned_heads": {},
+  "remove_invalid_values": false,
+  "repetition_penalty": 1.0,
+  "return_dict": true,
+  "return_dict_in_generate": false,
+  "sep_token_id": null,
+  "suppress_tokens": null,
+  "task_specific_params": null,
+  "temperature": 1.0,
   "text_config": {
     "vocab_size": 163840,
     "max_position_embeddings": 131072,
@@ -17,6 +65,7 @@
     "intermediate_size": 11264,
     "moe_intermediate_size": 1408,
     "num_hidden_layers": 27,
+    "num_nextn_predict_layers": 1,
     "num_attention_heads": 16,
     "n_shared_experts": 2,
     "n_routed_experts": 64,
@@ -47,14 +96,151 @@
     "rope_scaling": null,
     "attention_bias": false,
     "attention_dropout": 0.0,
+    "return_dict": true,
+    "output_hidden_states": false,
+    "torchscript": false,
+    "torch_dtype": "bfloat16",
+    "use_bfloat16": false,
+    "tf_legacy_loss": false,
+    "pruned_heads": {},
+    "tie_word_embeddings": false,
+    "chunk_size_feed_forward": 0,
+    "is_encoder_decoder": false,
+    "is_decoder": false,
+    "cross_attention_hidden_size": null,
+    "add_cross_attention": false,
+    "tie_encoder_decoder": false,
+    "max_length": 20,
+    "min_length": 0,
+    "do_sample": false,
+    "early_stopping": false,
+    "num_beams": 1,
+    "num_beam_groups": 1,
+    "diversity_penalty": 0.0,
+    "temperature": 1.0,
+    "top_k": 50,
+    "top_p": 1.0,
+    "typical_p": 1.0,
+    "repetition_penalty": 1.0,
+    "length_penalty": 1.0,
+    "no_repeat_ngram_size": 0,
+    "encoder_no_repeat_ngram_size": 0,
+    "bad_words_ids": null,
+    "num_return_sequences": 1,
+    "output_scores": false,
+    "return_dict_in_generate": false,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "remove_invalid_values": false,
+    "exponential_decay_length_penalty": null,
+    "suppress_tokens": null,
+    "begin_suppress_tokens": null,
+    "architectures": null,
+    "finetuning_task": null,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "tokenizer_class": null,
+    "prefix": null,
     "bos_token_id": 163584,
     "pad_token_id": 163839,
     "eos_token_id": 163585,
-    "torch_dtype": "bfloat16",
-    "tie_word_embeddings": false
+    "sep_token_id": null,
+    "decoder_start_token_id": null,
+    "task_specific_params": null,
+    "problem_type": null,
+    "_name_or_path": "",
+    "model_type": "deepseek_v3",
+    "output_attentions": false
   },
+  "tf_legacy_loss": false,
+  "tie_encoder_decoder": false,
   "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.50.3",
+  "tokenizer_class": null,
+  "top_k": 50,
+  "top_p": 1.0,
+  "torchscript": false,
+  "transformers_version": "4.53.3",
+  "typical_p": 1.0,
+  "use_bfloat16": false,
+  "vision_config": {
+    "return_dict": true,
+    "output_hidden_states": false,
+    "torchscript": false,
+    "torch_dtype": "bfloat16",
+    "use_bfloat16": false,
+    "tf_legacy_loss": false,
+    "pruned_heads": {},
+    "tie_word_embeddings": true,
+    "chunk_size_feed_forward": 0,
+    "is_encoder_decoder": false,
+    "is_decoder": false,
+    "cross_attention_hidden_size": null,
+    "add_cross_attention": false,
+    "tie_encoder_decoder": false,
+    "max_length": 20,
+    "min_length": 0,
+    "do_sample": false,
+    "early_stopping": false,
+    "num_beams": 1,
+    "num_beam_groups": 1,
+    "diversity_penalty": 0.0,
+    "temperature": 1.0,
+    "top_k": 50,
+    "top_p": 1.0,
+    "typical_p": 1.0,
+    "repetition_penalty": 1.0,
+    "length_penalty": 1.0,
+    "no_repeat_ngram_size": 0,
+    "encoder_no_repeat_ngram_size": 0,
+    "bad_words_ids": null,
+    "num_return_sequences": 1,
+    "output_scores": false,
+    "return_dict_in_generate": false,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "remove_invalid_values": false,
+    "exponential_decay_length_penalty": null,
+    "suppress_tokens": null,
+    "begin_suppress_tokens": null,
+    "architectures": null,
+    "finetuning_task": null,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "tokenizer_class": null,
+    "prefix": null,
+    "bos_token_id": null,
+    "pad_token_id": null,
+    "eos_token_id": null,
+    "sep_token_id": null,
+    "decoder_start_token_id": null,
+    "task_specific_params": null,
+    "problem_type": null,
+    "_name_or_path": "",
+    "model_type": "moonvit",
+    "patch_size": 14,
+    "init_pos_emb_height": 64,
+    "init_pos_emb_width": 64,
+    "num_hidden_layers": 27,
+    "num_attention_heads": 16,
+    "hidden_size": 1152,
+    "intermediate_size": 4304,
+    "merge_kernel_size": [
+      2,
+      2
+    ],
+    "output_attentions": false
+  },
   "vocab_size": 163840
 }
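
The updated config.json serializes the full nested text_config (now reporting model_type "deepseek_v3") and a new vision_config block (model_type "moonvit"), and bumps transformers_version to 4.53.3. A minimal sketch of how such a config might be inspected with the transformers AutoConfig API; the repository id below is a placeholder, not taken from this commit, and the nested attribute names are expectations based on the JSON above:

from transformers import AutoConfig

# Placeholder repo id; substitute the repository this commit belongs to.
config = AutoConfig.from_pretrained("org/kimi-vl-model", trust_remote_code=True)

print(config.model_type)                       # "kimi_vl"
print(config.text_config.model_type)           # expected "deepseek_v3" after this change
print(config.vision_config.patch_size)         # 14
print(config.vision_config.merge_kernel_size)  # [2, 2]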
model-00001-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c24e694b3f3d1d06801d8bb02b8a62859856558173851ba30d8abac0833686dd
-size 5111855204
+oid sha256:c2c40ab7ee663d1a9d8dfdf886f58fc1ed72360d3d040134903ab4a7d4f82c7f
+size 5207425600
model-00002-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a3c8261e04730d6b462ac35ecad43f191e29705f2872313a0575c263258a09ba
-size 5110292434
+oid sha256:99400fb4a4db93191ee2f0eea827f5dbde491460729e221adcefef959d7934e3
+size 5047890573
model-00003-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:504eb5d30887a1b5528e92bf1c998fdc41d0953961300176154846e041dd1c32
-size 5047891100
+oid sha256:f0b38aa086aa1bc5ec73f5a12065433f8e21a1cff8cb6586c1946d46b7b8e35f
+size 5047890606
model-00004-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57f92814de160583b888e8e97870aa6b403021fa6d289d3e6dd69fa1b495b777
-size 5047891154
+oid sha256:e0a8d9c435f8715067d9cf9b359a4226d5fa17e2a4c55c502d856a1cd8587511
+size 5110291870
model-00005-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:66e89b2bd77900920a7e5570062c99d4998dade3ffc021bc82bf44c21f32dd00
-size 5110292473
+oid sha256:eca1fc288e7134dd07f4d15e9f2c457aa1fed551e39db8b60c2bc752cd53b98a
+size 5047890618
model-00006-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c35578ce0f5cd495da6c2e949814821c811f9b838e5034d9e982dd082a3ce665
-size 5047891130
+oid sha256:9d1452ab13cc898a14c88ffc00a1e7226372d349707493dcffe58a7b718287de
+size 5047890650
model-00007-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f6c47402cf966197708a9813da7b13dcf562909bade7928ce1a19887cff8987
-size 1444165317
+oid sha256:625d4b768aa82c2e33050f24ea937de6def4937c1040ca60f35e0258f8de2ad9
+size 2306124504
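
All seven safetensors shards were re-uploaded, so each Git LFS pointer's oid (a SHA-256 digest of the file) and size changed. A small standard-library sketch for checking a locally downloaded shard against its pointer; the local path is an assumption, and the expected values are copied from the shard-7 pointer above:

import hashlib
import os

# Expected values from the new LFS pointer for model-00007-of-00007.safetensors.
expected_sha256 = "625d4b768aa82c2e33050f24ea937de6def4937c1040ca60f35e0258f8de2ad9"
expected_size = 2306124504

path = "model-00007-of-00007.safetensors"  # assumed local download location

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert sha.hexdigest() == expected_sha256, "sha256 mismatch"
print("shard matches its LFS pointer")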
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
preprocessor_config.json ADDED
@@ -0,0 +1,26 @@
+{
+  "auto_map": {
+    "AutoImageProcessor": "image_processing_kimi_vl.KimiVLImageProcessor",
+    "AutoProcessor": "processing_kimi_vl.KimiVLProcessor"
+  },
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "KimiVLImageProcessor",
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "in_token_limit": 16384,
+  "merge_kernel_size": [
+    2,
+    2
+  ],
+  "num_pooled_tokens": 1024,
+  "pad_input": true,
+  "patch_size": 14,
+  "processor_class": "KimiVLProcessor"
+}
processor_config.json ADDED
@@ -0,0 +1,6 @@
+{
+  "auto_map": {
+    "AutoProcessor": "processing_kimi_vl.KimiVLProcessor"
+  },
+  "processor_class": "KimiVLProcessor"
+}
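
The two added files register the repo's custom processing classes through auto_map, so they can be resolved from the Hub without local imports. A short sketch of the expected loading path, assuming remote code is trusted and using a placeholder repository id:

from transformers import AutoImageProcessor, AutoProcessor

# Placeholder repo id; substitute the repository this commit belongs to.
repo_id = "org/kimi-vl-model"

image_processor = AutoImageProcessor.from_pretrained(repo_id, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)

print(type(image_processor).__name__)  # expected: KimiVLImageProcessor
print(type(processor).__name__)        # expected: KimiVLProcessor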
tokenizer_config.json CHANGED
@@ -117,6 +117,7 @@
     "<|media_pad|>"
   ],
   "auto_map": {
+    "AutoProcessor": "processing_kimi_vl.KimiVLProcessor",
     "AutoTokenizer": [
       "tokenization_moonshot.TikTokenTokenizer",
       null
@@ -128,6 +129,7 @@
   "extra_special_tokens": {},
   "model_max_length": 1048576,
   "pad_token": "[PAD]",
+  "processor_class": "KimiVLProcessor",
   "tokenizer_class": "TikTokenTokenizer",
   "unk_token": "[UNK]"
 }