Upload processor
Browse files
- added_tokens.json +1 -0
- chat_template.jinja +1 -0
- preprocessor_config.json +6 -0
- special_tokens_map.json +2 -14
- tokenizer.json +2 -2
- tokenizer_config.json +9 -2
added_tokens.json
CHANGED
|
@@ -8,6 +8,7 @@
|
|
| 8 |
"<|im_end|>": 151645,
|
| 9 |
"<|im_start|>": 151644,
|
| 10 |
"<|image|>": 152068,
|
|
|
|
| 11 |
"|<EXTRA_TOKENS_0>|": 151646,
|
| 12 |
"|<EXTRA_TOKENS_100>|": 151746,
|
| 13 |
"|<EXTRA_TOKENS_101>|": 151747,
|
|
|
|
| 8 |
"<|im_end|>": 151645,
|
| 9 |
"<|im_start|>": 151644,
|
| 10 |
"<|image|>": 152068,
|
| 11 |
+
"<|pad|>": 152070,
|
| 12 |
"|<EXTRA_TOKENS_0>|": 151646,
|
| 13 |
"|<EXTRA_TOKENS_100>|": 151746,
|
| 14 |
"|<EXTRA_TOKENS_101>|": 151747,
|
chat_template.jinja
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{{ bos_token or '' }}{% for message in messages %}{%- if (loop.index % 2 == 1 and message['role'] != 'user') or (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{%- endif -%}{% if message['content'] is not string %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<image> ' }}{% endif %}{% endfor %}{% endif %}{{ message['role'].capitalize() + ': ' }}{% if message['content'] is string %}{{ message['content'] + ' ' }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'text' %}{{ content['text'] + ' ' }}{% endif %}{% endfor %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}
|
preprocessor_config.json
CHANGED
|
@@ -11,6 +11,10 @@
|
|
| 11 |
"crop_size": 336,
|
| 12 |
"crop_window_patches": 16,
|
| 13 |
"crop_window_size": 224,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
"do_convert_rgb": true,
|
| 15 |
"do_normalize": true,
|
| 16 |
"do_pad": true,
|
|
@@ -36,6 +40,7 @@
|
|
| 36 |
],
|
| 37 |
"image_token_length_h": 12,
|
| 38 |
"image_token_length_w": 12,
|
|
|
|
| 39 |
"max_crops": 12,
|
| 40 |
"max_num_crops": 12,
|
| 41 |
"overlap_margins": [
|
|
@@ -49,6 +54,7 @@
|
|
| 49 |
"processor_class": "MolmoProcessor",
|
| 50 |
"resample": 2,
|
| 51 |
"rescale_factor": 0.00392156862745098,
|
|
|
|
| 52 |
"size": {
|
| 53 |
"height": 336,
|
| 54 |
"width": 336
|
|
|
|
| 11 |
"crop_size": 336,
|
| 12 |
"crop_window_patches": 16,
|
| 13 |
"crop_window_size": 224,
|
| 14 |
+
"data_format": "channels_first",
|
| 15 |
+
"default_to_square": true,
|
| 16 |
+
"device": null,
|
| 17 |
+
"do_center_crop": null,
|
| 18 |
"do_convert_rgb": true,
|
| 19 |
"do_normalize": true,
|
| 20 |
"do_pad": true,
|
|
|
|
| 40 |
],
|
| 41 |
"image_token_length_h": 12,
|
| 42 |
"image_token_length_w": 12,
|
| 43 |
+
"input_data_format": null,
|
| 44 |
"max_crops": 12,
|
| 45 |
"max_num_crops": 12,
|
| 46 |
"overlap_margins": [
|
|
|
|
| 54 |
"processor_class": "MolmoProcessor",
|
| 55 |
"resample": 2,
|
| 56 |
"rescale_factor": 0.00392156862745098,
|
| 57 |
+
"return_tensors": null,
|
| 58 |
"size": {
|
| 59 |
"height": 336,
|
| 60 |
"width": 336
|
special_tokens_map.json
CHANGED
|
@@ -425,13 +425,7 @@
|
|
| 425 |
"<|image|>"
|
| 426 |
],
|
| 427 |
"boi_token": "<im_start>",
|
| 428 |
-
"bos_token":
|
| 429 |
-
"content": "<|endoftext|>",
|
| 430 |
-
"lstrip": false,
|
| 431 |
-
"normalized": false,
|
| 432 |
-
"rstrip": false,
|
| 433 |
-
"single_word": false
|
| 434 |
-
},
|
| 435 |
"eoi_token": "<im_end>",
|
| 436 |
"eos_token": {
|
| 437 |
"content": "<|endoftext|>",
|
|
@@ -443,11 +437,5 @@
|
|
| 443 |
"im_col_token": "<im_col>",
|
| 444 |
"im_patch_token": "<im_patch>",
|
| 445 |
"image_token": "<image>",
|
| 446 |
-
"pad_token":
|
| 447 |
-
"content": "<|endoftext|>",
|
| 448 |
-
"lstrip": false,
|
| 449 |
-
"normalized": false,
|
| 450 |
-
"rstrip": false,
|
| 451 |
-
"single_word": false
|
| 452 |
-
}
|
| 453 |
}
|
|
|
|
| 425 |
"<|image|>"
|
| 426 |
],
|
| 427 |
"boi_token": "<im_start>",
|
| 428 |
+
"bos_token": "<|endoftext|>",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 429 |
"eoi_token": "<im_end>",
|
| 430 |
"eos_token": {
|
| 431 |
"content": "<|endoftext|>",
|
|
|
|
| 437 |
"im_col_token": "<im_col>",
|
| 438 |
"im_patch_token": "<im_patch>",
|
| 439 |
"image_token": "<image>",
|
| 440 |
+
"pad_token": "<|pad|>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
}
|
tokenizer.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e12d99cec7795d0d3dd206aa62255db4c8c6a1ddf644fc2b304703b1c34a29d
|
| 3 |
+
size 11501800
|
tokenizer_config.json
CHANGED
|
@@ -3416,6 +3416,14 @@
|
|
| 3416 |
"rstrip": false,
|
| 3417 |
"single_word": false,
|
| 3418 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3419 |
}
|
| 3420 |
},
|
| 3421 |
"additional_special_tokens": [
|
|
@@ -3848,7 +3856,6 @@
|
|
| 3848 |
},
|
| 3849 |
"boi_token": "<im_start>",
|
| 3850 |
"bos_token": "<|endoftext|>",
|
| 3851 |
-
"chat_template": "{% for message in messages -%}\n {%- if (loop.index % 2 == 1 and message['role'] != 'user') or \n (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif -%}\n {{ message['role'].capitalize() + ': ' + message['content'] }}\n {%- if not loop.last -%}\n {{ ' ' }}\n {%- endif %}\n {%- endfor -%}\n {%- if add_generation_prompt -%}\n {{ ' Assistant:' }}\n {%- endif %}",
|
| 3852 |
"clean_up_tokenization_spaces": false,
|
| 3853 |
"eoi_token": "<im_end>",
|
| 3854 |
"eos_token": "<|endoftext|>",
|
|
@@ -3864,7 +3871,7 @@
|
|
| 3864 |
"im_patch_token": "<im_patch>",
|
| 3865 |
"image_token": "<image>",
|
| 3866 |
"model_max_length": 32768,
|
| 3867 |
-
"pad_token": "<|
|
| 3868 |
"processor_class": "MolmoProcessor",
|
| 3869 |
"split_special_tokens": false,
|
| 3870 |
"tokenizer_class": "Qwen2Tokenizer",
|
|
|
|
| 3416 |
"rstrip": false,
|
| 3417 |
"single_word": false,
|
| 3418 |
"special": true
|
| 3419 |
+
},
|
| 3420 |
+
"152070": {
|
| 3421 |
+
"content": "<|pad|>",
|
| 3422 |
+
"lstrip": false,
|
| 3423 |
+
"normalized": false,
|
| 3424 |
+
"rstrip": false,
|
| 3425 |
+
"single_word": false,
|
| 3426 |
+
"special": true
|
| 3427 |
}
|
| 3428 |
},
|
| 3429 |
"additional_special_tokens": [
|
|
|
|
| 3856 |
},
|
| 3857 |
"boi_token": "<im_start>",
|
| 3858 |
"bos_token": "<|endoftext|>",
|
|
|
|
| 3859 |
"clean_up_tokenization_spaces": false,
|
| 3860 |
"eoi_token": "<im_end>",
|
| 3861 |
"eos_token": "<|endoftext|>",
|
|
|
|
| 3871 |
"im_patch_token": "<im_patch>",
|
| 3872 |
"image_token": "<image>",
|
| 3873 |
"model_max_length": 32768,
|
| 3874 |
+
"pad_token": "<|pad|>",
|
| 3875 |
"processor_class": "MolmoProcessor",
|
| 3876 |
"split_special_tokens": false,
|
| 3877 |
"tokenizer_class": "Qwen2Tokenizer",
|