Molbap HF Staff committed on
Commit
72194f0
·
verified ·
1 Parent(s): acf25da

Upload processor

Browse files
chat_template.jinja ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- for message in messages %}
2
+ {%- if message['content'] is string %}
3
+ {{ message['content'].rstrip() }}
4
+ {%- else %}
5
+ {%- set ns = namespace(previous_was_image=False) %}
6
+ {%- for content in message['content'] %}
7
+ {%- if content['type'] == 'image' %}
8
+ <image>
9
+ {%- set ns.previous_was_image = True %}
10
+ {%- elif content['type'] == 'text' %}
11
+ {{- ('
12
+ ' if ns.previous_was_image else '') + content['text'].rstrip() }}
13
+ {%- set ns.previous_was_image = False %}
14
+ {%- endif %}
15
+ {%- endfor %}
16
+ {%- endif %}
17
+ {%- if not loop.last %}
18
+
19
+ {%- endif %}
20
+ {%- endfor %}
processor_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_processor": {
3
+ "base_size": {
4
+ "height": 1024,
5
+ "width": 1024
6
+ },
7
+ "crop_size": null,
8
+ "data_format": "channels_first",
9
+ "device": null,
10
+ "disable_grouping": null,
11
+ "do_center_crop": null,
12
+ "do_convert_rgb": true,
13
+ "do_normalize": true,
14
+ "do_pad": null,
15
+ "do_rescale": true,
16
+ "do_resize": true,
17
+ "dynamic_hd": 36,
18
+ "image_mean": [
19
+ 0.5,
20
+ 0.5,
21
+ 0.5
22
+ ],
23
+ "image_processor_type": "DeepseekOcrImageProcessorFast",
24
+ "image_std": [
25
+ 0.5,
26
+ 0.5,
27
+ 0.5
28
+ ],
29
+ "input_data_format": null,
30
+ "pad_size": null,
31
+ "patch_size": 16,
32
+ "processor_class": "DeepseekOcrProcessor",
33
+ "resample": 3,
34
+ "rescale_factor": 0.00392156862745098,
35
+ "return_tensors": null,
36
+ "size": {
37
+ "height": 1024,
38
+ "width": 1024
39
+ }
40
+ },
41
+ "image_token": "<image>",
42
+ "processor_class": "DeepseekOcrProcessor"
43
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|User|>",
4
+ "<|Assistant|>"
5
+ ],
6
+ "bos_token": {
7
+ "content": "<|begin▁of▁sentence|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "eos_token": {
14
+ "content": "<|end▁of▁sentence|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "pad_token": {
21
+ "content": "<|▁pad▁|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ }
27
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff