ERNIE-4.5-21B-A3B-Thinking / tokenizer_config.json
danielhanchen's picture
Update tokenizer_config.json
e45eec3 verified
{
"add_bos_token": true,
"add_eos_token": false,
"add_prefix_space": null,
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "0",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"4": {
"content": "1",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"5": {
"content": "2",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"6": {
"content": "3",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"7": {
"content": "4",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"8": {
"content": "5",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"9": {
"content": "6",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"10": {
"content": "7",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"11": {
"content": "8",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"12": {
"content": "9",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"100272": {
"content": "<|end_of_sentence|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"100273": {
"content": "<|begin_of_sentence|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"100274": {
"content": "<mask:1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"100281": {
"content": "<think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"100282": {
"content": "</think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"100295": {
"content": "<tool_output>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"100296": {
"content": "</tool_output>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"100297": {
"content": "<tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"100298": {
"content": "</tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"100299": {
"content": "<response>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"100300": {
"content": "</response>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"100301": {
"content": "<system_setting>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"100302": {
"content": "</system_setting>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"100303": {
"content": "<global_setting>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"100304": {
"content": "</global_setting>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"100305": {
"content": "<tool_list>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"100306": {
"content": "</tool_list>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"100307": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"100308": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [],
"bos_token": "<s>",
"clean_up_tokenization_spaces": false,
"cls_token": "<|begin_of_sentence|>",
"eos_token": "</s>",
"extra_special_tokens": {},
"header_end_token": "<mask:7>",
"header_start_token": "<mask:6>",
"legacy": true,
"mask_token": "<mask:1>",
"model_max_length": 131072,
"pad_token": "<unk>",
"padding_side": "left",
"sep_token": "<|end_of_sentence|>",
"sys_end_token": "<mask:5>",
"sys_start_token": "<mask:4>",
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false,
"chat_template": "{# Unsloth template fixes #}{{- '<|im_start|>system\n' }}{%- if messages[0].role != 'system' and not system_settings %}{{- '<global_setting>\nthink_mode=True\n</global_setting>' }}{%- else%}{{- '<system_setting>\n' }}{{- system_settings + '\n' if system_settings else '' }}{{- (messages[0].content + '\n' if messages[0].role == 'system' else '') + '</system_setting>\n\n<global_setting>\nthink_mode=True\n</global_setting>' }}{%- endif %}{%- if tools %}{{- \"\n\n<tool_list>\" }}{{- '\n' }}{{-'['}}{% for tool in tools %}{{'{\"type\": \"function\", \"function\": '}}{{-(tool.function | tojson)}}}{%-if not loop.last%},{%- endif %}{%endfor%}{{-']'}}{{- \"\n</tool_list>\" }}{%- endif %}{{-'<|im_end|>\n\n' }}{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_output>') and message.content.endswith('</tool_output>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.thoughts is string %}\n {%- set reasoning_content = message.thoughts %}\n {%- else %}\n {# Unsloth template fixes - must change to for loop since llama.cpp will error out if not #}\n {%- set parts = content.split('</think>') %}\n {%- for part in parts %}\n {%- if loop.index0 == 0 -%}\n {%- set reasoning_content = (part.split(\"<think>\")|last) %}\n {%- set reasoning_content = reasoning_content.lstrip('\\n').rstrip('\\n') -%}\n {%- else -%}\n {%- set content = part.lstrip('\\n') %}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index and (loop.last or (not loop.last and reasoning_content)) %} {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n' }} {%- else %} {{- '<|im_start|>' + message.role + '\n' }} {%- endif %} {%- if content|length > 0 %} {{- '<response>\n' + content + '\n</response>\n' }} {%- endif %} {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\n<tool_call>\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\n</tool_call>\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\n\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>tool' }}\n {%- endif %}\n {{- '\n<tool_output>' }}\n {{- message.content|tojson }}\n {{- '</tool_output>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\n\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n {%- if add_generation_prompt is defined and add_generation_prompt %}\n {{- \"<|im_start|>assistant\n<think>\n\"}}\n{%- endif %}\n{# Copyright 2025-present Unsloth. Apache 2.0 License. #}"
}