danielhanchen committed on
Commit
1e92f6f
·
verified ·
1 Parent(s): 794dccf

Upload folder using huggingface_hub

Browse files
special_tokens_map.json CHANGED
@@ -14,7 +14,7 @@
14
  "single_word": false
15
  },
16
  "eos_token": {
17
- "content": "</s>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
 
14
  "single_word": false
15
  },
16
  "eos_token": {
17
+ "content": "<|im_end|>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99c25a6ebd867c54435ca2fdb8fdd3187699aea9bd85d83b6d43f50ebedb4564
3
- size 10999540
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:225303f40aa11a7240c0ac75266f1022f278469a8418f86c4fca1d1618734cf2
3
+ size 10999539
tokenizer_config.json CHANGED
@@ -257,14 +257,14 @@
257
  "normalized": false,
258
  "rstrip": false,
259
  "single_word": false,
260
- "special": false
261
  }
262
  },
263
  "additional_special_tokens": [],
264
  "bos_token": "<s>",
265
  "clean_up_tokenization_spaces": false,
266
  "cls_token": "<|begin_of_sentence|>",
267
- "eos_token": "</s>",
268
  "extra_special_tokens": {},
269
  "header_end_token": "<mask:7>",
270
  "header_start_token": "<mask:6>",
@@ -279,5 +279,5 @@
279
  "tokenizer_class": "LlamaTokenizer",
280
  "unk_token": "<unk>",
281
  "use_default_system_prompt": false,
282
- "chat_template": "{{- '<|im_start|>system\n' }}{%- if messages[0].role != 'system' and not system_settings %}{{- '<global_setting>\nthink_mode=True\n</global_setting>' }}{%- else%}{{- '<system_setting>\n' }}{{- system_settings + '\n' if system_settings else '' }}{{- (messages[0].content + '\n' if messages[0].role == 'system' else '') + '</system_setting>\n\n<global_setting>\nthink_mode=True\n</global_setting>' }}{%- endif %}{%- if tools %}{{- \"\n\n<tool_list>\" }}{{- '\n' }}{{-'['}}{% for tool in tools %}{{'{\"type\": \"function\", \"function\": '}}{{-(tool.function | tojson)}}}{%-if not loop.last%},{%- endif %}{%endfor%}{{-']'}}{{- \"\n</tool_list>\" }}{%- endif %}{{-'<|im_end|>\n\n' }}{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_output>') and message.content.endswith('</tool_output>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.thoughts is string %}\n {%- set reasoning_content = message.thoughts %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = (content.split('</think>')|first).rstrip('\n').split('<think>')[-1].lstrip('\n') %}\n {%- set content = (content.split('</think>')|last).lstrip('\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index and 
(loop.last or (not loop.last and reasoning_content)) %} {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n' }} {%- else %} {{- '<|im_start|>' + message.role + '\n' }} {%- endif %} {%- if content|length > 0 %} {{- '<response>\n' + content + '\n</response>\n' }} {%- endif %} {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\n<tool_call>\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\n</tool_call>\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\n\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>tool' }}\n {%- endif %}\n {{- '\n<tool_output>' }}\n {{- message.content|tojson }}\n {{- '</tool_output>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\n\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n {{- \"<|im_start|>assistant\n<think>\n\"}}"
283
  }
 
257
  "normalized": false,
258
  "rstrip": false,
259
  "single_word": false,
260
+ "special": true
261
  }
262
  },
263
  "additional_special_tokens": [],
264
  "bos_token": "<s>",
265
  "clean_up_tokenization_spaces": false,
266
  "cls_token": "<|begin_of_sentence|>",
267
+ "eos_token": "<|im_end|>",
268
  "extra_special_tokens": {},
269
  "header_end_token": "<mask:7>",
270
  "header_start_token": "<mask:6>",
 
279
  "tokenizer_class": "LlamaTokenizer",
280
  "unk_token": "<unk>",
281
  "use_default_system_prompt": false,
282
+ "chat_template": "{{- '<|im_start|>system\n' }}{%- if messages[0].role != 'system' and not system_settings %}{{- '<global_setting>\nthink_mode=True\n</global_setting>' }}{%- else%}{{- '<system_setting>\n' }}{{- system_settings + '\n' if system_settings else '' }}{{- (messages[0].content + '\n' if messages[0].role == 'system' else '') + '</system_setting>\n\n<global_setting>\nthink_mode=True\n</global_setting>' }}{%- endif %}{%- if tools %}{{- \"\n\n<tool_list>\" }}{{- '\n' }}{{-'['}}{% for tool in tools %}{{'{\"type\": \"function\", \"function\": '}}{{-(tool.function | tojson)}}}{%-if not loop.last%},{%- endif %}{%endfor%}{{-']'}}{{- \"\n</tool_list>\" }}{%- endif %}{{-'<|im_end|>\n\n' }}{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_output>') and message.content.endswith('</tool_output>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.thoughts is string %}\n {%- set reasoning_content = message.thoughts %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = (content.split('</think>')|first).rstrip('\n').split('<think>')[-1].lstrip('\n') %}\n {%- set content = (content.split('</think>')|last).lstrip('\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index and 
(loop.last or (not loop.last and reasoning_content)) %} {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n' }} {%- else %} {{- '<|im_start|>' + message.role + '\n' }} {%- endif %} {%- if content|length > 0 %} {{- '<response>\n' + content + '\n</response>\n' }} {%- endif %} {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\n<tool_call>\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\n</tool_call>\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\n\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>tool' }}\n {%- endif %}\n {{- '\n<tool_output>' }}\n {{- message.content|tojson }}\n {{- '</tool_output>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\n\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n {%- if add_generation_prompt is defined and add_generation_prompt %}\n {{- \"<|im_start|>assistant\n<think>\n\"}}\n{%- endif %}"
283
  }