Updated tokenizer with special tokens for a chat template
Browse files- tokenizer.json +4 -4
tokenizer.json
CHANGED
|
@@ -32,7 +32,7 @@
|
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"id": 3,
|
| 35 |
-
"content": "<extra_id_0>",
|
| 36 |
"single_word": false,
|
| 37 |
"lstrip": false,
|
| 38 |
"rstrip": false,
|
|
@@ -41,7 +41,7 @@
|
|
| 41 |
},
|
| 42 |
{
|
| 43 |
"id": 4,
|
| 44 |
-
"content": "<extra_id_1>",
|
| 45 |
"single_word": false,
|
| 46 |
"lstrip": false,
|
| 47 |
"rstrip": false,
|
|
@@ -265,8 +265,8 @@
|
|
| 265 |
"<unk>": 0,
|
| 266 |
"<s>": 1,
|
| 267 |
"</s>": 2,
|
| 268 |
-
"<extra_id_0>": 3,
|
| 269 |
-
"<extra_id_1>": 4,
|
| 270 |
"<extra_id_2>": 5,
|
| 271 |
"<extra_id_3>": 6,
|
| 272 |
"<extra_id_4>": 7,
|
|
|
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"id": 3,
|
| 35 |
+
"content": "<|im_start|>",
|
| 36 |
"single_word": false,
|
| 37 |
"lstrip": false,
|
| 38 |
"rstrip": false,
|
|
|
|
| 41 |
},
|
| 42 |
{
|
| 43 |
"id": 4,
|
| 44 |
+
"content": "<|im_end|>",
|
| 45 |
"single_word": false,
|
| 46 |
"lstrip": false,
|
| 47 |
"rstrip": false,
|
|
|
|
| 265 |
"<unk>": 0,
|
| 266 |
"<s>": 1,
|
| 267 |
"</s>": 2,
|
| 268 |
+
"<|im_start|>": 3,
|
| 269 |
+
"<|im_end|>": 4,
|
| 270 |
"<extra_id_2>": 5,
|
| 271 |
"<extra_id_3>": 6,
|
| 272 |
"<extra_id_4>": 7,
|