{
  "add_prefix_space": false,
  "additional_special_tokens": [
    {
      "__type": "AddedToken",
      "content": "<ENTITY_CHAIN>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false
    },
    {
      "__type": "AddedToken",
| "content": " </ENTITY_CHAIN> ", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false | |
| }, | |
| { | |
| "__type": "AddedToken", | |
| "content": "<HTML>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false | |
| }, | |
| { | |
| "__type": "AddedToken", | |
| "content": "</HTML>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false | |
| } | |
| ], | |
| "bos_token": "<|endoftext|>", | |
| "eos_token": "<|endoftext|>", | |
| "model_max_length": 1024, | |
| "special_tokens_map_file": null, | |
| "tokenizer_class": "GPT2Tokenizer", | |
| "unk_token": "<|endoftext|>" | |
| } | |
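As a minimal sketch of how a config like this is typically consumed: assuming the file above is saved as `tokenizer_config.json` next to the usual GPT-2 `vocab.json` and `merges.txt` in a local directory (the `./tokenizer` path below is a hypothetical placeholder), the `transformers` library picks up the additional special tokens automatically and keeps them as single, unsplit tokens.

```python
# Minimal sketch, not the authors' code: loading a GPT-2 tokenizer whose
# tokenizer_config.json declares the extra markup tokens shown above.
# "./tokenizer" is a hypothetical local path containing this config plus
# the standard GPT-2 vocab.json and merges.txt files.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./tokenizer")

# The four added tokens are registered as special tokens and are never
# split by the byte-level BPE.
print(tokenizer.additional_special_tokens)
# expected: ['<ENTITY_CHAIN>', '</ENTITY_CHAIN>', '<HTML>', '</HTML>']

# Example encoding: the markup tokens map to single ids, the text in
# between is tokenized normally.
ids = tokenizer("<ENTITY_CHAIN> example entities </ENTITY_CHAIN>")["input_ids"]
print(tokenizer.convert_ids_to_tokens(ids))
```

Because `tokenizer_class` is set to `GPT2Tokenizer`, loading via `GPT2Tokenizer.from_pretrained(...)` instead of `AutoTokenizer` would resolve to the same tokenizer.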