cogent-csp-1m / tokenizer.json
pure-team's picture
Upload 5 files
a5617bb verified
raw
history blame contribute delete
390 Bytes
{
  "version": "1.0",
  "normalizer": {
    "type": "Lowercase"
  },
  "pre_tokenizer": {
    "type": "ByteLevel",
    "add_prefix_space": false
  },
  "post_processor": {
    "type": "ByteLevel"
  },
  "decoder": {
    "type": "ByteLevel"
  },
  "model": {
    "type": "BPE",
    "vocab": {
      "<unk>": 0,
      "<s>": 1,
      "</s>": 2,
      "<pad>": 3
    },
    "merges": []
  }
}