{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Whitespace" }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "[CLS]", "type_id": 500 } }, { "Sequence": { "id": "A", "type_id": 500 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 500 } } ], "pair": [ { "SpecialToken": { "id": "[CLS]", "type_id": 500 } }, { "Sequence": { "id": "A", "type_id": 500 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 500 } }, { "Sequence": { "id": "B", "type_id": 500 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 500 } } ], "special_tokens": { "[CLS]": { "id": "[CLS]", "ids": [1], "tokens": ["[CLS]"] }, "[SEP]": { "id": "[SEP]", "ids": [2], "tokens": ["[SEP]"] } } }, "decoder": null, "model": { "type": "WordLevel", "vocab": { "[PAD]": 0, "[CLS]": 1, "[SEP]": 2, "[UNK]": 3, "[MASK]": 4, "GCT": 5, "GCC": 6, "GCA": 7, "GCG": 8, "TGT": 9, "TGC": 10, "GAT": 11, "GAC": 12, "GAA": 13, "GAG": 14, "TTT": 15, "TTC": 16, "GGT": 17, "GGC": 18, "GGA": 19, "GGG": 20, "CAT": 21, "CAC": 22, "ATT": 23, "ATC": 24, "ATA": 25, "AAA": 26, "AAG": 27, "TTA": 28, "TTG": 29, "CTT": 30, "CTC": 31, "CTA": 32, "CTG": 33, "ATG": 34, "AAT": 35, "AAC": 36, "CCT": 37, "CCC": 38, "CCA": 39, "CCG": 40, "CAA": 41, "CAG": 42, "CGT": 43, "CGC": 44, "CGA": 45, "CGG": 46, "AGA": 47, "AGG": 48, "TCT": 49, "TCC": 50, "TCA": 51, "TCG": 52, "AGT": 53, "AGC": 54, "ACT": 55, "ACC": 56, "ACA": 57, "ACG": 58, "GTT": 59, "GTC": 60, "GTA": 61, "GTG": 62, "TGG": 63, "TAT": 64, "TAC": 65, "TAA": 66, "TAG": 67, "TGA": 68 }, "unk_token": "[UNK]" } }