Commit
·
5f25afa
1
Parent(s):
79b90c7
Upload tokenizer
Browse files- tokenizer_config.json +0 -7
tokenizer_config.json
CHANGED
|
@@ -53,11 +53,9 @@
|
|
| 53 |
"do_basic_tokenize": true,
|
| 54 |
"do_lower_case": true,
|
| 55 |
"mask_token": "[MASK]",
|
| 56 |
-
"max_length": 512,
|
| 57 |
"model_max_length": 512,
|
| 58 |
"never_split": null,
|
| 59 |
"only_label_first_subword": true,
|
| 60 |
-
"pad_to_multiple_of": null,
|
| 61 |
"pad_token": "[PAD]",
|
| 62 |
"pad_token_box": [
|
| 63 |
0,
|
|
@@ -66,8 +64,6 @@
|
|
| 66 |
0
|
| 67 |
],
|
| 68 |
"pad_token_label": -100,
|
| 69 |
-
"pad_token_type_id": 0,
|
| 70 |
-
"padding_side": "right",
|
| 71 |
"processor_class": "LayoutLMv2Processor",
|
| 72 |
"sep_token": "[SEP]",
|
| 73 |
"sep_token_box": [
|
|
@@ -76,11 +72,8 @@
|
|
| 76 |
1000,
|
| 77 |
1000
|
| 78 |
],
|
| 79 |
-
"stride": 0,
|
| 80 |
"strip_accents": null,
|
| 81 |
"tokenize_chinese_chars": true,
|
| 82 |
"tokenizer_class": "LayoutLMv2Tokenizer",
|
| 83 |
-
"truncation_side": "right",
|
| 84 |
-
"truncation_strategy": "longest_first",
|
| 85 |
"unk_token": "[UNK]"
|
| 86 |
}
|
|
|
|
| 53 |
"do_basic_tokenize": true,
|
| 54 |
"do_lower_case": true,
|
| 55 |
"mask_token": "[MASK]",
|
|
|
|
| 56 |
"model_max_length": 512,
|
| 57 |
"never_split": null,
|
| 58 |
"only_label_first_subword": true,
|
|
|
|
| 59 |
"pad_token": "[PAD]",
|
| 60 |
"pad_token_box": [
|
| 61 |
0,
|
|
|
|
| 64 |
0
|
| 65 |
],
|
| 66 |
"pad_token_label": -100,
|
|
|
|
|
|
|
| 67 |
"processor_class": "LayoutLMv2Processor",
|
| 68 |
"sep_token": "[SEP]",
|
| 69 |
"sep_token_box": [
|
|
|
|
| 72 |
1000,
|
| 73 |
1000
|
| 74 |
],
|
|
|
|
| 75 |
"strip_accents": null,
|
| 76 |
"tokenize_chinese_chars": true,
|
| 77 |
"tokenizer_class": "LayoutLMv2Tokenizer",
|
|
|
|
|
|
|
| 78 |
"unk_token": "[UNK]"
|
| 79 |
}
|