Pendrokar
/

TorchMoji

Text Classification

Model card · Files and versions

Pendrokar committed on Feb 1, 2024

Commit

d627362

·

verified ·

1 Parent(s): ac7fa10

CLS SEP to ID 0

Files changed (1) hide show

tokenizer.json +61 -30

tokenizer.json CHANGED Viewed

@@ -24,40 +24,71 @@
 		{
 		"id":2230,	"special":false,"content":"hashtag","single_word":false,"lstrip":false,"rstrip":false,"normalized":true},
 		{
-		"id":61005,	"special":false,"content":"emoji","single_word":false,"lstrip":false,"rstrip":false,"normalized":true}],
 	"normalizer":{
-"type":"BertNormalizer",	"clean_text":true,"handle_chinese_chars":true,"strip_accents":false,"lowercase":false},
 	"pre_tokenizer":{
 		"type":"BertPreTokenizer"
 	},
-"post_processor":{
-	"type":"TemplateProcessing",	"single":[{
-	"SpecialToken":{
-		"id":"[CLS]","type_id":0}},
-		{
-	"Sequence":{
-		"id":"A","type_id":0}},
-		{
-	"SpecialToken":{
-		"id":"[SEP]","type_id":0}}],"pair":[{
-	"SpecialToken":{
-		"id":"[CLS]","type_id":0}},
-		{
-	"Sequence":{
-		"id":"A","type_id":0}},
-		{
-	"SpecialToken":{
-		"id":"[SEP]","type_id":0}},
-		{
-	"Sequence":{
-		"id":"B","type_id":1}},
-		{
-	"SpecialToken":{
-		"id":"[SEP]","type_id":1}}],"special_tokens":{
-	"[CLS]":{
-		"id":"[CLS]",	"ids":[4],"tokens":["[CLS]"]},"[SEP]":{
-		"id":"[SEP]",	"ids":[5],"tokens":["[SEP]"]}}},"decoder":{
-	"type":"WordPiece",	"prefix":"##","cleanup":true},
 	"model":{
 	"type":"WordPiece",
 	"unk_token":"CUSTOM_UNKNOWN",

 		{
 		"id":2230,	"special":false,"content":"hashtag","single_word":false,"lstrip":false,"rstrip":false,"normalized":true},
 		{
+		"id":61005,	"special":false,"content":"emoji","single_word":false,"lstrip":false,"rstrip":false,"normalized":true}
+	],
 	"normalizer":{
+		"type":"BertNormalizer",
+		"clean_text":true,
+		"handle_chinese_chars":true,
+		"strip_accents":true,
+		"lowercase":true
+	},
 	"pre_tokenizer":{
 		"type":"BertPreTokenizer"
 	},
+	"post_processor":{
+		"type":"TemplateProcessing",	"single":[{
+		"SpecialToken":{
+			"id":"[CLS]","type_id":0}
+		},
+			{
+		"Sequence":{
+			"id":"A","type_id":0}},
+			{
+		"SpecialToken":{
+			"id":"[SEP]","type_id":0}
+		}],
+		"pair":[{
+				"SpecialToken":{
+					"id":"[CLS]","type_id":0
+				}
+			},
+			{
+				"Sequence":{
+					"id":"A","type_id":0
+				}
+			},
+			{
+				"SpecialToken":{
+					"id":"[SEP]","type_id":0
+				}
+			},
+			{
+				"Sequence":{
+					"id":"B","type_id":1}
+				},
+			{
+				"SpecialToken":{
+					"id":"[SEP]","type_id":1
+				}
+			}
+		],
+		"special_tokens":{
+			"[CLS]":{
+				"id":"[CLS]",
+				"ids":[0],
+				"tokens":["[CLS]"]
+			},
+			"[SEP]":{
+				"id":"[SEP]",
+				"ids":[0],
+				"tokens":["[SEP]"]
+			}
+		}
+	},
+	"decoder":{
+		"type":"WordPiece",	"prefix":"##","cleanup":true
+	},
 	"model":{
 	"type":"WordPiece",
 	"unk_token":"CUSTOM_UNKNOWN",