xlm-roberta-text-detector / preprocessor_config.json

Update preprocessor_config.json

3fb2361 verified 11 months ago

1.68 kB

	{
	"_name_or_path": "xlm-roberta-base",
	"architectures": [
	"XLMRobertaForSequenceClassification"
	],
	"model_type": "xlm-roberta",
	"tokenizer_class": "XLMRobertaTokenizer",
	"task_specific_params": {
	"text-classification": {
	"num_labels": 2,
	"id2label": {
	"0": "HUMAN",
	"1": "AI"
	},
	"label2id": {
	"HUMAN": 0,
	"AI": 1
	}
	}
	},
	"text_config": {
	"max_length": 512,
	"padding": "max_length",
	"truncation": true,
	"return_tensors": "pt"
	},
	"preprocessing": {
	"do_lower_case": false,
	"strip_accents": false,
	"add_special_tokens": true,
	"padding": true,
	"truncation": true,
	"max_length": 512
	},
	"special_tokens": {
	"bos_token": "<s>",
	"eos_token": "</s>",
	"unk_token": "<unk>",
	"sep_token": "</s>",
	"pad_token": "<pad>",
	"cls_token": "<s>",
	"mask_token": "<mask>"
	},
	"tokenizer_settings": {
	"clean_up_tokenization_spaces": true,
	"model_max_length": 512,
	"padding_side": "right",
	"truncation_side": "right",
	"return_attention_mask": true,
	"return_token_type_ids": false
	},
	"inference_config": {
	"return_all_scores": true,
	"output_hidden_states": false,
	"output_attentions": false,
	"return_dict": true,
	"problem_type": "single_label_classification"
	},
	"model_params": {
	"attention_probs_dropout_prob": 0.1,
	"hidden_dropout_prob": 0.1,
	"hidden_size": 768,
	"intermediate_size": 3072,
	"max_position_embeddings": 514,
	"num_attention_heads": 12,
	"num_hidden_layers": 12,
	"type_vocab_size": 1,
	"vocab_size": 250002,
	"layer_norm_eps": 1e-05
	}
	}