GY2233
/

R2R_router_qwen3-1.7b

Text Classification

Model card Files Files and versions

R2R_router_qwen3-1.7b / config.json

GY2233's picture

Upload R2R router config

7d5d5ba verified 4 months ago

history blame contribute delete

2.41 kB

	{
	"model": {
	"model_type": "HiddenStatesTokenLMHeadLogitsClassifier",
	"init_args": {
	"hidden_states_size": 2048,
	"logits_size": 100,
	"hidden_dims": [
	1024,
	1024,
	1024,
	1024,
	1024,
	1024
	],
	"expansion_factor": 4,
	"dropout_rate": 0.1,
	"use_position_embedding": false,
	"freeze_lm_head": true,
	"normalize_input": false,
	"pretrained_model_name": "Qwen/Qwen3-1.7B"
	},
	"model_specific_args": {},
	"input_type": [
	"hidden_states",
	"token",
	"logits"
	],
	"output_type": "binary"
	},
	"data": {
	"train": {
	"path": [
	"local:output_qwen3_1_7b/query_dataset_train/LLM_response/SLM_prefill/LLM_continuation_verify/divergent_label_dataset"
	],
	"type": "divergent",
	"input_prefix": "small_"
	},
	"test": {
	"path": [
	"local:output_qwen3_1_7b/query_dataset_validation/LLM_response/SLM_prefill/LLM_continuation_verify/divergent_label_dataset"
	],
	"type": "divergent",
	"input_prefix": "small_"
	}
	},
	"training": {
	"optimizer": {
	"lr": 5e-05,
	"weight_decay": 0.0005
	},
	"params": {
	"num_epochs": 50,
	"batch_size": 1024,
	"patience": 10,
	"device": "cuda"
	},
	"loss": {
	"type": "BCEWithLogitsLoss",
	"recall_factor": 1.0
	},
	"validation": {
	"valid_freq": 2
	},
	"dtype": "float32"
	},
	"optimizing": {
	"type": "threshold",
	"min_recall": 0.95
	},
	"output": {
	"output_dir": "resource/default_router_qwen3_1_7b.pt",
	"checkpoint_dir": "output/checkpoint_qwen3_1_7b",
	"model_name": null
	},
	"result": {
	"model_path": "resource/default_router_qwen3_1_7b.pt/classifier_20250721_183847.pt",
	"results": {
	"threshold": 0.36636363636363634,
	"best_epoch": 7,
	"best_val_loss": 0.7067983349265603,
	"final_metrics": {
	"accuracy": 0.7341007184884746,
	"precision": 0.17696475971794748,
	"recall": 0.951471505652902,
	"f1": 0.29842523063720233,
	"positive_rate": 0.3195670204357768
	},
	"pre_opt_metrics": {
	"accuracy": 0.6010652523999477,
	"precision": 0.5730381077349015,
	"recall": 0.9129135702481148,
	"f1": 0.7041066981808973,
	"positive_rate": 0.8283077168204037
	}
	}
	}
	}