R2R_router_qwen3-1.7b / config.json
GY2233's picture
Upload R2R router config
7d5d5ba verified
{
"model": {
"model_type": "HiddenStatesTokenLMHeadLogitsClassifier",
"init_args": {
"hidden_states_size": 2048,
"logits_size": 100,
"hidden_dims": [
1024,
1024,
1024,
1024,
1024,
1024
],
"expansion_factor": 4,
"dropout_rate": 0.1,
"use_position_embedding": false,
"freeze_lm_head": true,
"normalize_input": false,
"pretrained_model_name": "Qwen/Qwen3-1.7B"
},
"model_specific_args": {},
"input_type": [
"hidden_states",
"token",
"logits"
],
"output_type": "binary"
},
"data": {
"train": {
"path": [
"local:output_qwen3_1_7b/query_dataset_train/LLM_response/SLM_prefill/LLM_continuation_verify/divergent_label_dataset"
],
"type": "divergent",
"input_prefix": "small_"
},
"test": {
"path": [
"local:output_qwen3_1_7b/query_dataset_validation/LLM_response/SLM_prefill/LLM_continuation_verify/divergent_label_dataset"
],
"type": "divergent",
"input_prefix": "small_"
}
},
"training": {
"optimizer": {
"lr": 5e-05,
"weight_decay": 0.0005
},
"params": {
"num_epochs": 50,
"batch_size": 1024,
"patience": 10,
"device": "cuda"
},
"loss": {
"type": "BCEWithLogitsLoss",
"recall_factor": 1.0
},
"validation": {
"valid_freq": 2
},
"dtype": "float32"
},
"optimizing": {
"type": "threshold",
"min_recall": 0.95
},
"output": {
"output_dir": "resource/default_router_qwen3_1_7b.pt",
"checkpoint_dir": "output/checkpoint_qwen3_1_7b",
"model_name": null
},
"result": {
"model_path": "resource/default_router_qwen3_1_7b.pt/classifier_20250721_183847.pt",
"results": {
"threshold": 0.36636363636363634,
"best_epoch": 7,
"best_val_loss": 0.7067983349265603,
"final_metrics": {
"accuracy": 0.7341007184884746,
"precision": 0.17696475971794748,
"recall": 0.951471505652902,
"f1": 0.29842523063720233,
"positive_rate": 0.3195670204357768
},
"pre_opt_metrics": {
"accuracy": 0.6010652523999477,
"precision": 0.5730381077349015,
"recall": 0.9129135702481148,
"f1": 0.7041066981808973,
"positive_rate": 0.8283077168204037
}
}
}
}