| { | |
| "model": { | |
| "model_type": "HiddenStatesTokenLMHeadLogitsClassifier", | |
| "init_args": { | |
| "hidden_states_size": 2048, | |
| "logits_size": 100, | |
| "hidden_dims": [ | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024, | |
| 1024 | |
| ], | |
| "expansion_factor": 4, | |
| "dropout_rate": 0.1, | |
| "use_position_embedding": false, | |
| "freeze_lm_head": true, | |
| "normalize_input": false, | |
| "pretrained_model_name": "Qwen/Qwen3-1.7B" | |
| }, | |
| "model_specific_args": {}, | |
| "input_type": [ | |
| "hidden_states", | |
| "token", | |
| "logits" | |
| ], | |
| "output_type": "binary" | |
| }, | |
| "data": { | |
| "train": { | |
| "path": [ | |
| "local:output_qwen3_1_7b/query_dataset_train/LLM_response/SLM_prefill/LLM_continuation_verify/divergent_label_dataset" | |
| ], | |
| "type": "divergent", | |
| "input_prefix": "small_" | |
| }, | |
| "test": { | |
| "path": [ | |
| "local:output_qwen3_1_7b/query_dataset_validation/LLM_response/SLM_prefill/LLM_continuation_verify/divergent_label_dataset" | |
| ], | |
| "type": "divergent", | |
| "input_prefix": "small_" | |
| } | |
| }, | |
| "training": { | |
| "optimizer": { | |
| "lr": 5e-05, | |
| "weight_decay": 0.0005 | |
| }, | |
| "params": { | |
| "num_epochs": 50, | |
| "batch_size": 1024, | |
| "patience": 10, | |
| "device": "cuda" | |
| }, | |
| "loss": { | |
| "type": "BCEWithLogitsLoss", | |
| "recall_factor": 1.0 | |
| }, | |
| "validation": { | |
| "valid_freq": 2 | |
| }, | |
| "dtype": "float32" | |
| }, | |
| "optimizing": { | |
| "type": "threshold", | |
| "min_recall": 0.95 | |
| }, | |
| "output": { | |
| "output_dir": "resource/default_router_qwen3_1_7b.pt", | |
| "checkpoint_dir": "output/checkpoint_qwen3_1_7b", | |
| "model_name": null | |
| }, | |
| "result": { | |
| "model_path": "resource/default_router_qwen3_1_7b.pt/classifier_20250721_183847.pt", | |
| "results": { | |
| "threshold": 0.36636363636363634, | |
| "best_epoch": 7, | |
| "best_val_loss": 0.7067983349265603, | |
| "final_metrics": { | |
| "accuracy": 0.7341007184884746, | |
| "precision": 0.17696475971794748, | |
| "recall": 0.951471505652902, | |
| "f1": 0.29842523063720233, | |
| "positive_rate": 0.3195670204357768 | |
| }, | |
| "pre_opt_metrics": { | |
| "accuracy": 0.6010652523999477, | |
| "precision": 0.5730381077349015, | |
| "recall": 0.9129135702481148, | |
| "f1": 0.7041066981808973, | |
| "positive_rate": 0.8283077168204037 | |
| } | |
| } | |
| } | |
| } |