```json
{
  "transformer_architecture": {
    "vocab_size": 128000,
    "vocab_file": "vocab.json",
    "hidden_size": 4608,
    "num_layers": 27,
    "num_attention_heads": 36,
    "num_local_attention_heads": 0,
    "local_attention_window_size": null,
    "rotary_embedding_base": 1000000,
    "rotary_percentage": 1.0,
    "sequence_length": 8192,
    "norm_type": "layernorm",
    "relative_position_embedding_type": "rotary_complex",
    "mlp_type": "default",
    "mlp_factor": 4.0,
    "attention_bias": true,
    "attention_qkv_in_one": false,
    "attention_num_kv_heads": 4,
    "attention_use_matmul": false,
    "mlp_bias": true,
    "key_query_norm": false,
    "weight_tying": false,
    "masked_softmax": {
      "kernel": "torch",
      "softmax_in_fp32": true,
      "scale": 1.0,
      "deterministic_flash_attn_bwd": false
    },
    "layernorm": {
      "optimization_type": "torch",
      "layernorm_epsilon": 1e-05
    },
    "precision": "bfloat16",
    "dropout_embedding": 0.0,
    "dropout_attention_probs": 0.0,
    "dropout_after_attention": 0.0,
    "dropout_after_mlp": 0.0,
    "finetunable_token_ids": [],
    "image_encoder": false,
    "dropout_image_encoder": 0.0,
    "lora_config": null
  }
}
```
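
A minimal sketch (not from the source) of how a consumer might read this config and derive the shapes it implies, assuming the JSON above is saved as `config.json`; the field names come from the config itself, while the file path and derived variable names are illustrative.

```python
import json

# Load the architecture config shown above (the path is an assumption).
with open("config.json") as f:
    cfg = json.load(f)["transformer_architecture"]

hidden_size = cfg["hidden_size"]              # 4608
num_heads = cfg["num_attention_heads"]        # 36
num_kv_heads = cfg["attention_num_kv_heads"]  # 4

# Each attention head operates on hidden_size / num_heads channels.
head_dim = hidden_size // num_heads           # 4608 // 36 = 128

# Grouped-query attention: 36 query heads share 4 KV heads,
# i.e. 9 query heads per KV head.
queries_per_kv = num_heads // num_kv_heads    # 36 // 4 = 9

# The MLP intermediate size follows mlp_factor.
ffn_hidden = int(cfg["mlp_factor"] * hidden_size)  # 4.0 * 4608 = 18432

print(head_dim, queries_per_kv, ffn_hidden)
```

Note that `attention_num_kv_heads` (4) being smaller than `num_attention_heads` (36) is what makes this grouped-query attention; with `rotary_percentage` at 1.0, rotary embeddings are applied to the full head dimension.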