{
  "architectures": [
    "MiniCPMForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_minicpm.MiniCPMConfig",
    "AutoModel": "modeling_minicpm.MiniCPMModel",
    "AutoModelForCausalLM": "modeling_minicpm.MiniCPMForCausalLM",
    "AutoModelForSeq2SeqLM": "modeling_minicpm.MiniCPMForCausalLM",
    "AutoModelForSequenceClassification": "modeling_minicpm.MiniCPMForSequenceClassification"
  },
  "bos_token_id": 1,
  "dim_model_base": 256,
  "eos_token_id": 73440,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.1,
  "intermediate_size": 16384,
  "max_position_embeddings": 65536,
  "mlp_bias": false,
  "model_type": "minicpm",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 2,
  "pad_token_id": 73440,
  "pretraining_tp": 1,
  "quantization": {
    "group_size": 64,
    "bits": 4
  },
  "quantization_config": {
    "group_size": 64,
    "bits": 4
  },
  "rms_norm_eps": 1e-06,
  "rope_scaling": {
    "factor": 1.0,
    "long_factor": [
      0.9982316082870437,
      1.033048153422584,
      1.0749920956484724,
      1.1255096879436193,
      1.1863348602111476,
      1.259543828902579,
      1.3476188888731149,
      1.4535223827776373,
      1.5807816745852985,
      1.7335856049489526,
      1.9168922912975785,
      2.1365471404135326,
      2.3994084200118646,
      2.713475511863602,
      3.0880118452194134,
      3.533650295140154,
      4.062463396503134,
      4.687974098908333,
      5.425075306704039,
      6.289818967956352,
      7.29902962722721,
      8.469695779093664,
      9.81809877306655,
      11.358657902065282,
      13.102505860712087,
      15.055862949967128,
      17.218348131364184,
      19.581439255386453,
      22.127353314656723,
      24.828633849376587,
      27.6486820771775,
      30.54334096108829,
      33.46345345363812,
      36.358112337548896,
      39.17816056534983,
      41.879441100069684,
      44.425355159339965,
      46.78844628336223,
      48.95093146475928,
      50.90428855401433,
      52.648136512661125,
      54.18869564165987,
      55.537098635632745,
      56.7077647874992,
      57.71697544677006,
      58.58171910802236,
      59.31882031581807,
      59.94433101822328,
      60.47314411958625,
      60.918782569507,
      61.29331890286281,
      61.60738599471455,
      61.87024727431288,
      62.089902123428836,
      62.27320880977746,
      62.42601274014111,
      62.55327203194878,
      62.65917552585329,
      62.74725058582382,
      62.82045955451526,
      62.88128472678279,
      62.931802319077946,
      62.97374626130382,
      63.008562806439365
    ],
    "original_max_position_embeddings": 65536,
    "rope_type": "longrope",
    "short_factor": [
      0.9982316082870437,
      1.033048153422584,
      1.0749920956484724,
      1.1255096879436193,
      1.1863348602111476,
      1.259543828902579,
      1.3476188888731149,
      1.4535223827776373,
      1.5807816745852985,
      1.7335856049489526,
      1.9168922912975785,
      2.1365471404135326,
      2.3994084200118646,
      2.713475511863602,
      3.0880118452194134,
      3.533650295140154,
      4.062463396503134,
      4.687974098908333,
      5.425075306704039,
      6.289818967956352,
      7.29902962722721,
      8.469695779093664,
      9.81809877306655,
      11.358657902065282,
      13.102505860712087,
      15.055862949967128,
      17.218348131364184,
      19.581439255386453,
      22.127353314656723,
      24.828633849376587,
      27.6486820771775,
      30.54334096108829,
      33.46345345363812,
      36.358112337548896,
      39.17816056534983,
      41.879441100069684,
      44.425355159339965,
      46.78844628336223,
      48.95093146475928,
      50.90428855401433,
      52.648136512661125,
      54.18869564165987,
      55.537098635632745,
      56.7077647874992,
      57.71697544677006,
      58.58171910802236,
      59.31882031581807,
      59.94433101822328,
      60.47314411958625,
      60.918782569507,
      61.29331890286281,
      61.60738599471455,
      61.87024727431288,
      62.089902123428836,
      62.27320880977746,
      62.42601274014111,
      62.55327203194878,
      62.65917552585329,
      62.74725058582382,
      62.82045955451526,
      62.88128472678279,
      62.931802319077946,
      62.97374626130382,
      63.008562806439365
    ],
    "type": "longrope"
  },
  "rope_theta": 10000.0,
  "scale_depth": 1.4,
  "scale_emb": 12,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.53.3",
  "use_cache": true,
  "vocab_size": 73448
}