{
  "_name_or_path": "./output_llava/checkpoints/pretrain-conv-llava-jp-1.3b-stage2-1280",
  "activation_function": "gelu",
  "architectures": [
    "LlavaGpt2ForCausalLM"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 7,
  "drop_path_rates": [
    0.085,
    0.088,
    0.091,
    0.094,
    0.097,
    0.1
  ],
  "embd_pdrop": 0.1,
  "eos_token_id": 7,
  "gradient_checkpointing": false,
  "image_aspect_ratio": "square",
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "mm_dense_connector_type": null,
  "mm_hidden_size": 3072,
  "mm_projector_type": "mlp2x_gelu",
  "mm_vision_resolution": 1280,
  "mm_vision_select_feature": "patch",
  "mm_vision_select_layer": -2,
  "mm_vision_tower": "convnext_large",
  "model_type": "llava-jp",
  "n_ctx": 1024,
  "n_embd": 2048,
  "n_head": 16,
  "n_inner": 8192,
  "n_layer": 24,
  "n_positions": 2048,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "scales": null,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "tokenizer_model_max_length": 1532,
  "tokenizer_padding_side": "right",
  "torch_dtype": "float32",
  "transformers_version": "4.41.2",
  "tune_mm_mlp_adapter": false,
  "use_cache": true,
  "use_mm_proj": true,
  "vision_add_five_stage": 6,
  "vision_encoder_type": "ConvNeXt",
  "vision_five_stage_width": 3072,
  "vocab_size": 50688
}