{
  "architectures": ["PS3Model"],
  "model_type": "ps3",
  "vision_config": {
    "architectures": ["PS3VisionModel"],
    "model_type": "ps3_vision_model",
    "model_name": "vit_large_patch16_224",
    "hidden_size": 1024,
    "pool": "map",
    "ps3": true,
    "ps3_scales": [384, 768, 1536],
    "select_based_on_layer": [0, 7, 14, 23],
    "min_select_num": 1,
    "max_select_num": 2560,
    "separate_pos_emb": true,
    "highres_selection_feature": true,
    "img_size": 4096,
    "drop": 0.0,
    "class_token": false,
    "final_norm": false,
    "radio": true,
    "radio_adapter_mlp_version": "v2",
    "radio_adapter_mlp_input_dim": 1024,
    "radio_adapter_mlp_hidden_dim": 1520,
    "radio_adapter_mlp_output_dim": 1152,
    "radio_adapter_mlp_num_inner": 3
  },
  "text_config": {
    "context_length": 64,
    "vocab_size": 32000,
    "hf_tokenizer_name": "timm/ViT-B-16-SigLIP",
    "tokenizer_kwargs": {
      "clean": "canonicalize"
    },
    "width": 1152,
    "heads": 16,
    "layers": 27,
    "mlp_ratio": 3.7362,
    "no_causal_mask": true,
    "proj_bias": true,
    "pool_type": "last",
    "norm_kwargs": {
      "eps": 1e-06
    },
    "architectures": ["PS3TextModel"],
    "model_type": "ps3_text_model",
    "output_dim": 1152,
    "prompt_proj_dim": 1024
  }
}