{ "adapter_config": { "attention_dropout": 0.0, "float32_attention": true, "head_dim": 72, "hidden_act": "silu", "hidden_size": 1152, "image_feature_dropout": 0.0, "image_padding_embed": null, "initializer_range": 0.02, "intermediate_size": 18944, "model_type": "", "num_attention_heads": 16, "num_key_value_heads": 16, "residual_dropout": 0.0, "text_hidden_size": 3584, "vit_layers": [ -3, -9 ] }, "architectures": [ "MolmoActForActionReasoning" ], "auto_map": { "AutoConfig": "configuration_molmoact.MolmoActConfig", "AutoModelForImageTextToText": "modeling_molmoact.MolmoActForActionReasoning" }, "image_patch_id": 152066, "initializer_range": 0.02, "llm_config": { "additional_vocab_size": 128, "attention_dropout": 0.0, "embedding_dropout": 0.0, "head_dim": 128, "hidden_act": "silu", "hidden_size": 3584, "initializer_range": 0.02, "intermediate_size": 18944, "layer_norm_eps": 1e-06, "max_position_embeddings": 4096, "model_type": "molmoact_llm", "norm_after": false, "num_attention_heads": 28, "num_hidden_layers": 28, "num_key_value_heads": 4, "qk_norm_type": "olmo", "qkv_bias": true, "residual_dropout": 0.0, "rope_scaling": null, "rope_theta": 1000000.0, "use_cache": true, "use_qk_norm": false, "vocab_size": 152064 }, "model_type": "molmoact", "n_action_bins": 256, "norm_stats": { "bc_z": { "action": { "mask": [ true, true, true, true, true, true, false ], "max": [ 0.2165454924106598, 0.1251407265663147, 0.10772687941789627, 0.33544227480888367, 0.28117990493774414, 0.40614867210388184, 1.0 ], "mean": [ -0.009958467446267605, 0.0008958321413956583, 0.004995597992092371, 0.00029755113064311445, -0.008735382929444313, -0.030693737789988518, 0.8344562649726868 ], "min": [ -0.1677047461271286, -0.14630407094955444, -0.10066790133714676, -0.29421567916870117, -0.32101404666900635, -0.4635624885559082, 0.0 ], "q01": [ -0.09220654994249344, -0.06456145539879798, -0.049121275544166565, -0.11594625547528267, -0.14152548640966414, -0.2251061636209488, 0.0 ], "q99": [ 0.07628866866230968, 0.058019736707210584, 0.052540797740221024, 0.11740604028105736, 0.11703975558280955, 0.16729306846857078, 1.0 ], "std": [ 0.03053455986082554, 0.0231423731893301, 0.020641816779971123, 0.04155943542718887, 0.046427831053733826, 0.0769818127155304, 0.3610210120677948 ] }, "num_trajectories": 43264, "num_transitions": 6015535, "proprio": { "max": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "mean": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "min": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "q01": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "q99": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "std": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] } }, "bridge_orig": { "action": { "mask": [ true, true, true, true, true, true, false ], "max": [ 0.41691166162490845, 0.25864794850349426, 0.21218234300613403, 3.122201919555664, 1.8618112802505493, 6.280478477478027, 1.0 ], "mean": [ 0.0002334194869035855, 0.00013004911306779832, -0.00012762474943883717, -0.0001556558854645118, -0.0004039328487124294, 0.00023557482927571982, 0.5764579176902771 ], "min": [ -0.4007510244846344, -0.13874775171279907, -0.22553899884223938, -3.2010786533355713, -1.8618112802505493, -6.279075622558594, 0.0 ], "q01": [ -0.02872725307941437, -0.04170349963009357, -0.026093858778476715, -0.08092105075716972, -0.09288699507713317, -0.20718276381492615, 0.0 ], "q99": [ 0.028309678435325586, 0.040855254605412394, 0.040161586627364146, 0.08192047759890528, 0.07792850524187081, 0.20382574498653397, 1.0 ], "std": [ 0.009765930473804474, 0.013689135201275349, 0.012667362578213215, 0.028534092009067535, 0.030637972056865692, 0.07691419124603271, 0.4973701536655426 ] }, "num_trajectories": 60064, "num_transitions": 2135463, "proprio": { "max": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "mean": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "min": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "q01": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "q99": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "std": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] } }, "fractal20220817_data": { "action": { "mask": [ true, true, true, true, true, true, false ], "max": [ 2.9984593391418457, 22.09052848815918, 2.7507524490356445, 1.570636510848999, 1.5321086645126343, 1.5691522359848022, 1.0 ], "mean": [ 0.006987582892179489, 0.006265917327255011, -0.01262515690177679, 0.04333311319351196, -0.005756212864071131, 0.0009130256366916001, 0.5354204773902893 ], "min": [ -2.0204520225524902, -5.497899532318115, -2.031663417816162, -1.569917917251587, -1.569892168045044, -1.570419430732727, 0.0 ], "q01": [ -0.22453527510166169, -0.14820013284683228, -0.231589707583189, -0.3517994859814644, -0.4193011274933815, -0.43643461108207704, 0.0 ], "q99": [ 0.17824687153100965, 0.14938379630446405, 0.21842354819178575, 0.5892666035890578, 0.35272657424211445, 0.44796681255102094, 1.0 ], "std": [ 0.0692116990685463, 0.05970962345600128, 0.07353084534406662, 0.15610496699810028, 0.13164450228214264, 0.14593800902366638, 0.497110515832901 ] }, "num_trajectories": 87212, "num_transitions": 3786400, "proprio": { "max": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "mean": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "min": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "q01": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "q99": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "std": [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ] } } }, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.52.3", "use_cache": true, "vit_config": { "attention_dropout": 0.0, "float32_attention": true, "head_dim": 72, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 1152, "image_default_input_size": [ 378, 378 ], "image_num_pos": 729, "image_patch_size": 14, "initializer_range": 0.02, "intermediate_size": 4304, "layer_norm_eps": 1e-06, "model_type": "molmoact_vit", "num_attention_heads": 16, "num_hidden_layers": 27, "num_key_value_heads": 16, "patch_bias": true, "pre_layernorm": false, "residual_dropout": 0.0, "use_cls_token": false } }