| { | |
| "adapter_config": { | |
| "attention_dropout": 0.0, | |
| "float32_attention": true, | |
| "head_dim": 72, | |
| "hidden_act": "silu", | |
| "hidden_size": 1152, | |
| "image_feature_dropout": 0.0, | |
| "image_padding_embed": null, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 18944, | |
| "model_type": "", | |
| "num_attention_heads": 16, | |
| "num_key_value_heads": 16, | |
| "residual_dropout": 0.0, | |
| "text_hidden_size": 3584, | |
| "vit_layers": [ | |
| -3, | |
| -9 | |
| ] | |
| }, | |
| "architectures": [ | |
| "MolmoActForActionReasoning" | |
| ], | |
| "auto_map": { | |
| "AutoConfig": "configuration_molmoact.MolmoActConfig", | |
| "AutoModelForImageTextToText": "modeling_molmoact.MolmoActForActionReasoning" | |
| }, | |
| "image_patch_id": 152066, | |
| "initializer_range": 0.02, | |
| "llm_config": { | |
| "additional_vocab_size": 128, | |
| "attention_dropout": 0.0, | |
| "embedding_dropout": 0.0, | |
| "head_dim": 128, | |
| "hidden_act": "silu", | |
| "hidden_size": 3584, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 18944, | |
| "layer_norm_eps": 1e-06, | |
| "max_position_embeddings": 4096, | |
| "model_type": "molmoact_llm", | |
| "norm_after": false, | |
| "num_attention_heads": 28, | |
| "num_hidden_layers": 28, | |
| "num_key_value_heads": 4, | |
| "qk_norm_type": "olmo", | |
| "qkv_bias": true, | |
| "residual_dropout": 0.0, | |
| "rope_scaling": null, | |
| "rope_theta": 1000000.0, | |
| "use_cache": true, | |
| "use_qk_norm": false, | |
| "vocab_size": 152064 | |
| }, | |
| "model_type": "molmoact", | |
| "n_action_bins": 256, | |
| "norm_stats": { | |
| "bc_z": { | |
| "action": { | |
| "mask": [ | |
| true, | |
| true, | |
| true, | |
| true, | |
| true, | |
| true, | |
| false | |
| ], | |
| "max": [ | |
| 0.2165454924106598, | |
| 0.1251407265663147, | |
| 0.10772687941789627, | |
| 0.33544227480888367, | |
| 0.28117990493774414, | |
| 0.40614867210388184, | |
| 1.0 | |
| ], | |
| "mean": [ | |
| -0.009958467446267605, | |
| 0.0008958321413956583, | |
| 0.004995597992092371, | |
| 0.00029755113064311445, | |
| -0.008735382929444313, | |
| -0.030693737789988518, | |
| 0.8344562649726868 | |
| ], | |
| "min": [ | |
| -0.1677047461271286, | |
| -0.14630407094955444, | |
| -0.10066790133714676, | |
| -0.29421567916870117, | |
| -0.32101404666900635, | |
| -0.4635624885559082, | |
| 0.0 | |
| ], | |
| "q01": [ | |
| -0.09220654994249344, | |
| -0.06456145539879798, | |
| -0.049121275544166565, | |
| -0.11594625547528267, | |
| -0.14152548640966414, | |
| -0.2251061636209488, | |
| 0.0 | |
| ], | |
| "q99": [ | |
| 0.07628866866230968, | |
| 0.058019736707210584, | |
| 0.052540797740221024, | |
| 0.11740604028105736, | |
| 0.11703975558280955, | |
| 0.16729306846857078, | |
| 1.0 | |
| ], | |
| "std": [ | |
| 0.03053455986082554, | |
| 0.0231423731893301, | |
| 0.020641816779971123, | |
| 0.04155943542718887, | |
| 0.046427831053733826, | |
| 0.0769818127155304, | |
| 0.3610210120677948 | |
| ] | |
| }, | |
| "num_trajectories": 43264, | |
| "num_transitions": 6015535, | |
| "proprio": { | |
| "max": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "mean": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "min": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "q01": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "q99": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "std": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ] | |
| } | |
| }, | |
| "bridge_orig": { | |
| "action": { | |
| "mask": [ | |
| true, | |
| true, | |
| true, | |
| true, | |
| true, | |
| true, | |
| false | |
| ], | |
| "max": [ | |
| 0.41691166162490845, | |
| 0.25864794850349426, | |
| 0.21218234300613403, | |
| 3.122201919555664, | |
| 1.8618112802505493, | |
| 6.280478477478027, | |
| 1.0 | |
| ], | |
| "mean": [ | |
| 0.0002334194869035855, | |
| 0.00013004911306779832, | |
| -0.00012762474943883717, | |
| -0.0001556558854645118, | |
| -0.0004039328487124294, | |
| 0.00023557482927571982, | |
| 0.5764579176902771 | |
| ], | |
| "min": [ | |
| -0.4007510244846344, | |
| -0.13874775171279907, | |
| -0.22553899884223938, | |
| -3.2010786533355713, | |
| -1.8618112802505493, | |
| -6.279075622558594, | |
| 0.0 | |
| ], | |
| "q01": [ | |
| -0.02872725307941437, | |
| -0.04170349963009357, | |
| -0.026093858778476715, | |
| -0.08092105075716972, | |
| -0.09288699507713317, | |
| -0.20718276381492615, | |
| 0.0 | |
| ], | |
| "q99": [ | |
| 0.028309678435325586, | |
| 0.040855254605412394, | |
| 0.040161586627364146, | |
| 0.08192047759890528, | |
| 0.07792850524187081, | |
| 0.20382574498653397, | |
| 1.0 | |
| ], | |
| "std": [ | |
| 0.009765930473804474, | |
| 0.013689135201275349, | |
| 0.012667362578213215, | |
| 0.028534092009067535, | |
| 0.030637972056865692, | |
| 0.07691419124603271, | |
| 0.4973701536655426 | |
| ] | |
| }, | |
| "num_trajectories": 60064, | |
| "num_transitions": 2135463, | |
| "proprio": { | |
| "max": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "mean": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "min": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "q01": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "q99": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "std": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ] | |
| } | |
| }, | |
| "fractal20220817_data": { | |
| "action": { | |
| "mask": [ | |
| true, | |
| true, | |
| true, | |
| true, | |
| true, | |
| true, | |
| false | |
| ], | |
| "max": [ | |
| 2.9984593391418457, | |
| 22.09052848815918, | |
| 2.7507524490356445, | |
| 1.570636510848999, | |
| 1.5321086645126343, | |
| 1.5691522359848022, | |
| 1.0 | |
| ], | |
| "mean": [ | |
| 0.006987582892179489, | |
| 0.006265917327255011, | |
| -0.01262515690177679, | |
| 0.04333311319351196, | |
| -0.005756212864071131, | |
| 0.0009130256366916001, | |
| 0.5354204773902893 | |
| ], | |
| "min": [ | |
| -2.0204520225524902, | |
| -5.497899532318115, | |
| -2.031663417816162, | |
| -1.569917917251587, | |
| -1.569892168045044, | |
| -1.570419430732727, | |
| 0.0 | |
| ], | |
| "q01": [ | |
| -0.22453527510166169, | |
| -0.14820013284683228, | |
| -0.231589707583189, | |
| -0.3517994859814644, | |
| -0.4193011274933815, | |
| -0.43643461108207704, | |
| 0.0 | |
| ], | |
| "q99": [ | |
| 0.17824687153100965, | |
| 0.14938379630446405, | |
| 0.21842354819178575, | |
| 0.5892666035890578, | |
| 0.35272657424211445, | |
| 0.44796681255102094, | |
| 1.0 | |
| ], | |
| "std": [ | |
| 0.0692116990685463, | |
| 0.05970962345600128, | |
| 0.07353084534406662, | |
| 0.15610496699810028, | |
| 0.13164450228214264, | |
| 0.14593800902366638, | |
| 0.497110515832901 | |
| ] | |
| }, | |
| "num_trajectories": 87212, | |
| "num_transitions": 3786400, | |
| "proprio": { | |
| "max": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "mean": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "min": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "q01": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "q99": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ], | |
| "std": [ | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0, | |
| 0.0 | |
| ] | |
| } | |
| } | |
| }, | |
| "tie_word_embeddings": false, | |
| "torch_dtype": "float32", | |
| "transformers_version": "4.52.3", | |
| "use_cache": true, | |
| "vit_config": { | |
| "attention_dropout": 0.0, | |
| "float32_attention": true, | |
| "head_dim": 72, | |
| "hidden_act": "gelu_pytorch_tanh", | |
| "hidden_size": 1152, | |
| "image_default_input_size": [ | |
| 378, | |
| 378 | |
| ], | |
| "image_num_pos": 729, | |
| "image_patch_size": 14, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 4304, | |
| "layer_norm_eps": 1e-06, | |
| "model_type": "molmoact_vit", | |
| "num_attention_heads": 16, | |
| "num_hidden_layers": 27, | |
| "num_key_value_heads": 16, | |
| "patch_bias": true, | |
| "pre_layernorm": false, | |
| "residual_dropout": 0.0, | |
| "use_cls_token": false | |
| } | |
| } |