ShuaiYang03 commited on
Commit
524a58c
·
verified ·
1 Parent(s): 4ea7fd9

Upload folder using huggingface_hub

Browse files
Files changed (34) hide show
  1. .gitattributes +7 -0
  2. checkpoints/step-240000-epoch-05-loss=0.0910.pt +3 -0
  3. config.json +57 -0
  4. config.yaml +53 -0
  5. dataset_statistics.json +264 -0
  6. results_step-240000-epoch-05-loss=0.0910_1/log/final_result.log +114 -0
  7. results_step-240000-epoch-05-loss=0.0910_1/log/log1.log +0 -0
  8. results_step-240000-epoch-05-loss=0.0910_1/log/log2.log +0 -0
  9. results_step-240000-epoch-05-loss=0.0910_1/log/log3.log +3 -0
  10. results_step-240000-epoch-05-loss=0.0910_1/log/log4.log +0 -0
  11. results_step-240000-epoch-05-loss=0.0910_1/log/log5.log +0 -0
  12. results_step-240000-epoch-05-loss=0.0910_1/log/log6.log +0 -0
  13. results_step-240000-epoch-05-loss=0.0910_1/log/log7.log +3 -0
  14. results_step-240000-epoch-05-loss=0.0910_1/log/log8.log +0 -0
  15. results_step-240000-epoch-05-loss=0.0910_2/log/final_result.log +114 -0
  16. results_step-240000-epoch-05-loss=0.0910_2/log/log1.log +0 -0
  17. results_step-240000-epoch-05-loss=0.0910_2/log/log2.log +0 -0
  18. results_step-240000-epoch-05-loss=0.0910_2/log/log3.log +3 -0
  19. results_step-240000-epoch-05-loss=0.0910_2/log/log4.log +0 -0
  20. results_step-240000-epoch-05-loss=0.0910_2/log/log5.log +0 -0
  21. results_step-240000-epoch-05-loss=0.0910_2/log/log6.log +0 -0
  22. results_step-240000-epoch-05-loss=0.0910_2/log/log7.log +3 -0
  23. results_step-240000-epoch-05-loss=0.0910_2/log/log8.log +0 -0
  24. results_step-240000-epoch-05-loss=0.0910_3/log/final_result.log +114 -0
  25. results_step-240000-epoch-05-loss=0.0910_3/log/log1.log +0 -0
  26. results_step-240000-epoch-05-loss=0.0910_3/log/log2.log +0 -0
  27. results_step-240000-epoch-05-loss=0.0910_3/log/log3.log +3 -0
  28. results_step-240000-epoch-05-loss=0.0910_3/log/log4.log +0 -0
  29. results_step-240000-epoch-05-loss=0.0910_3/log/log5.log +0 -0
  30. results_step-240000-epoch-05-loss=0.0910_3/log/log6.log +0 -0
  31. results_step-240000-epoch-05-loss=0.0910_3/log/log7.log +3 -0
  32. results_step-240000-epoch-05-loss=0.0910_3/log/log8.log +0 -0
  33. run-metrics.jsonl +1 -0
  34. sys12_meta_query_action_only_sync_pretraining_v2_query_64_mlp_lora_reimplement_transformer_4_50_single_node_bs128_2--image_augstage1.jsonl +3 -0
.gitattributes CHANGED
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ results_step-240000-epoch-05-loss=0.0910_1/log/log3.log filter=lfs diff=lfs merge=lfs -text
37
+ results_step-240000-epoch-05-loss=0.0910_1/log/log7.log filter=lfs diff=lfs merge=lfs -text
38
+ results_step-240000-epoch-05-loss=0.0910_2/log/log3.log filter=lfs diff=lfs merge=lfs -text
39
+ results_step-240000-epoch-05-loss=0.0910_2/log/log7.log filter=lfs diff=lfs merge=lfs -text
40
+ results_step-240000-epoch-05-loss=0.0910_3/log/log3.log filter=lfs diff=lfs merge=lfs -text
41
+ results_step-240000-epoch-05-loss=0.0910_3/log/log7.log filter=lfs diff=lfs merge=lfs -text
42
+ sys12_meta_query_action_only_sync_pretraining_v2_query_64_mlp_lora_reimplement_transformer_4_50_single_node_bs128_2--image_augstage1.jsonl filter=lfs diff=lfs merge=lfs -text
checkpoints/step-240000-epoch-05-loss=0.0910.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9248ea07056100a0b3780dcd71b5c99d9efdf3c63635a7295a1ecad2a1d5d25e
3
+ size 11398385050
config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_dim": 7,
3
+ "data_root_dir": "/mnt/petrelfs/yangshuai1/rep/InstructVLA_official/cache",
4
+ "debug": false,
5
+ "disable_instruction": false,
6
+ "fix_system1": false,
7
+ "future_action_window_size": 15,
8
+ "hf_token": ".hf_token",
9
+ "image_aug": true,
10
+ "is_resume": true,
11
+ "load_all_data_for_training": true,
12
+ "num_of_meta_query": 64,
13
+ "past_action_window_size": 0,
14
+ "pretrained_checkpoint": "/mnt/petrelfs/yangshuai1/rep/InstructVLA_official/outputs/code_reimp/sys12_meta_query_action_only_sync_pretraining_v2_query_64_mlp_lora_reimplement_transformer_4_50_single_node_bs128_2--image_augstage1/checkpoints/step-180000-epoch-03-loss=0.1214.pt",
15
+ "repeated_diffusion_steps": 4,
16
+ "resume_epoch": 3,
17
+ "resume_step": 180000,
18
+ "run_id": "sys12_meta_query_action_only_sync_pretraining_v2_query_64_mlp_lora_reimplement_transformer_4_50_single_node_bs128_2--image_augstage1",
19
+ "run_id_note": null,
20
+ "run_root_dir": "outputs/code_reimp",
21
+ "save_interval": 20000,
22
+ "seed": 42,
23
+ "stage": "stage1",
24
+ "trackers": [
25
+ "jsonl",
26
+ "wandb"
27
+ ],
28
+ "use_mm": false,
29
+ "vla": {
30
+ "action_tokenizer": "extra_action_tokenizer",
31
+ "base_vlm": "ckpt/Eagle2-2B",
32
+ "data_mix": "bridge_rt_1",
33
+ "enable_gradient_checkpointing": true,
34
+ "enable_mixed_precision_training": true,
35
+ "epochs": 100,
36
+ "expected_world_size": 8,
37
+ "freeze_llm_backbone": false,
38
+ "freeze_vision_backbone": false,
39
+ "global_batch_size": 128,
40
+ "learning_rate": 5e-05,
41
+ "lr_scheduler_type": "constant",
42
+ "max_grad_norm": 1.0,
43
+ "max_steps": null,
44
+ "per_device_batch_size": 16,
45
+ "reduce_in_full_precision": true,
46
+ "shuffle_buffer_size": 250000,
47
+ "train_strategy": "fsdp-full-shard",
48
+ "type": "prism-qwen25-dinosiglip-224px+0_5b",
49
+ "unfreeze_last_llm_layer": false,
50
+ "vla_id": "prism-qwen25-dinosiglip-224px+0_5b",
51
+ "warmup_ratio": 0.0,
52
+ "weight_decay": 0.0
53
+ },
54
+ "wandb_entity": "shuaiyang2003",
55
+ "wandb_project": "dual_sys_code_clean",
56
+ "with_pointing": false
57
+ }
config.yaml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ action_dim: 7
2
+ data_root_dir: /mnt/petrelfs/yangshuai1/rep/InstructVLA_official/cache
3
+ debug: false
4
+ disable_instruction: false
5
+ fix_system1: false
6
+ future_action_window_size: 15
7
+ hf_token: .hf_token
8
+ image_aug: true
9
+ is_resume: true
10
+ load_all_data_for_training: true
11
+ num_of_meta_query: 64
12
+ past_action_window_size: 0
13
+ pretrained_checkpoint: /mnt/petrelfs/yangshuai1/rep/InstructVLA_official/outputs/code_reimp/sys12_meta_query_action_only_sync_pretraining_v2_query_64_mlp_lora_reimplement_transformer_4_50_single_node_bs128_2--image_augstage1/checkpoints/step-180000-epoch-03-loss=0.1214.pt
14
+ repeated_diffusion_steps: 4
15
+ resume_epoch: 3
16
+ resume_step: 180000
17
+ run_id: sys12_meta_query_action_only_sync_pretraining_v2_query_64_mlp_lora_reimplement_transformer_4_50_single_node_bs128_2--image_augstage1
18
+ run_id_note: null
19
+ run_root_dir: outputs/code_reimp
20
+ save_interval: 20000
21
+ seed: 42
22
+ stage: stage1
23
+ trackers:
24
+ - jsonl
25
+ - wandb
26
+ use_mm: false
27
+ vla:
28
+ action_tokenizer: extra_action_tokenizer
29
+ base_vlm: ckpt/Eagle2-2B
30
+ data_mix: bridge_rt_1
31
+ enable_gradient_checkpointing: true
32
+ enable_mixed_precision_training: true
33
+ epochs: 100
34
+ expected_world_size: 8
35
+ freeze_llm_backbone: false
36
+ freeze_vision_backbone: false
37
+ global_batch_size: 128
38
+ learning_rate: 5.0e-05
39
+ lr_scheduler_type: constant
40
+ max_grad_norm: 1.0
41
+ max_steps: null
42
+ per_device_batch_size: 16
43
+ reduce_in_full_precision: true
44
+ shuffle_buffer_size: 250000
45
+ train_strategy: fsdp-full-shard
46
+ type: prism-qwen25-dinosiglip-224px+0_5b
47
+ unfreeze_last_llm_layer: false
48
+ vla_id: prism-qwen25-dinosiglip-224px+0_5b
49
+ warmup_ratio: 0.0
50
+ weight_decay: 0.0
51
+ wandb_entity: shuaiyang2003
52
+ wandb_project: dual_sys_code_clean
53
+ with_pointing: false
dataset_statistics.json ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bridge_dataset": {
3
+ "action": {
4
+ "mean": [
5
+ 0.0002334193413844332,
6
+ 0.0001300490548601374,
7
+ -0.0001276246621273458,
8
+ -0.00015565502690151334,
9
+ -0.0004039333143737167,
10
+ 0.0002355769247515127,
11
+ 0.5764579772949219
12
+ ],
13
+ "std": [
14
+ 0.009765916503965855,
15
+ 0.013689138926565647,
16
+ 0.012667354196310043,
17
+ 0.02853417582809925,
18
+ 0.0306379534304142,
19
+ 0.07691461592912674,
20
+ 0.49737000465393066
21
+ ],
22
+ "max": [
23
+ 0.41691166162490845,
24
+ 0.25864794850349426,
25
+ 0.21218234300613403,
26
+ 3.122201919555664,
27
+ 1.8618112802505493,
28
+ 6.280478477478027,
29
+ 1.0
30
+ ],
31
+ "min": [
32
+ -0.4007510244846344,
33
+ -0.13874775171279907,
34
+ -0.22553899884223938,
35
+ -3.2010786533355713,
36
+ -1.8618112802505493,
37
+ -6.279075622558594,
38
+ 0.0
39
+ ],
40
+ "q01": [
41
+ -0.02872725307941437,
42
+ -0.04170349963009357,
43
+ -0.026093858778476715,
44
+ -0.08092105075716972,
45
+ -0.09288699507713317,
46
+ -0.20718276381492615,
47
+ 0.0
48
+ ],
49
+ "q99": [
50
+ 0.028309678435325586,
51
+ 0.040855254605412394,
52
+ 0.040161586627364146,
53
+ 0.08192047759890528,
54
+ 0.07792850524187081,
55
+ 0.20382574498653397,
56
+ 1.0
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ false
66
+ ]
67
+ },
68
+ "proprio": {
69
+ "mean": [
70
+ 0.3094092905521393,
71
+ 0.030575834214687347,
72
+ 0.06454048305749893,
73
+ 0.006824127864092588,
74
+ -0.07762698084115982,
75
+ 0.10757855325937271,
76
+ 0.0,
77
+ 0.7084035277366638
78
+ ],
79
+ "std": [
80
+ 0.060528464615345,
81
+ 0.09188621491193771,
82
+ 0.05159851908683777,
83
+ 0.13182717561721802,
84
+ 0.17031119763851166,
85
+ 0.5767312049865723,
86
+ 0.0,
87
+ 0.3519800305366516
88
+ ],
89
+ "max": [
90
+ 0.5862360596656799,
91
+ 0.4034728705883026,
92
+ 0.36494991183280945,
93
+ 1.514088749885559,
94
+ 1.570796251296997,
95
+ 3.1415255069732666,
96
+ 0.0,
97
+ 1.1154625415802002
98
+ ],
99
+ "min": [
100
+ -0.04167502000927925,
101
+ -0.3945816159248352,
102
+ -0.15537554025650024,
103
+ -3.141592502593994,
104
+ -1.4992541074752808,
105
+ -3.14153790473938,
106
+ 0.0,
107
+ 0.04637829214334488
108
+ ],
109
+ "q01": [
110
+ 0.17111587673425674,
111
+ -0.16998695254325866,
112
+ -0.05544630073010921,
113
+ -0.366876106262207,
114
+ -0.5443069756031036,
115
+ -1.3536006283760071,
116
+ 0.0,
117
+ 0.052190229296684265
118
+ ],
119
+ "q99": [
120
+ 0.45320980012416834,
121
+ 0.23518154799938193,
122
+ 0.1951873075962065,
123
+ 0.3806115746498103,
124
+ 0.2789784955978382,
125
+ 1.8410426235198971,
126
+ 0.0,
127
+ 1.0105689764022827
128
+ ]
129
+ },
130
+ "num_transitions": 2135463,
131
+ "num_trajectories": 60064
132
+ },
133
+ "fractal20220817_data": {
134
+ "action": {
135
+ "mean": [
136
+ 0.006987507455050945,
137
+ 0.0062658516690135,
138
+ -0.012625164352357388,
139
+ 0.04333285614848137,
140
+ -0.005756272468715906,
141
+ 0.0009130411199294031,
142
+ 0.5354204773902893
143
+ ],
144
+ "std": [
145
+ 0.06921109557151794,
146
+ 0.05970890820026398,
147
+ 0.0735311210155487,
148
+ 0.15610598027706146,
149
+ 0.1316441297531128,
150
+ 0.1459376960992813,
151
+ 0.49711623787879944
152
+ ],
153
+ "max": [
154
+ 2.9984593391418457,
155
+ 22.09052848815918,
156
+ 2.7507524490356445,
157
+ 1.570636510848999,
158
+ 1.5321086645126343,
159
+ 1.5691522359848022,
160
+ 1.0
161
+ ],
162
+ "min": [
163
+ -2.0204520225524902,
164
+ -5.497899532318115,
165
+ -2.031663417816162,
166
+ -1.569917917251587,
167
+ -1.569892168045044,
168
+ -1.570419430732727,
169
+ 0.0
170
+ ],
171
+ "q01": [
172
+ -0.22453527510166169,
173
+ -0.14820013284683228,
174
+ -0.231589707583189,
175
+ -0.3517994859814644,
176
+ -0.4193011274933815,
177
+ -0.43643461108207704,
178
+ 0.0
179
+ ],
180
+ "q99": [
181
+ 0.17824687153100965,
182
+ 0.14938379630446405,
183
+ 0.21842354819178575,
184
+ 0.5892666035890578,
185
+ 0.35272657424211445,
186
+ 0.44796681255102094,
187
+ 1.0
188
+ ],
189
+ "mask": [
190
+ true,
191
+ true,
192
+ true,
193
+ true,
194
+ true,
195
+ true,
196
+ false
197
+ ]
198
+ },
199
+ "proprio": {
200
+ "mean": [
201
+ 0.5598955750465393,
202
+ -0.08333974331617355,
203
+ 0.7771074175834656,
204
+ -0.2480376660823822,
205
+ 0.4951733648777008,
206
+ 0.09266174584627151,
207
+ 0.20975501835346222,
208
+ 0.42613404989242554
209
+ ],
210
+ "std": [
211
+ 0.12432783097028732,
212
+ 0.11558859050273895,
213
+ 0.24595840275287628,
214
+ 0.5126973986625671,
215
+ 0.5218129754066467,
216
+ 0.16630405187606812,
217
+ 0.2754833996295929,
218
+ 0.45545175671577454
219
+ ],
220
+ "max": [
221
+ 1.0534898042678833,
222
+ 0.48018959164619446,
223
+ 1.6896663904190063,
224
+ 0.9999993443489075,
225
+ 0.9999874830245972,
226
+ 0.9554369449615479,
227
+ 0.9914546012878418,
228
+ 1.0
229
+ ],
230
+ "min": [
231
+ -0.4436439275741577,
232
+ -0.9970501065254211,
233
+ -0.006579156965017319,
234
+ -0.8643477559089661,
235
+ -0.7079970240592957,
236
+ -0.7688722014427185,
237
+ -0.4999994933605194,
238
+ 0.0
239
+ ],
240
+ "q01": [
241
+ 0.32481380939483645,
242
+ -0.28334290891885755,
243
+ 0.14107070609927178,
244
+ -0.686474204659462,
245
+ -0.6808923494815826,
246
+ -0.36045596331357954,
247
+ -0.454380963742733,
248
+ 0.0
249
+ ],
250
+ "q99": [
251
+ 0.8750156319141384,
252
+ 0.21247054174542404,
253
+ 1.0727112340927123,
254
+ 0.9377871316671368,
255
+ 0.9563051050901409,
256
+ 0.45990042358636823,
257
+ 0.7216041100025177,
258
+ 1.0
259
+ ]
260
+ },
261
+ "num_transitions": 3786400,
262
+ "num_trajectories": 87212
263
+ }
264
+ }
results_step-240000-epoch-05-loss=0.0910_1/log/final_result.log ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ***Pick coke can results***
2
+ --------------------
3
+ horizontal sim variant avg success {'CogACT-Base': 0.9644444444444444}
4
+ vertical sim variant avg success {'CogACT-Base': 0.9600000000000001}
5
+ standing sim variant avg success {'CogACT-Base': 0.9022222222222224}
6
+ avg_orientation_sim_variant_results [0.9422222222222224]
7
+ --------------------
8
+ Orientation horizontal, ckpt CogACT-Base all robot arm visual matching success: [1.0, 0.88, 0.88, 0.92]
9
+ Orientation vertical, ckpt CogACT-Base all robot arm visual matching success: [0.92, 0.88, 0.96, 0.96]
10
+ Orientation standing, ckpt CogACT-Base all robot arm visual matching success: [0.84, 0.84, 0.84, 0.84]
11
+ horizontal visual matching sim success {'CogACT-Base': 0.9199999999999999}
12
+ vertical visual matching sim success {'CogACT-Base': 0.9299999999999999}
13
+ standing visual matching sim success {'CogACT-Base': 0.84}
14
+ avg_orientation_sim_visual_matching_results [0.8966666666666666]
15
+ ********************
16
+
17
+
18
+
19
+ ***Move Near results***
20
+ --------------------
21
+ sim variant avg success {'CogACT-Base': 0.7645833333333333}
22
+ --------------------
23
+ Ckpt CogACT-Base all robot arm visual matching success: [0.7166666666666667, 0.6166666666666667, 0.6833333333333333, 0.6666666666666666]
24
+ sim visual matching success {'CogACT-Base': 0.6708333333333333}
25
+ ********************
26
+
27
+
28
+
29
+ ***Drawer results***
30
+ --------------------
31
+ open sim variant avg success {'CogACT-Base': 0.5873015873015872}
32
+ close sim variant avg success {'CogACT-Base': 0.6560846560846562}
33
+ avg_sim_variant_results [0.6216931216931216]
34
+ --------------------
35
+ Drawer task open, ckpt CogACT-Base all robot arm visual matching success: [0.5555555555555556, 0.5277777777777778, 0.41666666666666663, 0.5185185185185185]
36
+ Drawer task close, ckpt CogACT-Base all robot arm visual matching success: [0.75, 0.75, 0.5555555555555556, 0.7407407407407408]
37
+ open visual matching sim success {'CogACT-Base': 0.49999999999999994}
38
+ close visual matching sim success {'CogACT-Base': 0.6851851851851852}
39
+ avg_sim_visual_matching_results [0.5925925925925926]
40
+ ********************
41
+
42
+
43
+
44
+ ***Drawer results***
45
+ --------------------
46
+ put_apple_into_top_drawer sim variant avg success {'CogACT-Base': 0.3968253968253968}
47
+ avg_sim_variant_results [0.3968253968253968]
48
+ --------------------
49
+ Drawer task put_apple_into_top_drawer, ckpt CogACT-Base all robot arm visual matching success: [0.32407407407407407, 0.3209876543209876, 0.31481481481481477, 0.3333333333333333]
50
+ put_apple_into_top_drawer visual matching sim success {'CogACT-Base': 0.32407407407407407}
51
+ avg_sim_visual_matching_results [0.32407407407407407]
52
+ ********************
53
+
54
+
55
+
56
+ ***Bridge Put On Env results***
57
+ ********** Results for put_spoon_on_tablecloth **********
58
+ sim visual matching partial success {'CogACT-Base': 0.7916666666666666}
59
+ sim visual matching success {'CogACT-Base': 0.5}
60
+ ********************
61
+
62
+
63
+
64
+ ********** Results for put_carrot_on_plate **********
65
+ sim visual matching partial success {'CogACT-Base': 0.5416666666666666}
66
+ sim visual matching success {'CogACT-Base': 0.5}
67
+ ********************
68
+
69
+
70
+
71
+ ********** Results for stack_green_block_on_yellow_block **********
72
+ sim visual matching partial success {'CogACT-Base': 0.4166666666666667}
73
+ sim visual matching success {'CogACT-Base': 0.08333333333333333}
74
+ ********************
75
+
76
+
77
+
78
+ ********** Results for put_eggplant_in_basket **********
79
+ sim visual matching partial success {'CogACT-Base': 1.0}
80
+ sim visual matching success {'CogACT-Base': 1.0}
81
+ ********************
82
+
83
+
84
+
85
+
86
+ google_var:
87
+ 94.2
88
+ 94.2
89
+ 76.5
90
+ 76.5
91
+ 62.2
92
+ 62.2
93
+ 39.7
94
+ 39.7
95
+
96
+ google_matching
97
+ 89.7
98
+ 89.7
99
+ 67.1
100
+ 67.1
101
+ 59.3
102
+ 59.3
103
+ 32.4
104
+ 32.4
105
+
106
+ widowx_matching
107
+ 50.0
108
+ 50.0
109
+ 50.0
110
+ 50.0
111
+ 8.3
112
+ 8.3
113
+ 100.0
114
+ 100.0
results_step-240000-epoch-05-loss=0.0910_1/log/log1.log ADDED
The diff for this file is too large to render. See raw diff
 
results_step-240000-epoch-05-loss=0.0910_1/log/log2.log ADDED
The diff for this file is too large to render. See raw diff
 
results_step-240000-epoch-05-loss=0.0910_1/log/log3.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:435efdea2aacce6285c3f1e125b70aa1aa75efb01ae333f954aa652675db564c
3
+ size 13953553
results_step-240000-epoch-05-loss=0.0910_1/log/log4.log ADDED
The diff for this file is too large to render. See raw diff
 
results_step-240000-epoch-05-loss=0.0910_1/log/log5.log ADDED
The diff for this file is too large to render. See raw diff
 
results_step-240000-epoch-05-loss=0.0910_1/log/log6.log ADDED
The diff for this file is too large to render. See raw diff
 
results_step-240000-epoch-05-loss=0.0910_1/log/log7.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6179126e5e0e2b90cd25760d6492ac99ac0a12947e512390b6c9988e5ab9392
3
+ size 26761819
results_step-240000-epoch-05-loss=0.0910_1/log/log8.log ADDED
The diff for this file is too large to render. See raw diff
 
results_step-240000-epoch-05-loss=0.0910_2/log/final_result.log ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ***Pick coke can results***
2
+ --------------------
3
+ horizontal sim variant avg success {'CogACT-Base': 0.9555555555555555}
4
+ vertical sim variant avg success {'CogACT-Base': 0.9600000000000001}
5
+ standing sim variant avg success {'CogACT-Base': 0.9022222222222224}
6
+ avg_orientation_sim_variant_results [0.9392592592592593]
7
+ --------------------
8
+ Orientation horizontal, ckpt CogACT-Base all robot arm visual matching success: [0.92, 0.88, 0.88, 0.96]
9
+ Orientation vertical, ckpt CogACT-Base all robot arm visual matching success: [0.88, 0.84, 0.96, 0.88]
10
+ Orientation standing, ckpt CogACT-Base all robot arm visual matching success: [0.92, 0.88, 0.84, 0.92]
11
+ horizontal visual matching sim success {'CogACT-Base': 0.91}
12
+ vertical visual matching sim success {'CogACT-Base': 0.8899999999999999}
13
+ standing visual matching sim success {'CogACT-Base': 0.89}
14
+ avg_orientation_sim_visual_matching_results [0.8966666666666666]
15
+ ********************
16
+
17
+
18
+
19
+ ***Move Near results***
20
+ --------------------
21
+ sim variant avg success {'CogACT-Base': 0.7666666666666667}
22
+ --------------------
23
+ Ckpt CogACT-Base all robot arm visual matching success: [0.6833333333333333, 0.6833333333333333, 0.7, 0.7166666666666667]
24
+ sim visual matching success {'CogACT-Base': 0.6958333333333333}
25
+ ********************
26
+
27
+
28
+
29
+ ***Drawer results***
30
+ --------------------
31
+ open sim variant avg success {'CogACT-Base': 0.5396825396825398}
32
+ close sim variant avg success {'CogACT-Base': 0.6931216931216931}
33
+ avg_sim_variant_results [0.6164021164021165]
34
+ --------------------
35
+ Drawer task open, ckpt CogACT-Base all robot arm visual matching success: [0.611111111111111, 0.5833333333333333, 0.41666666666666663, 0.5555555555555555]
36
+ Drawer task close, ckpt CogACT-Base all robot arm visual matching success: [0.861111111111111, 0.7222222222222221, 0.6666666666666665, 0.8148148148148148]
37
+ open visual matching sim success {'CogACT-Base': 0.537037037037037}
38
+ close visual matching sim success {'CogACT-Base': 0.75}
39
+ avg_sim_visual_matching_results [0.6435185185185185]
40
+ ********************
41
+
42
+
43
+
44
+ ***Drawer results***
45
+ --------------------
46
+ put_apple_into_top_drawer sim variant avg success {'CogACT-Base': 0.3492063492063492}
47
+ avg_sim_variant_results [0.3492063492063492]
48
+ --------------------
49
+ Drawer task put_apple_into_top_drawer, ckpt CogACT-Base all robot arm visual matching success: [0.2962962962962963, 0.30864197530864196, 0.31481481481481477, 0.2962962962962963]
50
+ put_apple_into_top_drawer visual matching sim success {'CogACT-Base': 0.2962962962962963}
51
+ avg_sim_visual_matching_results [0.2962962962962963]
52
+ ********************
53
+
54
+
55
+
56
+ ***Bridge Put On Env results***
57
+ ********** Results for put_spoon_on_tablecloth **********
58
+ sim visual matching partial success {'CogACT-Base': 0.9583333333333334}
59
+ sim visual matching success {'CogACT-Base': 0.6666666666666666}
60
+ ********************
61
+
62
+
63
+
64
+ ********** Results for put_carrot_on_plate **********
65
+ sim visual matching partial success {'CogACT-Base': 0.6666666666666666}
66
+ sim visual matching success {'CogACT-Base': 0.4583333333333333}
67
+ ********************
68
+
69
+
70
+
71
+ ********** Results for stack_green_block_on_yellow_block **********
72
+ sim visual matching partial success {'CogACT-Base': 0.5416666666666666}
73
+ sim visual matching success {'CogACT-Base': 0.041666666666666664}
74
+ ********************
75
+
76
+
77
+
78
+ ********** Results for put_eggplant_in_basket **********
79
+ sim visual matching partial success {'CogACT-Base': 0.9583333333333334}
80
+ sim visual matching success {'CogACT-Base': 0.9166666666666666}
81
+ ********************
82
+
83
+
84
+
85
+
86
+ google_var:
87
+ 93.9
88
+ 93.9
89
+ 76.7
90
+ 76.7
91
+ 61.6
92
+ 61.6
93
+ 34.9
94
+ 34.9
95
+
96
+ google_matching
97
+ 89.7
98
+ 89.7
99
+ 69.6
100
+ 69.6
101
+ 64.4
102
+ 64.4
103
+ 29.6
104
+ 29.6
105
+
106
+ widowx_matching
107
+ 66.7
108
+ 66.7
109
+ 45.8
110
+ 45.8
111
+ 4.2
112
+ 4.2
113
+ 91.7
114
+ 91.7
results_step-240000-epoch-05-loss=0.0910_2/log/log1.log ADDED
The diff for this file is too large to render. See raw diff
 
results_step-240000-epoch-05-loss=0.0910_2/log/log2.log ADDED
The diff for this file is too large to render. See raw diff
 
results_step-240000-epoch-05-loss=0.0910_2/log/log3.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e51dc8d0522974bf2a585fc8308dff74276d7282564e6955d1f9fe8fa8d7d491
3
+ size 13952635
results_step-240000-epoch-05-loss=0.0910_2/log/log4.log ADDED
The diff for this file is too large to render. See raw diff
 
results_step-240000-epoch-05-loss=0.0910_2/log/log5.log ADDED
The diff for this file is too large to render. See raw diff
 
results_step-240000-epoch-05-loss=0.0910_2/log/log6.log ADDED
The diff for this file is too large to render. See raw diff
 
results_step-240000-epoch-05-loss=0.0910_2/log/log7.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:871b30c8e6d5a0777f216c0df35f7eb06f08d4da9b70b8f2d70b46ecaf72968d
3
+ size 26758058
results_step-240000-epoch-05-loss=0.0910_2/log/log8.log ADDED
The diff for this file is too large to render. See raw diff
 
results_step-240000-epoch-05-loss=0.0910_3/log/final_result.log ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ***Pick coke can results***
2
+ --------------------
3
+ horizontal sim variant avg success {'CogACT-Base': 0.9333333333333333}
4
+ vertical sim variant avg success {'CogACT-Base': 0.9688888888888888}
5
+ standing sim variant avg success {'CogACT-Base': 0.9111111111111112}
6
+ avg_orientation_sim_variant_results [0.9377777777777778]
7
+ --------------------
8
+ Orientation horizontal, ckpt CogACT-Base all robot arm visual matching success: [0.96, 0.72, 0.76, 0.88]
9
+ Orientation vertical, ckpt CogACT-Base all robot arm visual matching success: [1.0, 0.96, 0.88, 1.0]
10
+ Orientation standing, ckpt CogACT-Base all robot arm visual matching success: [0.88, 0.8, 0.76, 0.8]
11
+ horizontal visual matching sim success {'CogACT-Base': 0.83}
12
+ vertical visual matching sim success {'CogACT-Base': 0.96}
13
+ standing visual matching sim success {'CogACT-Base': 0.81}
14
+ avg_orientation_sim_visual_matching_results [0.8666666666666667]
15
+ ********************
16
+
17
+
18
+
19
+ ***Move Near results***
20
+ --------------------
21
+ sim variant avg success {'CogACT-Base': 0.7749999999999999}
22
+ --------------------
23
+ Ckpt CogACT-Base all robot arm visual matching success: [0.7166666666666667, 0.5666666666666667, 0.6166666666666667, 0.7166666666666667]
24
+ sim visual matching success {'CogACT-Base': 0.6541666666666667}
25
+ ********************
26
+
27
+
28
+
29
+ ***Drawer results***
30
+ --------------------
31
+ open sim variant avg success {'CogACT-Base': 0.5873015873015873}
32
+ close sim variant avg success {'CogACT-Base': 0.7037037037037037}
33
+ avg_sim_variant_results [0.6455026455026456]
34
+ --------------------
35
+ Drawer task open, ckpt CogACT-Base all robot arm visual matching success: [0.5555555555555556, 0.5555555555555556, 0.5277777777777778, 0.48148148148148145]
36
+ Drawer task close, ckpt CogACT-Base all robot arm visual matching success: [0.7222222222222222, 0.8333333333333333, 0.49999999999999994, 0.7037037037037037]
37
+ open visual matching sim success {'CogACT-Base': 0.5462962962962963}
38
+ close visual matching sim success {'CogACT-Base': 0.6851851851851851}
39
+ avg_sim_visual_matching_results [0.6157407407407407]
40
+ ********************
41
+
42
+
43
+
44
+ ***Drawer results***
45
+ --------------------
46
+ put_apple_into_top_drawer sim variant avg success {'CogACT-Base': 0.43386243386243384}
47
+ avg_sim_variant_results [0.43386243386243384]
48
+ --------------------
49
+ Drawer task put_apple_into_top_drawer, ckpt CogACT-Base all robot arm visual matching success: [0.33333333333333337, 0.32098765432098764, 0.2962962962962963, 0.2222222222222222]
50
+ put_apple_into_top_drawer visual matching sim success {'CogACT-Base': 0.33333333333333337}
51
+ avg_sim_visual_matching_results [0.33333333333333337]
52
+ ********************
53
+
54
+
55
+
56
+ ***Bridge Put On Env results***
57
+ ********** Results for put_spoon_on_tablecloth **********
58
+ sim visual matching partial success {'CogACT-Base': 0.8333333333333334}
59
+ sim visual matching success {'CogACT-Base': 0.7083333333333334}
60
+ ********************
61
+
62
+
63
+
64
+ ********** Results for put_carrot_on_plate **********
65
+ sim visual matching partial success {'CogACT-Base': 0.625}
66
+ sim visual matching success {'CogACT-Base': 0.5}
67
+ ********************
68
+
69
+
70
+
71
+ ********** Results for stack_green_block_on_yellow_block **********
72
+ sim visual matching partial success {'CogACT-Base': 0.5416666666666666}
73
+ sim visual matching success {'CogACT-Base': 0.125}
74
+ ********************
75
+
76
+
77
+
78
+ ********** Results for put_eggplant_in_basket **********
79
+ sim visual matching partial success {'CogACT-Base': 0.9583333333333334}
80
+ sim visual matching success {'CogACT-Base': 0.9583333333333334}
81
+ ********************
82
+
83
+
84
+
85
+
86
+ google_var:
87
+ 93.8
88
+ 93.8
89
+ 77.5
90
+ 77.5
91
+ 64.6
92
+ 64.6
93
+ 43.4
94
+ 43.4
95
+
96
+ google_matching
97
+ 86.7
98
+ 86.7
99
+ 65.4
100
+ 65.4
101
+ 61.6
102
+ 61.6
103
+ 33.3
104
+ 33.3
105
+
106
+ widowx_matching
107
+ 70.8
108
+ 70.8
109
+ 50.0
110
+ 50.0
111
+ 12.5
112
+ 12.5
113
+ 95.8
114
+ 95.8
results_step-240000-epoch-05-loss=0.0910_3/log/log1.log ADDED
The diff for this file is too large to render. See raw diff
 
results_step-240000-epoch-05-loss=0.0910_3/log/log2.log ADDED
The diff for this file is too large to render. See raw diff
 
results_step-240000-epoch-05-loss=0.0910_3/log/log3.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4df2cba4960fc3755d2888cb080dbe926b15741ab218fe5fc7c9e0a94c8cbedb
3
+ size 13953631
results_step-240000-epoch-05-loss=0.0910_3/log/log4.log ADDED
The diff for this file is too large to render. See raw diff
 
results_step-240000-epoch-05-loss=0.0910_3/log/log5.log ADDED
The diff for this file is too large to render. See raw diff
 
results_step-240000-epoch-05-loss=0.0910_3/log/log6.log ADDED
The diff for this file is too large to render. See raw diff
 
results_step-240000-epoch-05-loss=0.0910_3/log/log7.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d737720240837dd5ce73f8b79e0194c3d07ca1df7e9fa073dfadbe14f5d755ff
3
+ size 26761982
results_step-240000-epoch-05-loss=0.0910_3/log/log8.log ADDED
The diff for this file is too large to render. See raw diff
 
run-metrics.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"hparams": {"action_dim": 7, "data_root_dir": "/mnt/petrelfs/yangshuai1/rep/InstructVLA_official/cache", "debug": false, "disable_instruction": false, "fix_system1": false, "future_action_window_size": 15, "hf_token": ".hf_token", "image_aug": true, "is_resume": true, "load_all_data_for_training": true, "num_of_meta_query": 64, "past_action_window_size": 0, "pretrained_checkpoint": "/mnt/petrelfs/yangshuai1/rep/InstructVLA_official/outputs/code_reimp/sys12_meta_query_action_only_sync_pretraining_v2_query_64_mlp_lora_reimplement_transformer_4_50_single_node_bs128_2--image_augstage1/checkpoints/step-180000-epoch-03-loss=0.1214.pt", "repeated_diffusion_steps": 4, "resume_epoch": 3, "resume_step": 180000, "run_id": "sys12_meta_query_action_only_sync_pretraining_v2_query_64_mlp_lora_reimplement_transformer_4_50_single_node_bs128_2--image_augstage1", "run_id_note": null, "run_root_dir": "outputs/code_reimp", "save_interval": 20000, "seed": 42, "stage": "stage1", "trackers": ["jsonl", "wandb"], "use_mm": false, "vla": {"action_tokenizer": "extra_action_tokenizer", "base_vlm": "ckpt/Eagle2-2B", "data_mix": "bridge_rt_1", "enable_gradient_checkpointing": true, "enable_mixed_precision_training": true, "epochs": 100, "expected_world_size": 8, "freeze_llm_backbone": false, "freeze_vision_backbone": false, "global_batch_size": 128, "learning_rate": 5e-05, "lr_scheduler_type": "constant", "max_grad_norm": 1.0, "max_steps": null, "per_device_batch_size": 16, "reduce_in_full_precision": true, "shuffle_buffer_size": 250000, "train_strategy": "fsdp-full-shard", "type": "prism-qwen25-dinosiglip-224px+0_5b", "unfreeze_last_llm_layer": false, "vla_id": "prism-qwen25-dinosiglip-224px+0_5b", "warmup_ratio": 0.0, "weight_decay": 0.0}, "wandb_entity": "shuaiyang2003", "wandb_project": "dual_sys_code_clean", "with_pointing": false}, "run_id": "sys12_meta_query_action_only_sync_pretraining_v2_query_64_mlp_lora_reimplement_transformer_4_50_single_node_bs128_2--image_augstage1"}
sys12_meta_query_action_only_sync_pretraining_v2_query_64_mlp_lora_reimplement_transformer_4_50_single_node_bs128_2--image_augstage1.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1603b9ec92ce156f49e36c028bdb6cddd084e536c649ce416d0ae6f4bc6f533
3
+ size 92516492