[ { "id": 1982, "image": "images/1982_testset_0000394.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"throw the soccer_ball\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[161, 256, 183, 281]" } ] }, { "id": 1091, "image": "images/1091_motorcycle_000923.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the motorcycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[456, 231, 510, 277]" } ] }, { "id": 6498, "image": "images/6498_thu_cut_fruit_RGB_cut_fruit_cwx_2_22.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[512, 196, 560, 252]" } ] }, { "id": 24, "image": "images/24_6.png", "conversations": [ { "from": "human", "value": "\nYou are a robot using the joint control. The task is \"take the basket\". Please predict a possible affordance area of the end effector?" }, { "from": "gpt", "value": "[817, 270, 1095, 436]" } ] }, { "id": 2420, "image": "images/2420_ego4d_7aca7552-56d2-4ac0-be30-aed9c5dfc8ea_9000.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"lie_on the couch\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[813, 442, 930, 558]" } ] }, { "id": 1045, "image": "images/1045_bench_002635.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the bench\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[321, 169, 508, 224]" } ] }, { "id": 6738, "image": "images/6738_cup_002554.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"pour the cup\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[224, 293, 277, 356]" } ] }, { "id": 5909, "image": "images/5909_bicycle_001556.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[117, 113, 227, 192]" } ] }, { "id": 7259, "image": "images/7259_golf_clubs_001094.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"swing the golf_clubs\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[358, 117, 424, 229]" } ] }, { "id": 6523, "image": "images/6523_soccer_ball_002817.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"kick the soccer_ball\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[231, 117, 378, 275]" } ] }, { "id": 3428, "image": "images/3428_testset_0001985.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"ride the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[68, 36, 100, 62]" } ] }, { "id": 7249, "image": "images/7249_banana_000253.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"peel the banana\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[216, 119, 387, 396]" } ] }, { "id": 6568, "image": "images/6568_testset_0000092.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"kick the soccer_ball\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[120, 189, 148, 217]" } ] }, { "id": 5999, "image": "images/5999_ego4d_6b889d80-1c10-413c-9532-fbd629305a43_36300.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"type_on the laptop\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[1507, 115, 1918, 315]" } ] }, { "id": 2950, "image": "images/2950_bicycle_000661.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"push the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[483, 0, 533, 24]" } ] }, { "id": 2050, "image": "images/2050_baseball_002670.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"throw the baseball\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[307, 166, 364, 222]" } ] }, { "id": 3501, "image": "images/3501_testset_0000898.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"ride the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[409, 261, 459, 319]" } ] }, { "id": 5111, "image": "images/5111_youtube_Making a T-Bone Steak POV First Person Cooking-znuEJcLMSjw-1080p-1644527668544_003600.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"wash the bowl\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[817, 585, 1186, 1005]" } ] }, { "id": 4416, "image": "images/4416_testset_0000636.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the skateboard\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[325, 370, 466, 444]" } ] }, { "id": 5277, "image": "images/5277_epic_P23_02_frame_0000071911.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"stir the bowl\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[222, 96, 293, 163]" } ] }, { "id": 5348, "image": "images/5348_ego4d_8af2ed7d-4435-40ce-b3cc-685f28bfcb2d_37500.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"take_photo the cell_phone\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[1182, 355, 1372, 505]" } ] }, { "id": 2722, "image": "images/2722_testset_0001597.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"lie_on the surfboard\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[34, 222, 82, 342]" } ] }, { "id": 4035, "image": "images/4035_testset_0001460.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the suitcase\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[504, 370, 576, 417]" } ] }, { "id": 2999, "image": "images/2999_thu_cut_fruit_RGB_cut_fruit_tys_3_99.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[355, 60, 408, 113]" } ] }, { "id": 1217, "image": "images/1217_testset_0001818.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the chair\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[582, 487, 647, 537]" } ] }, { "id": 6395, "image": "images/6395_knife_001124.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[412, 62, 577, 189]" } ] }, { "id": 4496, "image": "images/4496_frisbee_002270.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the frisbee\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[58, 73, 925, 931]" } ] }, { "id": 3348, "image": "images/3348_motorcycle_002093.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"ride the motorcycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[286, 134, 337, 184]" } ] }, { "id": 5290, "image": "images/5290_ego4d_de8ea8a7-6bdb-4679-9b17-9dc6a79e45e7_4800.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"take_photo the cell_phone\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[889, 424, 1018, 560]" } ] }, { "id": 6667, "image": "images/6667_bicycle_002350.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"ride the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[389, 72, 455, 120]" } ] }, { "id": 7248, "image": "images/7248_banana_000480.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"peel the banana\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[289, 112, 359, 296]" } ] }, { "id": 4655, "image": "images/4655_youtube_how to cook PASTA RISOTTATA UNDER 30 MINUTE PASTA MADE LIKE RISOTTO JONAH IN THE KITCHEN-QHjXmwMg3MY-1080p-1644528336886 (1)_012420.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"open the bottle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[760, 284, 885, 409]" } ] }, { "id": 4731, "image": "images/4731_thu_read_book_RGB_read_book_cwx_3_86.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"open the book\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[275, 255, 526, 348]" } ] }, { "id": 3375, "image": "images/3375_testset_0001667.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"ride the motorbike\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[493, 181, 548, 227]" } ] }, { "id": 3059, "image": "images/3059_knife_000029.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[407, 149, 456, 204]" } ] }, { "id": 5885, "image": "images/5885_testset_0001139.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[437, 235, 481, 275]" } ] }, { "id": 1000, "image": "images/1000_bench_001222.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the bench\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[316, 286, 394, 330]" } ] }, { "id": 1705, "image": "images/1705_skateboard_002490.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"carry the skateboard\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[279, 191, 350, 248]" } ] }, { "id": 7081, "image": "images/7081_thu_stir_RGB_stir_lhh_3_60.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sip the cup\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[289, 132, 378, 211]" } ] }, { "id": 647, "image": "images/647_thu_stir_RGB_stir_lhh_3_60.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"drink_with the cup\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[289, 132, 378, 211]" } ] }, { "id": 5360, "image": "images/5360_ego4d_f5b87541-3879-444f-be65-2b17f1d95735_24450.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"take_photo the cell_phone\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[1547, 200, 1701, 413]" } ] }, { "id": 5, "image": "images/5_17.png", "conversations": [ { "from": "human", "value": "\nYou are a robot using the joint control. The task is \"take the basket\". Please predict a possible affordance area of the end effector?" }, { "from": "gpt", "value": "[441, 960, 679, 1055]" } ] }, { "id": 3682, "image": "images/3682_ego4d_9b8e8dc6-4069-4ef4-b8b2-edeb76894d53_17250.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"pour the bottle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[992, 0, 1157, 158]" } ] }, { "id": 2483, "image": "images/2483_testset_0000557.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"lie_on the bench\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[267, 276, 509, 322]" } ] }, { "id": 5378, "image": "images/5378_camera_000542.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"take_photo the camera\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[252, 284, 492, 425]" } ] }, { "id": 4480, "image": "images/4480_axe_003531.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the axe\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[341, 136, 803, 508]" } ] }, { "id": 432, "image": "images/432_testset_0000327.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"catch the soccer_ball\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[164, 98, 198, 129]" } ] }, { "id": 5440, "image": "images/5440_testset_0000972.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"pick_up the suitcase\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[116, 68, 159, 94]" } ] }, { "id": 0, "image": "images/0_8.png", "conversations": [ { "from": "human", "value": "\nYou are a robot using the joint control. The task is \"take the basket\". Please predict a possible affordance area of the end effector?" }, { "from": "gpt", "value": "[1158, 344, 1382, 452]" } ] }, { "id": 200, "image": "images/200_punching_bag_002699.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"boxing the punching_bag\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[141, 173, 284, 425]" } ] }, { "id": 6410, "image": "images/6410_knife_003240.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[334, 0, 428, 116]" } ] }, { "id": 5567, "image": "images/5567_banana_002174.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"peel the banana\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[49, 67, 209, 245]" } ] }, { "id": 4535, "image": "images/4535_testset_0002282.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the frisbee\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[267, 45, 366, 107]" } ] }, { "id": 3242, "image": "images/3242_testset_0001182.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"kick the soccer_ball\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[250, 171, 277, 200]" } ] }, { "id": 6210, "image": "images/6210_bed_002956.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"lie_on the bed\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[234, 207, 496, 252]" } ] }, { "id": 4004, "image": "images/4004_ego4d_8a280483-bbfd-4f24-9654-bf6a008e406d_25050.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the scissors\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[1188, 262, 1457, 527]" } ] }, { "id": 4217, "image": "images/4217_epic_P02_122_frame_0000010441.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the bowl\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[373, 79, 409, 119]" } ] }, { "id": 2205, "image": "images/2205_testset_0001880.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"throw the frisbee\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[152, 86, 230, 136]" } ] }, { "id": 4052, "image": "images/4052_testset_0001986.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the suitcase\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[153, 24, 243, 76]" } ] }, { "id": 6386, "image": "images/6386_youtube_Making Japanese Katsudon POV First Person Cooking-YYz5a6wS2oE-1080p-1644527980851_006060.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[917, 481, 1075, 608]" } ] }, { "id": 6127, "image": "images/6127_youtube_Good_012720.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"stick the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[418, 0, 541, 141]" } ] }, { "id": 4507, "image": "images/4507_frisbee_003223.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the frisbee\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[33, 0, 839, 839]" } ] }, { "id": 6794, "image": "images/6794_ego4d_6e0a6558-c212-4cab-b374-007671edb59c_14100.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the cup\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[1087, 441, 1206, 549]" } ] }, { "id": 1420, "image": "images/1420_testset_0001955.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the skateboard\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[416, 145, 590, 240]" } ] }, { "id": 5110, "image": "images/5110_bowl_001170.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"wash the bowl\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[0, 171, 213, 228]" } ] }, { "id": 3092, "image": "images/3092_thu_cut_fruit_RGB_cut_fruit_wyy_1_82.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[293, 227, 369, 279]" } ] }, { "id": 2760, "image": "images/2760_testset_0001353.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"lie_on the surfboard\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[122, 38, 151, 127]" } ] }, { "id": 3216, "image": "images/3216_testset_0002265.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"kick the soccer_ball\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[172, 299, 384, 543]" } ] }, { "id": 6724, "image": "images/6724_broccoli_003228.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"eat the broccoli\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[97, 128, 241, 251]" } ] }, { "id": 4638, "image": "images/4638_ego4d_11f51f72-c0bd-4631-aabb-6ecbe17d95c9_10050.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hit the axe\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[858, 154, 1091, 263]" } ] }, { "id": 2269, "image": "images/2269_banana_000977.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut the banana\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[162, 203, 307, 429]" } ] }, { "id": 1867, "image": "images/1867_keyboard_000219.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"type_on the keyboard\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[289, 114, 554, 219]" } ] }, { "id": 382, "image": "images/382_ego4d_4ac59908-3f13-4085-9479-7348b13803ee_6450.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"write the pen\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[1557, 221, 1918, 432]" } ] }, { "id": 2939, "image": "images/2939_testset_0002111.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"push the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[213, 56, 239, 76]" } ] }, { "id": 6040, "image": "images/6040_ego4d_20d62117-8b5f-44b3-b86f-a936e6cdd2c2_4950.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"type_on the laptop\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[2291, 558, 2560, 771]" } ] }, { "id": 3326, "image": "images/3326_motorcycle_002068.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"ride the motorcycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[362, 291, 417, 339]" } ] }, { "id": 790, "image": "images/790_testset_0002285.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the couch\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[344, 242, 444, 308]" } ] }, { "id": 5435, "image": "images/5435_testset_0001206.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"pick_up the suitcase\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[250, 24, 274, 46]" } ] }, { "id": 3696, "image": "images/3696_wine_glass_002765.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"pour the wine_glass\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[184, 273, 256, 506]" } ] }, { "id": 3721, "image": "images/3721_baseball_bat_003712.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the baseball_bat\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[105, 192, 289, 258]" } ] }, { "id": 1628, "image": "images/1628_testset_0001409.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"carry the surfboard\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[132, 184, 183, 220]" } ] }, { "id": 7126, "image": "images/7126_thu_use_chopstick_RGB_use_chopstick_cwx_3_54.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"wash the cup\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[339, 151, 442, 252]" } ] }, { "id": 2982, "image": "images/2982_youtube_First Person Cooking ITALIAN SAUSAGE & PEPPERS WITH POLENTA JONAH IN THE KITCHEN POV COOKING-2YPjWvmOb5o-1080p-1644528307790_002580.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[900, 142, 1084, 276]" } ] }, { "id": 4162, "image": "images/4162_testset_0001746.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the surfboard\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[72, 122, 190, 172]" } ] }, { "id": 4174, "image": "images/4174_snowboard_001840.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the snowboard\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[679, 54, 805, 260]" } ] }, { "id": 7266, "image": "images/7266_golf_clubs_000638.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"swing the golf_clubs\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[26, 80, 165, 218]" } ] }, { "id": 3592, "image": "images/3592_banana_002623.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"eat the banana\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[49, 254, 169, 449]" } ] }, { "id": 4754, "image": "images/4754_microwave_001049.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"open the microwave\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[112, 108, 621, 171]" } ] }, { "id": 3571, "image": "images/3571_apple_000735.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"eat the apple\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[174, 76, 535, 307]" } ] }, { "id": 2979, "image": "images/2979_knife_000695.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[138, 174, 241, 224]" } ] }, { "id": 5354, "image": "images/5354_ego4d_9a0a2d01-c4e0-4e30-8a69-642070d59ea9_6450.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"take_photo the cell_phone\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[1537, 374, 1701, 548]" } ] }, { "id": 1444, "image": "images/1444_bicycle_001404.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[298, 80, 348, 126]" } ] }, { "id": 5447, "image": "images/5447_testset_0001906.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"pick_up the suitcase\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[285, 0, 333, 60]" } ] }, { "id": 6518, "image": "images/6518_testset_0000299.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"kick the soccer_ball\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[277, 240, 300, 263]" } ] }, { "id": 789, "image": "images/789_testset_0000920.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the couch\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[214, 258, 456, 348]" } ] }, { "id": 878, "image": "images/878_testset_0000949.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the bench\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[274, 201, 400, 250]" } ] }, { "id": 4620, "image": "images/4620_tennis_racket_001468.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hit the tennis_racket\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[200, 1105, 273, 1407]" } ] }, { "id": 6169, "image": "images/6169_thu_bounce_ball_RGB_bounce_ball_cwx_2_97.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"throw the basketball\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[218, 173, 336, 288]" } ] }, { "id": 483, "image": "images/483_testset_0000152.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"catch the soccer_ball\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[222, 119, 249, 144]" } ] }, { "id": 2095, "image": "images/2095_rugby_ball_002080.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"throw the rugby_ball\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[81, 93, 207, 174]" } ] }, { "id": 5299, "image": "images/5299_ego4d_5cae3bb3-1e1e-4b9f-82e8-87dbaad8f73d_9900.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"take_photo the cell_phone\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[744, 386, 882, 666]" } ] }, { "id": 5542, "image": "images/5542_epic_P33_102_frame_0000008911.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"peel the carrot\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[26, 25, 82, 89]" } ] }, { "id": 1766, "image": "images/1766_ego4d_70e81c43-3bf3-4a3c-81df-16f5108a6918_24150.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"type_on the laptop\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[588, 645, 884, 764]" } ] }, { "id": 6663, "image": "images/6663_testset_0001042.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"ride the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[136, 69, 162, 102]" } ] }, { "id": 3264, "image": "images/3264_testset_0000353.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"kick the punching_bag\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[249, 124, 345, 468]" } ] }, { "id": 6274, "image": "images/6274_testset_0001830.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"push the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[252, 123, 278, 150]" } ] }, { "id": 6209, "image": "images/6209_testset_0001089.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"lie_on the bed\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[219, 306, 464, 413]" } ] }, { "id": 2328, "image": "images/2328_testset_0000188.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"drag the suitcase\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[186, 112, 216, 137]" } ] }, { "id": 1535, "image": "images/1535_testset_0000898.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[412, 274, 463, 322]" } ] }, { "id": 2945, "image": "images/2945_testset_0002150.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"push the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[204, 120, 291, 210]" } ] }, { "id": 6767, "image": "images/6767_escape_S05_short_cut_570.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"pour the cup\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[938, 372, 1061, 494]" } ] }, { "id": 7211, "image": "images/7211_testset_0001668.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"pick_up the skis\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[344, 236, 408, 299]" } ] }, { "id": 1393, "image": "images/1393_testset_0000436.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the motorbike\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[524, 223, 603, 283]" } ] }, { "id": 77, "image": "images/77_testset_0000454.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"jump the surfboard\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[244, 372, 320, 427]" } ] }, { "id": 5328, "image": "images/5328_youtube_First Person Cooking POPEYES CHICKEN SANDWICH COPYCAT RECIPE JONAH IN THE KITCHEN POV COOKING-lNAPNvF0kGg-1080p-1644528296768_001740.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"take_photo the cell_phone\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[295, 573, 437, 780]" } ] }, { "id": 7178, "image": "images/7178_youtube_First Person Cooking Red Pesto Pasta 30 Minute Pasta JONAH IN THE KITCHEN-FeAspQ44YFk-1080p-1644528516587_005160.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"wash the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[691, 446, 1100, 872]" } ] }, { "id": 587, "image": "images/587_testset_0000198.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"catch the frisbee\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[0, 167, 14, 234]" } ] }, { "id": 1545, "image": "images/1545_testset_0001366.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[171, 254, 216, 299]" } ] }, { "id": 2547, "image": "images/2547_testset_0000277.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"lie_on the bench\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[94, 469, 227, 556]" } ] }, { "id": 39, "image": "images/39_testset_0000686.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"jump the surfboard\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[92, 59, 130, 90]" } ] }, { "id": 6559, "image": "images/6559_testset_0000893.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"kick the soccer_ball\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[164, 122, 208, 160]" } ] }, { "id": 5156, "image": "images/5156_epic_P01_101_frame_0000035461.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"stir the bowl\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[197, 54, 291, 160]" } ] }, { "id": 3021, "image": "images/3021_knife_000942.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[288, 236, 394, 286]" } ] }, { "id": 1392, "image": "images/1392_testset_0001231.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the motorbike\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[300, 295, 383, 343]" } ] }, { "id": 6450, "image": "images/6450_youtube_Really_001140.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[833, 546, 979, 746]" } ] }, { "id": 5683, "image": "images/5683_testset_0000166.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"catch the soccer_ball\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[83, 288, 128, 330]" } ] }, { "id": 3417, "image": "images/3417_bicycle_001546.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"ride the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[234, 80, 289, 126]" } ] }, { "id": 284, "image": "images/284_ego4d_8f847cfc-ae36-461c-a96b-8fc58766fb2d_8700.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"write the pen\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[717, 735, 1085, 909]" } ] }, { "id": 4984, "image": "images/4984_ego4d_5c3be03c-07a5-4427-a2fe-449f16e8017b_3900.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"text_on the cell_phone\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[327, 650, 522, 856]" } ] }, { "id": 1514, "image": "images/1514_testset_0001042.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[134, 89, 160, 113]" } ] }, { "id": 7066, "image": "images/7066_cup_002410.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sip the cup\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[129, 130, 462, 234]" } ] }, { "id": 474, "image": "images/474_testset_0000092.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"catch the soccer_ball\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[120, 189, 148, 217]" } ] }, { "id": 2393, "image": "images/2393_testset_0002092.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"lie_on the couch\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[353, 275, 619, 324]" } ] }, { "id": 1916, "image": "images/1916_thu_cut_fruit_RGB_cut_fruit_lzy_2_49.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"stick the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[387, 69, 435, 117]" } ] }, { "id": 2417, "image": "images/2417_testset_0001465.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"lie_on the couch\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[209, 321, 369, 366]" } ] }, { "id": 12, "image": "images/12_20.png", "conversations": [ { "from": "human", "value": "\nYou are a robot using the joint control. The task is \"take the basket\". Please predict a possible affordance area of the end effector?" }, { "from": "gpt", "value": "[544, 210, 1053, 378]" } ] }, { "id": 2667, "image": "images/2667_testset_0001397.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"lie_on the bench\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[188, 208, 356, 250]" } ] }, { "id": 5140, "image": "images/5140_toothbrush_000575.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"wash the toothbrush\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[130, 43, 319, 196]" } ] }, { "id": 4777, "image": "images/4777_youtube_First Person Cooking CHICKEN QUESADILLAS JONAH IN THE KITCHEN POV COOKING-zma2SgJg6l0-1080p-1644528321483 (1)_002520.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"open the oven\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[908, 824, 1261, 959]" } ] }, { "id": 6997, "image": "images/6997_ego4d_61f0a3d8-01f9-4149-bd0c-c31ad5d319ed_12750.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the axe\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[1328, 819, 1618, 1022]" } ] }, { "id": 2067, "image": "images/2067_basketball_003534.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"throw the basketball\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[458, 244, 586, 373]" } ] }, { "id": 1668, "image": "images/1668_testset_0001463.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"carry the snowboard\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[132, 198, 165, 231]" } ] }, { "id": 6716, "image": "images/6716_banana_002934.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"eat the banana\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[83, 390, 213, 431]" } ] }, { "id": 1431, "image": "images/1431_testset_0001432.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[203, 175, 256, 223]" } ] }, { "id": 4379, "image": "images/4379_wine_glass_003375.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the wine_glass\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[288, 152, 439, 295]" } ] }, { "id": 2492, "image": "images/2492_testset_0001207.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"lie_on the bench\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[185, 273, 356, 318]" } ] }, { "id": 1511, "image": "images/1511_bicycle_002266.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[145, 106, 209, 159]" } ] }, { "id": 5865, "image": "images/5865_bicycle_001224.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[325, 181, 426, 232]" } ] }, { "id": 1780, "image": "images/1780_ego4d_6a85ad1a-746c-45ac-b372-b677cb6e0a3d_5850.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"type_on the laptop\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[1989, 1315, 2237, 1564]" } ] }, { "id": 3070, "image": "images/3070_youtube_Really_011880.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[831, 587, 988, 785]" } ] }, { "id": 3693, "image": "images/3693_wine_glass_002922.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"pour the wine_glass\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[300, 331, 362, 425]" } ] }, { "id": 3730, "image": "images/3730_baseball_bat_000452.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the baseball_bat\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[1084, 91, 1299, 280]" } ] }, { "id": 165, "image": "images/165_testset_0000353.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"boxing the punching_bag\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[249, 124, 345, 468]" } ] }, { "id": 3170, "image": "images/3170_scissors_002226.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the scissors\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[254, 250, 330, 362]" } ] }, { "id": 5718, "image": "images/5718_soccer_ball_002743.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"catch the soccer_ball\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[151, 86, 222, 154]" } ] }, { "id": 26, "image": "images/26_3.png", "conversations": [ { "from": "human", "value": "\nYou are a robot using the joint control. The task is \"take the basket\". Please predict a possible affordance area of the end effector?" }, { "from": "gpt", "value": "[1182, 373, 1398, 522]" } ] }, { "id": 4852, "image": "images/4852_bottle_001569.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sip the bottle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[168, 83, 218, 125]" } ] }, { "id": 4569, "image": "images/4569_testset_0001135.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the frisbee\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[162, 105, 186, 128]" } ] }, { "id": 197, "image": "images/197_testset_0000909.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"boxing the punching_bag\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[192, 183, 265, 490]" } ] }, { "id": 3282, "image": "images/3282_punching_bag_000047.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"kick the punching_bag\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[216, 257, 271, 383]" } ] }, { "id": 477, "image": "images/477_testset_0000911.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"catch the soccer_ball\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[158, 106, 193, 142]" } ] }, { "id": 1397, "image": "images/1397_testset_0000611.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the motorbike\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[148, 199, 246, 263]" } ] }, { "id": 2244, "image": "images/2244_carrot_000953.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut the carrot\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[341, 99, 463, 263]" } ] }, { "id": 684, "image": "images/684_bottle_002780.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"drink_with the bottle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[444, 270, 517, 339]" } ] }, { "id": 2135, "image": "images/2135_testset_0000991.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"throw the frisbee\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[494, 188, 670, 332]" } ] }, { "id": 2680, "image": "images/2680_testset_0001407.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"lie_on the bench\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[184, 513, 403, 572]" } ] }, { "id": 4619, "image": "images/4619_tennis_racket_000964.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hit the tennis_racket\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[191, 1159, 265, 1420]" } ] }, { "id": 1939, "image": "images/1939_ego4d_c2a3d4ac-8d13-4daa-a9a9-a867999e956f_29400.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"stick the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[884, 356, 989, 462]" } ] }, { "id": 16, "image": "images/16_26.png", "conversations": [ { "from": "human", "value": "\nYou are a robot using the joint control. The task is \"take the knife\". Please predict a possible affordance area of the end effector?" }, { "from": "gpt", "value": "[976, 517, 1215, 807]" } ] }, { "id": 1378, "image": "images/1378_testset_0001495.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the motorbike\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[428, 256, 495, 307]" } ] }, { "id": 4053, "image": "images/4053_testset_0001112.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the suitcase\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[94, 0, 139, 17]" } ] }, { "id": 6238, "image": "images/6238_testset_0002145.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"lie_on the bed\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[131, 164, 260, 212]" } ] }, { "id": 4834, "image": "images/4834_cup_002062.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sip the cup\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[346, 120, 542, 221]" } ] }, { "id": 21, "image": "images/21_4.png", "conversations": [ { "from": "human", "value": "\nYou are a robot using the joint control. The task is \"take the knife\". Please predict a possible affordance area of the end effector?" }, { "from": "gpt", "value": "[1391, 905, 1728, 1076]" } ] }, { "id": 3691, "image": "images/3691_thu_use_chopstick_RGB_use_chopstick_gs_2_176.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"pour the wine_glass\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[273, 346, 327, 406]" } ] }, { "id": 1160, "image": "images/1160_testset_0001004.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the chair\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[291, 393, 366, 463]" } ] }, { "id": 160, "image": "images/160_punching_bag_000345.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"boxing the punching_bag\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[90, 442, 276, 1093]" } ] }, { "id": 3035, "image": "images/3035_thu_cut_fruit_RGB_cut_fruit_tys_1_79.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[398, 68, 449, 126]" } ] }, { "id": 979, "image": "images/979_testset_0001849.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the bench\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[365, 253, 491, 320]" } ] }, { "id": 4495, "image": "images/4495_testset_0001819.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the frisbee\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[182, 78, 216, 107]" } ] }, { "id": 6421, "image": "images/6421_ego4d_c2a3d4ac-8d13-4daa-a9a9-a867999e956f_15000.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[927, 122, 1035, 226]" } ] }, { "id": 22, "image": "images/22_5.png", "conversations": [ { "from": "human", "value": "\nYou are a robot using the joint control. The task is \"take the basket\". Please predict a possible affordance area of the end effector?" }, { "from": "gpt", "value": "[683, 213, 916, 351]" } ] }, { "id": 6463, "image": "images/6463_ego4d_002d2729-df71-438d-8396-5895b349e8fd_67200.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[779, 482, 938, 705]" } ] }, { "id": 2002, "image": "images/2002_testset_0000962.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"throw the soccer_ball\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[290, 170, 323, 200]" } ] }, { "id": 2335, "image": "images/2335_testset_0001233.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"drag the suitcase\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[143, 128, 170, 150]" } ] }, { "id": 4395, "image": "images/4395_wine_glass_002282.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the wine_glass\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[126, 236, 195, 299]" } ] }, { "id": 3638, "image": "images/3638_ego4d_6be77236-230f-4d9b-98d5-1cbf3be34023_5700.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"pour the cup\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[1524, 264, 1681, 446]" } ] }, { "id": 1031, "image": "images/1031_testset_0000902.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the bench\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[280, 355, 468, 402]" } ] }, { "id": 727, "image": "images/727_wine_glass_001599.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"drink_with the wine_glass\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[130, 186, 223, 243]" } ] }, { "id": 3089, "image": "images/3089_youtube_Good_006000.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[606, 312, 794, 476]" } ] }, { "id": 1344, "image": "images/1344_testset_0002270.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"sit_on the surfboard\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[124, 93, 322, 156]" } ] }, { "id": 7262, "image": "images/7262_golf_clubs_003141.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"swing the golf_clubs\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[818, 213, 948, 425]" } ] }, { "id": 6087, "image": "images/6087_knife_001875.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"stick the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[160, 284, 225, 339]" } ] }, { "id": 3650, "image": "images/3650_cup_000508.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"pour the cup\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[433, 250, 483, 313]" } ] }, { "id": 3108, "image": "images/3108_youtube_Why_013980.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[1294, 641, 1428, 779]" } ] }, { "id": 3816, "image": "images/3816_bottle_001227.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"hold the bottle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[87, 181, 220, 317]" } ] }, { "id": 6135, "image": "images/6135_youtube_POV 'From-Scratch' Meatballs & Spaghetti for My Wife First Person Cooking-RN4LfG5ZHE4-1080p-1644527646933_003840.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"stick the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[364, 171, 491, 295]" } ] }, { "id": 6688, "image": "images/6688_bicycle_001334.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"ride the bicycle\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[190, 163, 245, 210]" } ] }, { "id": 6355, "image": "images/6355_knife_001749.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[90, 181, 279, 224]" } ] }, { "id": 6391, "image": "images/6391_youtube_First Person Cooking CHICKEN QUESADILLAS JONAH IN THE KITCHEN POV COOKING-zma2SgJg6l0-1080p-1644528321483 (1)_007380.jpg", "conversations": [ { "value": "\nYou are a robot using the joint control. The task is \"cut_with the knife\". Please predict a possible affordance area of the end effector?", "from": "human" }, { "from": "gpt", "value": "[869, 0, 1031, 218]" } ] } ]