dung-vpt-uney committed
Commit f3839cb · 1 Parent(s): 21b5285

Update Visual-CoT demo - 2025-10-12 23:59:41


Fixes:
- Fix LLaVA config registration error (compatibility with newer transformers; see the sketch after this list)
- Fix benchmark dataset configs and splits so streaming loads succeed
- Update Gradio to latest version (security fixes)
- Auto-deployed via update script
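A note on the first fix: recent transformers releases ship their own "llava" model type, so a Space that re-registers a custom LLaVA config via AutoConfig.register crashes on import. The snippet below is a minimal sketch of one common guard for this, not necessarily the exact change in app.py; the LlavaConfig import path is the one used by the upstream LLaVA repo and is assumed here.

    # Hedged sketch: guard LLaVA config registration against newer transformers,
    # which already reserve the "llava" model type and raise ValueError on re-registration.
    from transformers import AutoConfig
    from llava.model.language_model.llava_llama import LlavaConfig  # assumed import path

    try:
        AutoConfig.register("llava", LlavaConfig)
    except ValueError:
        # "llava" is already registered by transformers itself; keep the built-in mapping.
        pass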

Files changed (1)
  1. app.py +43 -26
app.py CHANGED
@@ -71,59 +71,53 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 # Benchmark datasets from Visual Chain-of-Thought Reasoning Benchmarks Collection
 # https://huggingface.co/collections/tuandunghcmut/visual-chain-of-thought-reasoning-benchmarks
 BENCHMARK_DATASETS = {
-    "Visual-CoT": {
-        "path": "deepcs233/Visual-CoT",
-        "config": None,
-        "split": "train",
-        "description": "Main Visual-CoT dataset with 438K question-answer pairs",
-    },
     "GQA": {
         "path": "lmms-lab/GQA",
         "config": "train_balanced_images",
         "split": "train",
-        "description": "Scene graph question answering (balanced training set)",
+        "description": "Scene graph QA (72K balanced images)",
     },
     "RefCOCO": {
         "path": "lmms-lab/RefCOCO",
-        "config": None,
-        "split": "train",
-        "description": "Referring expression comprehension (17.6K examples)",
+        "config": "default",
+        "split": "val",
+        "description": "Referring expression comprehension (8.8K validation)",
     },
     "RefCOCO+": {
         "path": "lmms-lab/RefCOCOplus",
-        "config": None,
-        "split": "train",
-        "description": "RefCOCO with no location words (7.58K examples)",
+        "config": "default",
+        "split": "val",
+        "description": "RefCOCO with no location words (3.8K validation)",
     },
     "RefCOCOg": {
         "path": "lmms-lab/RefCOCOg",
-        "config": None,
-        "split": "train",
-        "description": "RefCOCO with longer expressions (12.6K examples)",
+        "config": "default",
+        "split": "val",
+        "description": "RefCOCO with longer expressions (7.5K validation)",
     },
     "POPE": {
         "path": "lmms-lab/POPE",
-        "config": None,
+        "config": "default",
         "split": "test",
-        "description": "Polling-based Object Probing Evaluation (18K test examples)",
+        "description": "Object probing evaluation (9K test)",
     },
     "ScienceQA": {
         "path": "lmms-lab/ScienceQA",
-        "config": None,
-        "split": "train",
-        "description": "Science question answering (12.6K examples)",
+        "config": "ScienceQA-FULL",
+        "split": "validation",
+        "description": "Science question answering (4.2K validation)",
     },
     "MM-GCoT": {
         "path": "AQUA6/MM-GCoT",
-        "config": None,
+        "config": "train",
         "split": "train",
-        "description": "Multi-Modal Graph Chain-of-Thought (64.9K examples)",
+        "description": "Multi-Modal Graph CoT (63.9K training)",
     },
     "VGR": {
         "path": "BytedanceDouyinContent/VGR",
-        "config": None,
+        "config": "default",
         "split": "train",
-        "description": "Visual Grounding & Reasoning (90K examples)",
+        "description": "Visual Grounding & Reasoning (90K training)",
     },
 }
 
@@ -224,7 +218,7 @@ def load_benchmark_example(dataset_name, index=0):
 
     # Load dataset with config and split
     print(f"Loading {dataset_name} from {dataset_path} (config={dataset_config}, split={dataset_split})...")
-    if dataset_config:
+    if dataset_config and dataset_config != "None":
         dataset = load_dataset(dataset_path, dataset_config, split=dataset_split, streaming=True)
     else:
         dataset = load_dataset(dataset_path, split=dataset_split, streaming=True)
@@ -262,6 +256,13 @@ def load_benchmark_example(dataset_name, index=0):
         traceback.print_exc()
         return None, error_msg, "", "", error_msg
 
+def load_random_benchmark_example(dataset_name):
+    """Load a random example from benchmark for inference"""
+    import random
+    # Use random index between 0-99 for faster loading
+    random_index = random.randint(0, 99)
+    return load_benchmark_example(dataset_name, random_index)
+
 # =============================================================================
 # Utility Functions
 # =============================================================================
@@ -650,6 +651,16 @@ def create_demo():
 
                 submit_btn = gr.Button("Run Analysis", variant="primary", size="lg")
                 clear_btn = gr.Button("Clear", size="sm")
+
+                gr.Markdown("---")
+                gr.Markdown("**Load Random Benchmark Example:**")
+                benchmark_select = gr.Dropdown(
+                    choices=list(BENCHMARK_DATASETS.keys()),
+                    value="GQA",
+                    label="Select Benchmark",
+                    scale=1,
+                )
+                load_random_btn = gr.Button("🎲 Load Random Example", variant="secondary")
 
             with gr.Column(scale=1):
                 # Output
@@ -732,6 +743,12 @@ def create_demo():
             fn=lambda: (None, "", "", "", None, ""),
             outputs=[image_input, question_input, bbox_output, answer_output, image_output, info_output],
         )
+
+        load_random_btn.click(
+            fn=load_random_benchmark_example,
+            inputs=[benchmark_select],
+            outputs=[image_input, question_input, bbox_output, answer_output, info_output],
+        )
 
         # ============================================================
         # Tab 2: Benchmark Explorer
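A note on the new random-example path: the hunks above open every benchmark with streaming=True, so examples are reached by advancing an iterator rather than by random access, which is presumably why load_random_benchmark_example caps the index at 99. The following is a rough, self-contained sketch of that pattern; the helper name fetch_streaming_example is illustrative and not part of app.py.

    import random
    from itertools import islice
    from datasets import load_dataset

    def fetch_streaming_example(path, config=None, split="train", max_index=99):
        """Return one example at a random position from a streamed HF dataset."""
        if config and config != "None":
            ds = load_dataset(path, config, split=split, streaming=True)
        else:
            ds = load_dataset(path, split=split, streaming=True)
        index = random.randint(0, max_index)
        # A streaming dataset is an iterator, so skip ahead instead of indexing.
        return next(islice(iter(ds), index, index + 1))

    # Example: one random GQA training example (config/split as in BENCHMARK_DATASETS).
    example = fetch_streaming_example("lmms-lab/GQA", "train_balanced_images", "train")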