dung-vpt-uney committed on
Commit
21b5285
·
1 Parent(s): 8699f67

Update Visual-CoT demo - 2025-10-12 23:47:44

Browse files

Fixes:
- Fix LLaVA config registration error (compatibility with newer transformers)
- Update Gradio to the latest version (security fixes)
- Auto-deployed via update script

Files changed (1) hide show
  1. app.py +28 -3
app.py CHANGED
@@ -685,15 +685,40 @@ def create_demo():
685
  visible=False,
686
  )
687
 
688
- # Example images
689
- gr.Markdown("### 📋 Try These Examples")
690
  gr.Examples(
691
  examples=[
 
692
  ["examples/extreme_ironing.jpg", "What is unusual about this image?"],
693
  ["examples/waterview.jpg", "What are the things I should be cautious about when I visit here?"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
694
  ],
695
  inputs=[image_input, question_input],
696
- label="Click to load example",
 
697
  )
698
 
699
  # Event handlers
 
685
  visible=False,
686
  )
687
 
688
+ # Example questions (20 diverse examples)
689
+ gr.Markdown("### 📋 Try These Example Questions")
690
  gr.Examples(
691
  examples=[
692
+ # Available images
693
  ["examples/extreme_ironing.jpg", "What is unusual about this image?"],
694
  ["examples/waterview.jpg", "What are the things I should be cautious about when I visit here?"],
695
+ # Visual reasoning examples (upload your own images)
696
+ [None, "What color is the car in the image?"],
697
+ [None, "How many people are in this picture?"],
698
+ [None, "What is the main object in the center of the image?"],
699
+ [None, "What is the person doing in this photo?"],
700
+ [None, "What time of day does this appear to be?"],
701
+ [None, "What is the weather like in this image?"],
702
+ [None, "What room is this photo taken in?"],
703
+ [None, "What brand or logo can you see?"],
704
+ # Text reading examples
705
+ [None, "What text is written on the sign?"],
706
+ [None, "What is the price shown in the image?"],
707
+ [None, "What does the document say?"],
708
+ [None, "What is the title of this book/poster?"],
709
+ # Spatial reasoning
710
+ [None, "What is to the left of the main object?"],
711
+ [None, "What is on top of the table?"],
712
+ [None, "Where is the person standing?"],
713
+ # Scene understanding
714
+ [None, "What type of place is this?"],
715
+ [None, "What activity is happening here?"],
716
+ [None, "What is the overall mood or atmosphere?"],
717
+ [None, "What can you infer about the context of this image?"],
718
  ],
719
  inputs=[image_input, question_input],
720
+ label="Click to load example questions (upload image for questions without images)",
721
+ examples_per_page=10,
722
  )
723
 
724
  # Event handlers