Deploy MobileCLIP classifier with Gradio interface
Browse files- README.md +47 -6
- app.py +280 -0
- handler.py +385 -0
- items.json +902 -0
- reparam.py +341 -0
- requirements.txt +7 -0
README.md
CHANGED
|
@@ -1,12 +1,53 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: MobileCLIP Image Classifier
|
| 3 |
+
emoji: 📸
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 4.44.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# 📸 MobileCLIP-B Image Classifier
|
| 14 |
+
|
| 15 |
+
Interactive web interface for Apple's MobileCLIP-B zero-shot image classification model.
|
| 16 |
+
|
| 17 |
+
## Features
|
| 18 |
+
|
| 19 |
+
- 🖼️ **Image Classification**: Upload any image for instant classification
|
| 20 |
+
- 🏷️ **Dynamic Labels**: Add and manage classification labels on-the-fly
|
| 21 |
+
- 📊 **Visual Results**: See confidence scores with interactive charts
|
| 22 |
+
- ⚡ **Fast Inference**: Optimized for < 30ms latency on GPU
|
| 23 |
+
- 🔒 **Admin Panel**: Secure label management interface
|
| 24 |
+
|
| 25 |
+
## Environment Variables
|
| 26 |
+
|
| 27 |
+
Configure these in your Space Settings (Settings → Variables and secrets):
|
| 28 |
+
|
| 29 |
+
| Variable | Description | Required |
|
| 30 |
+
|----------|-------------|----------|
|
| 31 |
+
| `ADMIN_TOKEN` | Secret token for admin operations | Yes (for admin features) |
|
| 32 |
+
| `HF_LABEL_REPO` | Hub dataset repo for label storage (e.g., `username/mobileclip-labels`) | No |
|
| 33 |
+
| `HF_WRITE_TOKEN` | Hugging Face token with write permissions (needed, together with `HF_LABEL_REPO`, to persist labels added from the admin panel) | No |
|
| 34 |
+
| `HF_READ_TOKEN` | Hugging Face token with read permissions (defaults to `HF_WRITE_TOKEN` when unset) | No |
|
| 35 |
+
|
| 36 |
+
## How It Works
|
| 37 |
+
|
| 38 |
+
1. **Model**: Uses MobileCLIP-B with re-parameterized MobileOne blocks for efficient inference
|
| 39 |
+
2. **Labels**: Loads the latest label snapshot from the Hub repository when `HF_LABEL_REPO` is set; otherwise falls back to the bundled `items.json`
|
| 40 |
+
3. **Processing**: Pre-computes text embeddings for fast classification
|
| 41 |
+
4. **Interface**: Gradio provides the web UI with image upload and admin controls
|
| 42 |
+
|
| 43 |
+
## Admin Features
|
| 44 |
+
|
| 45 |
+
With proper authentication, admins can:
|
| 46 |
+
- Add new classification labels without redeploying
|
| 47 |
+
- Reload specific label versions from the Hub
|
| 48 |
+
- View current statistics and label information
|
| 49 |
+
|
| 50 |
+
## License
|
| 51 |
+
|
| 52 |
+
- Model weights: Apple Sample Code License (ASCL)
|
| 53 |
+
- Interface code: MIT
|
app.py
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import base64
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
from PIL import Image
|
| 6 |
+
import io
|
| 7 |
+
from handler import EndpointHandler
|
| 8 |
+
|
| 9 |
+
handler = EndpointHandler()
|
| 10 |
+
|
| 11 |
+
def classify_image(image, top_k=10):
    """
    Classify an uploaded image and format the result for the public tab.

    Args:
        image: PIL image supplied by the image input, or ``None`` when nothing
            has been uploaded.
        top_k: Maximum number of predictions to request from the handler.

    Returns:
        A ``(chart_data, markdown)`` tuple. ``chart_data`` maps each label to
        its score (``None`` on failure); ``markdown`` is either the ranked
        result list or an error message.
    """
    if image is None:
        return None, "Please upload an image"

    try:
        # PNG cannot store every PIL mode (CMYK, for example). The handler
        # converts the decoded image to RGB anyway, so normalising here only
        # removes an avoidable encoding failure.
        if image.mode != "RGB":
            image = image.convert("RGB")

        # Serialise the image as base64-encoded PNG, the format the handler
        # expects under the "image" key.
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_b64 = base64.b64encode(buffered.getvalue()).decode()

        result = handler({
            "inputs": {
                "image": img_b64,
                "top_k": int(top_k)
            }
        })

        # Anything other than a list is treated as an error payload.
        if not isinstance(result, list):
            return None, f"Error: {result.get('error', 'Unknown error')}"

        ranked_lines = [
            f"{rank}. **{item['label']}** (ID: {item['id']}): {item['score'] * 100:.2f}%\n"
            for rank, item in enumerate(result, 1)
        ]
        output_text = "**Top {} Classifications:**\n\n".format(len(result)) + "".join(ranked_lines)

        # Label -> score mapping consumed by the chart output.
        chart_data = {item['label']: item['score'] for item in result}
        return chart_data, output_text

    except Exception as e:
        # UI boundary: report the failure in the Markdown output rather than
        # letting the exception escape into the event handler.
        return None, f"Error: {str(e)}"
|
| 51 |
+
|
| 52 |
+
def upsert_labels_admin(admin_token, new_items_json):
    """
    Add new labels through the handler's ``upsert_labels`` operation.

    Args:
        admin_token: Token typed into the admin panel; forwarded to the
            handler, which performs the actual authorisation check.
        new_items_json: JSON text holding an array of label objects, each
            with ``id``, ``name`` and ``prompt`` keys. Empty input is treated
            as an empty array.

    Returns:
        A human-readable status message for the admin panel.
    """
    if not admin_token:
        return "Error: Admin token required"

    try:
        # Parse the JSON input
        items = json.loads(new_items_json) if new_items_json else []

        # Validate the shape up front so the user gets an actionable message
        # instead of an attribute/key error raised deep inside the handler.
        if not isinstance(items, list) or not all(isinstance(it, dict) for it in items):
            return "❌ Error: Expected a JSON array of label objects"
        required_keys = ("id", "name", "prompt")
        if any(key not in it for it in items for key in required_keys):
            return '❌ Error: Each label needs "id", "name" and "prompt"'

        result = handler({
            "inputs": {
                "op": "upsert_labels",
                "token": admin_token,
                "items": items
            }
        })

        if result.get("status") == "ok":
            return f"✅ Success! Added {result.get('added', 0)} new labels. Current version: {result.get('labels_version', 'unknown')}"
        elif result.get("error") == "unauthorized":
            return "❌ Error: Invalid admin token"
        else:
            # Prefer the detailed message when the handler supplies one.
            return f"❌ Error: {result.get('detail', result.get('error', 'Unknown error'))}"

    except json.JSONDecodeError:
        return "❌ Error: Invalid JSON format"
    except Exception as e:
        # UI boundary: surface any other failure as text.
        return f"❌ Error: {str(e)}"
|
| 82 |
+
|
| 83 |
+
def reload_labels_admin(admin_token, version):
    """
    Reload a specific label snapshot through the handler's ``reload_labels`` op.

    Args:
        admin_token: Token typed into the admin panel; forwarded to the
            handler, which performs the actual authorisation check.
        version: Snapshot version to load. An empty field (``None`` or
            ``""``) defaults to version 1.

    Returns:
        A human-readable status message for the admin panel.
    """
    if not admin_token:
        return "Error: Admin token required"

    # Validate the version here so bad input yields the same message the
    # handler's "invalid_version" error maps to, rather than raw exception
    # text. Zero and negative numbers are rejected instead of being
    # silently rewritten to 1.
    try:
        requested = 1 if version is None or version == "" else int(version)
    except (TypeError, ValueError, OverflowError):
        return "❌ Error: Invalid version number"
    if requested < 1:
        return "❌ Error: Invalid version number"

    try:
        result = handler({
            "inputs": {
                "op": "reload_labels",
                "token": admin_token,
                "version": requested
            }
        })

        if result.get("status") == "ok":
            return f"✅ Labels reloaded successfully! Current version: {result.get('labels_version', 'unknown')}"
        elif result.get("status") == "nochange":
            return f"ℹ️ No change needed. Current version: {result.get('labels_version', 'unknown')}"
        elif result.get("error") == "unauthorized":
            return "❌ Error: Invalid admin token"
        elif result.get("error") == "invalid_version":
            return "❌ Error: Invalid version number"
        else:
            return f"❌ Error: {result.get('error', 'Unknown error')}"

    except Exception as e:
        # UI boundary: surface any handler failure as text.
        return f"❌ Error: {str(e)}"
|
| 112 |
+
|
| 113 |
+
def get_current_stats():
    """
    Summarise the handler's label state as Markdown for the admin panel.

    Returns:
        Markdown listing the label count, labels version, device and model
        name, followed by up to five sample label names when any exist. If
        anything fails, ``"Error getting stats: <message>"`` is returned.
    """
    absent = object()  # sentinel: tells "attribute missing" apart from None
    try:
        ids = getattr(handler, 'class_ids', absent)
        label_count = 0 if ids is absent else len(ids)
        labels_ver = getattr(handler, 'labels_version', 1)
        dev = getattr(handler, 'device', "unknown")

        report = (
            "\n"
            "**Current Statistics:**\n"
            f"- Number of labels: {label_count}\n"
            f"- Labels version: {labels_ver}\n"
            f"- Device: {dev}\n"
            "- Model: MobileCLIP-B\n"
        )

        names = getattr(handler, 'class_names', absent)
        if names is not absent and len(names) > 0:
            report += f"\n- Sample labels: {', '.join(names[:5])}"
            # An ellipsis signals that more labels exist than are shown.
            if len(names) > 5:
                report += "..."

        return report
    except Exception as exc:
        return f"Error getting stats: {str(exc)}"
|
| 138 |
+
|
| 139 |
+
# Create Gradio interface
# Three tabs are declared below: public classification, a token-gated admin
# panel, and a static "About" page. Components are registered in the order
# they appear inside each `with` context.
with gr.Blocks(title="MobileCLIP Image Classifier") as demo:
    gr.Markdown("""
# 🖼️ MobileCLIP-B Zero-Shot Image Classifier

Upload an image to classify it using MobileCLIP-B model with dynamic label management.
""")

    with gr.Tab("🔍 Image Classification"):
        with gr.Row():
            # Left column: inputs.
            with gr.Column():
                input_image = gr.Image(
                    type="pil",
                    label="Upload Image"
                )
                top_k_slider = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=10,
                    step=1,
                    label="Number of top results to show"
                )
                classify_btn = gr.Button("🚀 Classify Image", variant="primary")

            # Right column: outputs.
            with gr.Column():
                # NOTE(review): classify_image returns a plain {label: score}
                # dict for this component. Confirm gr.BarPlot accepts that
                # shape in the pinned Gradio version.
                output_chart = gr.BarPlot(
                    label="Classification Confidence",
                    x_label="Label",
                    y_label="Confidence",
                    vertical=False,
                    height=400
                )
                output_text = gr.Markdown(label="Classification Results")

        # Remote sample images that populate the image input when clicked.
        gr.Examples(
            examples=[
                ["https://raw.githubusercontent.com/gradio-app/gradio/main/demo/image_classifier/examples/cheetah.jpg"],
                ["https://raw.githubusercontent.com/gradio-app/gradio/main/demo/image_classifier/examples/elephant.jpg"],
                ["https://raw.githubusercontent.com/gradio-app/gradio/main/demo/image_classifier/examples/giraffe.jpg"]
            ],
            inputs=input_image,
            label="Example Images"
        )

        # classify_image returns (chart_data, markdown), matching the two outputs.
        classify_btn.click(
            classify_image,
            inputs=[input_image, top_k_slider],
            outputs=[output_chart, output_text]
        )

    with gr.Tab("🔧 Admin Panel"):
        gr.Markdown("""
### Admin Functions
**Note:** Requires admin token (set via environment variable `ADMIN_TOKEN`)
""")

        with gr.Row():
            # One token field is shared by both admin actions below.
            admin_token_input = gr.Textbox(
                label="Admin Token",
                type="password",
                placeholder="Enter admin token"
            )

        with gr.Accordion("📊 Current Statistics", open=True):
            # get_current_stats() is called once here, while the UI is being
            # built; afterwards the text changes only via the refresh button.
            stats_display = gr.Markdown(value=get_current_stats())
            refresh_stats_btn = gr.Button("🔄 Refresh Stats")
            refresh_stats_btn.click(
                get_current_stats,
                outputs=stats_display
            )

        with gr.Accordion("➕ Add New Labels", open=False):
            gr.Markdown("""
Add new labels by providing JSON array:
```json
[
{"id": 100, "name": "new_object", "prompt": "a photo of a new_object"},
{"id": 101, "name": "another_object", "prompt": "a photo of another_object"}
]
```
""")
            new_items_input = gr.Code(
                label="New Items JSON",
                language="json",
                lines=5,
                value='[\n {"id": 100, "name": "example", "prompt": "a photo of example"}\n]'
            )
            upsert_btn = gr.Button("➕ Add Labels", variant="primary")
            upsert_output = gr.Markdown()

            upsert_btn.click(
                upsert_labels_admin,
                inputs=[admin_token_input, new_items_input],
                outputs=upsert_output
            )

        with gr.Accordion("🔄 Reload Label Version", open=False):
            gr.Markdown("Reload labels from a specific version stored in the Hub")
            version_input = gr.Number(
                label="Version Number",
                value=1,
                precision=0
            )
            reload_btn = gr.Button("🔄 Reload Version", variant="primary")
            reload_output = gr.Markdown()

            reload_btn.click(
                reload_labels_admin,
                inputs=[admin_token_input, version_input],
                outputs=reload_output
            )

    # Static documentation tab; no event handlers are attached here.
    with gr.Tab("ℹ️ About"):
        gr.Markdown("""
## About MobileCLIP-B Classifier

This Space provides a web interface for Apple's MobileCLIP-B model, optimized for fast zero-shot image classification.

### Features:
- 🚀 **Fast inference**: < 30ms on GPU
- 🏷️ **Dynamic labels**: Add/update labels without redeployment
- 🔄 **Version control**: Track and reload label versions
- 📊 **Visual results**: Bar charts and confidence scores

### Environment Variables (set in Space Settings):
- `ADMIN_TOKEN`: Secret token for admin operations
- `HF_LABEL_REPO`: Hub repository for label storage (e.g., "username/labels")
- `HF_WRITE_TOKEN`: Token with write permissions to label repo
- `HF_READ_TOKEN`: Token with read permissions (optional, defaults to write token)

### Model Details:
- **Architecture**: MobileCLIP-B with MobileOne blocks
- **Text Encoder**: Transformer-based, 77 token context
- **Image Size**: 224x224
- **Embedding Dim**: 512

### License:
Model weights are licensed under Apple Sample Code License (ASCL).
""")

# Start the server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|
handler.py
ADDED
|
@@ -0,0 +1,385 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import contextlib, io, base64, torch, json, os, threading
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import open_clip
|
| 4 |
+
from huggingface_hub import hf_hub_download, create_commit, CommitOperationAdd
|
| 5 |
+
from safetensors.torch import save_file, load_file
|
| 6 |
+
from reparam import reparameterize_model
|
| 7 |
+
|
| 8 |
+
# Runtime configuration, read once from the environment at import time.
# Every value falls back to an empty string when its variable is unset.
ADMIN_TOKEN = os.environ.get("ADMIN_TOKEN", "")
HF_LABEL_REPO = os.environ.get("HF_LABEL_REPO", "")  # e.g. "org/mobileclip-labels"
HF_WRITE_TOKEN = os.environ.get("HF_WRITE_TOKEN", "")
# The read token defaults to the write token when not set separately.
HF_READ_TOKEN = os.environ.get("HF_READ_TOKEN", HF_WRITE_TOKEN)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def _fingerprint(device: str, dtype: torch.dtype) -> dict:
    """Describe the model/runtime combination used to produce embeddings.

    Args:
        device: Accepted for signature symmetry with the caller; the body
            does not read it.
        dtype: Runtime dtype, recorded as its string form.

    Returns:
        A dict of fixed model identifiers plus the installed ``open_clip``
        and ``torch`` versions, the CUDA version (``None`` when CUDA is
        unavailable), the dtype string, and the text-normalisation and
        logit-scale constants.
    """
    cuda_version = torch.version.cuda if torch.cuda.is_available() else None
    open_clip_version = getattr(open_clip, "__version__", "unknown")

    info = {}
    info["model_id"] = "MobileCLIP-B"
    info["pretrained"] = "datacompdr"
    info["open_clip"] = open_clip_version
    info["torch"] = torch.__version__
    info["cuda"] = cuda_version
    info["dtype_runtime"] = str(dtype)
    info["text_norm"] = "L2"
    info["logit_scale"] = 100.0
    return info
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class EndpointHandler:
    """Zero-shot image classifier built on MobileCLIP-B with updatable labels.

    The instance keeps a float32 CPU copy of the label text embeddings
    (``text_features_cpu``) plus a device copy (``text_features``) that is
    used for scoring. Labels come from a Hub snapshot when ``HF_LABEL_REPO``
    is configured and loadable, otherwise from ``items.json``.

    Calling the instance dispatches on ``payload["op"]``:

    * ``"upsert_labels"`` – admin only; adds labels and publishes a snapshot.
    * ``"reload_labels"`` – admin only; loads a given snapshot version.
    * anything else – classifies the base64 image in ``payload["image"]``.
    """

    def __init__(self, path: str = ""):
        """Load the model and the initial label set.

        Args:
            path: Directory containing ``items.json``. An empty string means
                the current working directory.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.dtype = torch.float16 if self.device == "cuda" else torch.float32

        # 1) Load model + transforms
        model, _, self.preprocess = open_clip.create_model_and_transforms(
            "MobileCLIP-B", pretrained="datacompdr"
        )
        model.eval()
        model = reparameterize_model(model)
        model.to(self.device)
        if self.device == "cuda":
            model = model.to(torch.float16)
        self.model = model
        self.tokenizer = open_clip.get_tokenizer("MobileCLIP-B")
        self.fingerprint = _fingerprint(self.device, self.dtype)
        self._lock = threading.Lock()

        # 2) Try to load snapshot from Hub; else seed from items.json.
        # The Hub load is deliberately best-effort: any failure falls back
        # to the bundled label file.
        loaded = False
        if HF_LABEL_REPO:
            with contextlib.suppress(Exception):
                loaded = self._load_snapshot_from_hub_latest()
        if not loaded:
            with open(self._items_path(path), "r", encoding="utf-8") as f:
                items = json.load(f)
            prompts = [it["prompt"] for it in items]
            self.class_ids = [int(it["id"]) for it in items]
            self.class_names = [it["name"] for it in items]
            feats = self._encode_text(prompts)
            self.text_features_cpu = feats.detach().cpu().to(torch.float32).contiguous()
            self._to_device()
            self.labels_version = 1

    def __call__(self, data):
        """Handle one request.

        Args:
            data: Either the payload dict itself or ``{"inputs": payload}``.

        Returns:
            For admin operations, a status/error dict. For classification, a
            list of ``{"id", "label", "score"}`` dicts sorted by descending
            score and truncated to ``top_k`` entries.
        """
        payload = data.get("inputs", data)

        # Admin op: upsert_labels
        op = payload.get("op")
        if op == "upsert_labels":
            if not self._token_ok(payload.get("token"), ADMIN_TOKEN):
                return {"error": "unauthorized"}
            items = payload.get("items", []) or []
            added = self._upsert_items(items)
            if added > 0:
                new_ver = int(getattr(self, "labels_version", 1)) + 1
                try:
                    self._persist_snapshot_to_hub(new_ver)
                    self.labels_version = new_ver
                except Exception as e:
                    # The labels are already active in memory at this point;
                    # only publishing (and the version bump) failed.
                    return {"status": "error", "added": added, "detail": str(e)}
            return {"status": "ok", "added": added, "labels_version": getattr(self, "labels_version", 1)}

        # Admin op: reload_labels
        if op == "reload_labels":
            if not self._token_ok(payload.get("token"), ADMIN_TOKEN):
                return {"error": "unauthorized"}
            try:
                ver = int(payload.get("version"))
            except Exception:
                return {"error": "invalid_version"}
            ok = self._load_snapshot_from_hub_version(ver)
            return {"status": "ok" if ok else "nochange", "labels_version": getattr(self, "labels_version", 0)}

        # Freshness guard (optional): callers may demand a minimum labels
        # version; failure to fetch it is ignored and current labels are used.
        min_ver = payload.get("min_labels_version")
        if isinstance(min_ver, int) and min_ver > getattr(self, "labels_version", 0):
            with contextlib.suppress(Exception):
                self._load_snapshot_from_hub_version(min_ver)

        # Classification path (unchanged contract)
        img_b64 = payload["image"]
        image = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
        img_tensor = self.preprocess(image).unsqueeze(0).to(self.device)
        if self.device == "cuda":
            img_tensor = img_tensor.to(torch.float16)
        with torch.no_grad():
            img_feat = self.model.encode_image(img_tensor)
            img_feat /= img_feat.norm(dim=-1, keepdim=True)
            # Scaled similarity against every label embedding, then softmax.
            probs = (100.0 * img_feat @ self.text_features.T).softmax(dim=-1)[0]
        results = zip(self.class_ids, self.class_names, probs.detach().cpu().tolist())
        # A negative top_k would otherwise slice from the end of the ranking.
        top_k = max(int(payload.get("top_k", len(self.class_ids))), 0)
        return sorted(
            [{"id": i, "label": name, "score": float(p)} for i, name, p in results],
            key=lambda x: x["score"],
            reverse=True,
        )[:top_k]

    # ------------- helpers -------------
    @staticmethod
    def _items_path(path):
        """Return the location of ``items.json`` under ``path``.

        An empty ``path`` yields the relative name ``items.json`` rather
        than the filesystem-root path ``/items.json``.
        """
        return os.path.join(path or "", "items.json")

    @staticmethod
    def _token_ok(supplied, expected) -> bool:
        """Return True only when ``supplied`` matches a non-empty ``expected``.

        With no admin token configured every request is rejected, so an
        empty token can never unlock the admin operations. The comparison
        itself is constant-time.
        """
        import hmac

        if not expected or not isinstance(supplied, str):
            return False
        return hmac.compare_digest(supplied.encode("utf-8"), expected.encode("utf-8"))

    def _encode_text(self, prompts):
        """Tokenise and encode ``prompts``; return L2-normalised features."""
        with torch.no_grad():
            toks = self.tokenizer(prompts).to(self.device)
            feats = self.model.encode_text(toks)
            feats = feats / feats.norm(dim=-1, keepdim=True)
        return feats

    def _to_device(self):
        """Refresh ``text_features`` from the CPU copy (float16 on CUDA)."""
        self.text_features = self.text_features_cpu.to(
            self.device, dtype=(torch.float16 if self.device == "cuda" else torch.float32)
        )

    def _upsert_items(self, new_items):
        """Append labels whose ids are not yet known.

        Args:
            new_items: Iterable of dicts with ``id``, ``name`` and ``prompt``.

        Returns:
            Number of labels actually added. Items whose id already exists,
            or repeats an earlier item in the same call, are skipped.
        """
        if not new_items:
            return 0
        with self._lock:
            seen = set(getattr(self, "class_ids", []))
            batch = []
            for it in new_items:
                item_id = int(it.get("id"))
                if item_id in seen:
                    continue
                seen.add(item_id)
                batch.append(it)
            if not batch:
                return 0
            prompts = [it["prompt"] for it in batch]
            feats = self._encode_text(prompts).detach().cpu().to(torch.float32)
            if not hasattr(self, "text_features_cpu"):
                self.text_features_cpu = feats.contiguous()
                self.class_ids = [int(it["id"]) for it in batch]
                self.class_names = [it["name"] for it in batch]
            else:
                self.text_features_cpu = torch.cat([self.text_features_cpu, feats], dim=0).contiguous()
                self.class_ids.extend([int(it["id"]) for it in batch])
                self.class_names.extend([it["name"] for it in batch])
            self._to_device()
            return len(batch)

    def _persist_snapshot_to_hub(self, version: int):
        """Publish the current labels and embeddings as snapshot ``version``.

        Uploads ``snapshots/v{version}/embeddings.safetensors``,
        ``snapshots/v{version}/meta.json`` and ``snapshots/latest.json`` in a
        single commit to the ``HF_LABEL_REPO`` dataset repository.

        Raises:
            RuntimeError: If ``HF_LABEL_REPO`` or ``HF_WRITE_TOKEN`` is unset.
        """
        import tempfile

        if not HF_LABEL_REPO:
            raise RuntimeError("HF_LABEL_REPO not set")
        if not HF_WRITE_TOKEN:
            raise RuntimeError("HF_WRITE_TOKEN not set for publishing")

        latest_bytes = io.BytesIO(json.dumps({"version": int(version)}).encode("utf-8"))
        meta = {
            "items": [{"id": int(i), "name": n} for i, n in zip(self.class_ids, self.class_names)],
            "fingerprint": self.fingerprint,
            "dims": int(self.text_features_cpu.shape[1]),
            "count": int(self.text_features_cpu.shape[0]),
            "version": int(version),
        }

        # A private temporary directory keeps concurrent publishes from
        # overwriting each other's files; it must outlive create_commit,
        # which reads the files by path.
        with tempfile.TemporaryDirectory() as tmp_dir:
            emb_path = os.path.join(tmp_dir, "embeddings.safetensors")
            meta_path = os.path.join(tmp_dir, "meta.json")

            save_file({"embeddings": self.text_features_cpu.to(torch.float32)}, emb_path)
            with open(meta_path, "w", encoding="utf-8") as f:
                json.dump(meta, f)

            ops = [
                CommitOperationAdd(
                    path_in_repo=f"snapshots/v{version}/embeddings.safetensors",
                    path_or_fileobj=emb_path,
                ),
                CommitOperationAdd(
                    path_in_repo=f"snapshots/v{version}/meta.json",
                    path_or_fileobj=meta_path,
                ),
                CommitOperationAdd(
                    path_in_repo="snapshots/latest.json",
                    path_or_fileobj=latest_bytes,
                ),
            ]
            create_commit(
                repo_id=HF_LABEL_REPO,
                repo_type="dataset",
                operations=ops,
                token=HF_WRITE_TOKEN,
                commit_message=f"labels v{version}",
            )

    def _load_snapshot_from_hub_version(self, version: int) -> bool:
        """Replace the in-memory labels with snapshot ``version`` from the Hub.

        Returns:
            ``False`` when ``HF_LABEL_REPO`` is unset, ``True`` after a
            successful load.

        Raises:
            RuntimeError: If the snapshot's fingerprint differs from this
                instance's, or its item count does not match its embeddings.
        """
        if not HF_LABEL_REPO:
            return False
        with self._lock:
            emb_p = hf_hub_download(
                HF_LABEL_REPO,
                f"snapshots/v{version}/embeddings.safetensors",
                repo_type="dataset",
                token=HF_READ_TOKEN,
                force_download=True,
            )
            meta_p = hf_hub_download(
                HF_LABEL_REPO,
                f"snapshots/v{version}/meta.json",
                repo_type="dataset",
                token=HF_READ_TOKEN,
                force_download=True,
            )
            with open(meta_p, "r", encoding="utf-8") as f:
                meta = json.load(f)
            if meta.get("fingerprint") != self.fingerprint:
                raise RuntimeError("Embedding/model fingerprint mismatch")
            items = meta.get("items", [])
            feats = load_file(emb_p)["embeddings"]  # float32 CPU
            # Scores are paired with labels by position, so the two must agree.
            if feats.shape[0] != len(items):
                raise RuntimeError("Snapshot embeddings/items count mismatch")
            self.text_features_cpu = feats.contiguous()
            self.class_ids = [int(x["id"]) for x in items]
            self.class_names = [x["name"] for x in items]
            self.labels_version = int(meta.get("version", version))
            self._to_device()
            return True

    def _load_snapshot_from_hub_latest(self) -> bool:
        """Load the version named in ``snapshots/latest.json``.

        Returns:
            ``False`` when no repo is configured, the pointer file cannot be
            downloaded, or it names a non-positive version; otherwise the
            result of loading that version.
        """
        if not HF_LABEL_REPO:
            return False
        try:
            latest_p = hf_hub_download(
                HF_LABEL_REPO,
                "snapshots/latest.json",
                repo_type="dataset",
                token=HF_READ_TOKEN,
            )
        except Exception:
            return False
        with open(latest_p, "r", encoding="utf-8") as f:
            latest = json.load(f)
        ver = int(latest.get("version", 0))
        if ver <= 0:
            return False
        return self._load_snapshot_from_hub_version(ver)
|
| 243 |
+
|
| 244 |
+
# """
|
| 245 |
+
# MobileCLIP‑B Zero‑Shot Image Classifier (Hugging Face Inference Endpoint)
|
| 246 |
+
# ===========================================================================
|
| 247 |
+
|
| 248 |
+
# * One container instance is created per replica; the `EndpointHandler`
|
| 249 |
+
# object below is instantiated exactly **once** at start‑up.
|
| 250 |
+
|
| 251 |
+
# * At request time (`__call__`) we receive a base‑64‑encoded image, run a
|
| 252 |
+
# **single forward pass**, and return class probabilities.
|
| 253 |
+
|
| 254 |
+
# Design choices
|
| 255 |
+
# --------------
|
| 256 |
+
|
| 257 |
+
# 1. **Model & transform come from OpenCLIP**
|
| 258 |
+
# This guarantees we apply **identical preprocessing** to what the model
|
| 259 |
+
# was trained with (224 × 224 crop + mean/std normalisation).
|
| 260 |
+
|
| 261 |
+
# 2. **Re‑parameterisation for inference**
|
| 262 |
+
# MobileCLIP uses MobileOne blocks that have extra convolution branches
|
| 263 |
+
# for training; `reparameterize_model` fuses them so inference is fast
|
| 264 |
+
# and deterministic.
|
| 265 |
+
|
| 266 |
+
# 3. **Text embeddings are cached**
|
| 267 |
+
# The class “prompts” (e.g. `"a photo of a cat"`) are encoded **once at
|
| 268 |
+
# start‑up**. Each request therefore encodes *only* the image and
|
| 269 |
+
# performs a single matrix multiplication.
|
| 270 |
+
|
| 271 |
+
# 4. **Mixed precision on GPU**
|
| 272 |
+
# If the container has CUDA, we cast the model **and** inputs to
|
| 273 |
+
# `float16`. That halves memory and roughly doubles throughput on most
|
| 274 |
+
# modern GPUs. On CPU we stay in `float32` for numerical stability.
|
| 275 |
+
# """
|
| 276 |
+
|
| 277 |
+
# import contextlib, io, base64, json
|
| 278 |
+
# from pathlib import Path
|
| 279 |
+
# from typing import Any, Dict, List
|
| 280 |
+
|
| 281 |
+
# import torch
|
| 282 |
+
# from PIL import Image
|
| 283 |
+
# import open_clip
|
| 284 |
+
|
| 285 |
+
# from reparam import reparameterize_model # local copy (~60 LoC) of Apple’s helper
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
# class EndpointHandler:
|
| 289 |
+
# """
|
| 290 |
+
# Hugging Face entry‑point. The toolkit will instantiate this class
|
| 291 |
+
# once and call it for every HTTP request.
|
| 292 |
+
|
| 293 |
+
# Parameters
|
| 294 |
+
# ----------
|
| 295 |
+
# path : str, optional
|
| 296 |
+
# Root directory of the repository. HF mounts the code under
|
| 297 |
+
# `/repository`; we use this path to locate `items.json`.
|
| 298 |
+
# """
|
| 299 |
+
|
| 300 |
+
# # ------------------------------------------------------------------ #
|
| 301 |
+
# # INITIALISATION (runs **once**) #
|
| 302 |
+
# # ------------------------------------------------------------------ #
|
| 303 |
+
# def __init__(self, path: str = "") -> None:
|
| 304 |
+
# self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 305 |
+
|
| 306 |
+
# # 1️⃣ Load MobileCLIP‑B weights & transforms -------------------
|
| 307 |
+
# # `pretrained="datacompdr"` makes OpenCLIP download the
|
| 308 |
+
# # official checkpoint from the Hub (cached in the image layer).
|
| 309 |
+
# model, _, self.preprocess = open_clip.create_model_and_transforms(
|
| 310 |
+
# "MobileCLIP-B", pretrained="datacompdr"
|
| 311 |
+
# )
|
| 312 |
+
# model.eval() # disable dropout / BN updates
|
| 313 |
+
# model = reparameterize_model(model) # fuse MobileOne branches
|
| 314 |
+
# model.to(self.device)
|
| 315 |
+
# if self.device == "cuda":
|
| 316 |
+
# model = model.to(torch.float16) # FP16 for throughput
|
| 317 |
+
# self.model = model # hold a reference
|
| 318 |
+
|
| 319 |
+
# # 2️⃣ Build the tokenizer once --------------------------------
|
| 320 |
+
# tokenizer = open_clip.get_tokenizer("MobileCLIP-B")
|
| 321 |
+
|
| 322 |
+
# # 3️⃣ Load class metadata -------------------------------------
|
| 323 |
+
# # Expect JSON file: [{"id": 3, "name": "cat", "prompt": "cat"}, …]
|
| 324 |
+
# items_path = Path(path) / "items.json"
|
| 325 |
+
# with items_path.open("r", encoding="utf-8") as f:
|
| 326 |
+
# class_defs: List[Dict[str, Any]] = json.load(f)
|
| 327 |
+
|
| 328 |
+
# # Extract the bits we need later
|
| 329 |
+
# prompts = [item["prompt"] for item in class_defs]
|
| 330 |
+
# self.class_ids: List[int] = [item["id"] for item in class_defs]
|
| 331 |
+
# self.class_names: List[str] = [item["name"] for item in class_defs]
|
| 332 |
+
|
| 333 |
+
# # 4️⃣ Encode all prompts once ---------------------------------
|
| 334 |
+
# with torch.no_grad():
|
| 335 |
+
# text_tokens = tokenizer(prompts).to(self.device)
|
| 336 |
+
# text_feats = self.model.encode_text(text_tokens)
|
| 337 |
+
# text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
|
| 338 |
+
# self.text_features = text_feats # [num_classes, 512]
|
| 339 |
+
|
| 340 |
+
# # ------------------------------------------------------------------ #
|
| 341 |
+
# # INFERENCE CALL #
|
| 342 |
+
# # ------------------------------------------------------------------ #
|
| 343 |
+
# def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
| 344 |
+
# """
|
| 345 |
+
# Parameters
|
| 346 |
+
# ----------
|
| 347 |
+
# data : dict
|
| 348 |
+
# Either the raw payload `{"image": "<base64>"}` **or** the
|
| 349 |
+
# Hugging Face convention `{"inputs": {...}}`.
|
| 350 |
+
|
| 351 |
+
# Returns
|
| 352 |
+
# -------
|
| 353 |
+
# list of dict
|
| 354 |
+
# Sorted list of `{"id": int, "label": str, "score": float}`.
|
| 355 |
+
# Scores are the softmax probabilities over the *provided*
|
| 356 |
+
# class list (they sum to 1.0).
|
| 357 |
+
# """
|
| 358 |
+
# # 1️⃣ Unpack the request payload ------------------------------
|
| 359 |
+
# payload: Dict[str, Any] = data.get("inputs", data)
|
| 360 |
+
# img_b64: str = payload["image"]
|
| 361 |
+
|
| 362 |
+
# # 2️⃣ Decode + preprocess -------------------------------------
|
| 363 |
+
# image = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
|
| 364 |
+
# img_tensor = self.preprocess(image).unsqueeze(0).to(self.device) # [1, 3, 224, 224]
|
| 365 |
+
# if self.device == "cuda":
|
| 366 |
+
# img_tensor = img_tensor.to(torch.float16)
|
| 367 |
+
|
| 368 |
+
# # 3️⃣ Forward pass (image only) -------------------------------
|
| 369 |
+
# with torch.no_grad(): # no autograd graph
|
| 370 |
+
# img_feat = self.model.encode_image(img_tensor) # [1, 512]
|
| 371 |
+
# img_feat = img_feat / img_feat.norm(dim=-1, keepdim=True) # L2‑normalise
|
| 372 |
+
|
| 373 |
+
# # cosine similarity → logits → softmax probabilities
|
| 374 |
+
# probs = (100 * img_feat @ self.text_features.T).softmax(dim=-1)[0] # [num_classes]
|
| 375 |
+
|
| 376 |
+
# # 4️⃣ Assemble JSON‑serialisable response ---------------------
|
| 377 |
+
# results = zip(self.class_ids, self.class_names, probs.cpu().tolist())
|
| 378 |
+
# return sorted(
|
| 379 |
+
# [{"id": cid, "label": name, "score": float(p)} for cid, name, p in results],
|
| 380 |
+
# key=lambda x: x["score"],
|
| 381 |
+
# reverse=True,
|
| 382 |
+
# )
|
| 383 |
+
|
| 384 |
+
|
| 385 |
+
|
items.json
ADDED
|
@@ -0,0 +1,902 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"name": "Laptop",
|
| 4 |
+
"short_description": "Laptops are portable computers that can help you play games and learn new things. They can fold up just like a book!",
|
| 5 |
+
"category": "Office",
|
| 6 |
+
"rarity": "common",
|
| 7 |
+
"fun_fact": "Laptops have tiny fans to keep them cool while working.",
|
| 8 |
+
"id": 0,
|
| 9 |
+
"prompt": "a photo of a laptop computer (also called a notebook computer)",
|
| 10 |
+
"slug": "laptop"
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"name": "Pen",
|
| 14 |
+
"short_description": "Pens are used to write and draw. They come in many colors.",
|
| 15 |
+
"category": "Office",
|
| 16 |
+
"rarity": "common",
|
| 17 |
+
"fun_fact": "Pens can write underwater using special waterproof ink.",
|
| 18 |
+
"id": 1,
|
| 19 |
+
"prompt": "a photo of an ink pen for writing",
|
| 20 |
+
"slug": "pen"
|
| 21 |
+
},
|
| 22 |
+
{
|
| 23 |
+
"name": "Notebook",
|
| 24 |
+
"short_description": "A notebook is a book with paper inside. You can write or draw in it!",
|
| 25 |
+
"category": "Office",
|
| 26 |
+
"rarity": "common",
|
| 27 |
+
"fun_fact": "Leonardo da Vinci used notebooks to sketch out his brilliant ideas, like flying machines!",
|
| 28 |
+
"id": 2,
|
| 29 |
+
"prompt": "a photo of a paper notebook for writing (stationery)",
|
| 30 |
+
"slug": "notebook"
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"name": "Stapler",
|
| 34 |
+
"short_description": "A stapler helps you keep papers together by using metal clips.",
|
| 35 |
+
"category": "Office",
|
| 36 |
+
"rarity": "common",
|
| 37 |
+
"fun_fact": "The first stapler was made for King Louis XV in France.",
|
| 38 |
+
"id": 3,
|
| 39 |
+
"prompt": "a photo of a stapler (office paper stapler)",
|
| 40 |
+
"slug": "stapler"
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"name": "Highlighter",
|
| 44 |
+
"short_description": "A highlighter is a bright pen used to mark important words or pictures.",
|
| 45 |
+
"category": "Office",
|
| 46 |
+
"rarity": "rare",
|
| 47 |
+
"fun_fact": "Highlighter ink glows under black light, like magic!",
|
| 48 |
+
"id": 4,
|
| 49 |
+
"prompt": "a photo of a highlighter pen (also called a fluorescent marker)",
|
| 50 |
+
"slug": "highlighter"
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"name": "Paper Clip",
|
| 54 |
+
"short_description": "Paper clips are tiny helpers that keep papers together. They're shiny and colorful!",
|
| 55 |
+
"category": "Office",
|
| 56 |
+
"rarity": "rare",
|
| 57 |
+
"fun_fact": "The first paper clip was made over 100 years ago.",
|
| 58 |
+
"id": 5,
|
| 59 |
+
"prompt": "a photo of a paper clip (wire office clip)",
|
| 60 |
+
"slug": "paper_clip"
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"name": "Desk Chair",
|
| 64 |
+
"short_description": "A desk chair is a seat for working or studying. It can roll and spin!",
|
| 65 |
+
"category": "Office",
|
| 66 |
+
"rarity": "rare",
|
| 67 |
+
"fun_fact": "Early chairs were just wooden and had no cushions.",
|
| 68 |
+
"id": 6,
|
| 69 |
+
"prompt": "a photo of a desk chair (office chair)",
|
| 70 |
+
"slug": "desk_chair"
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"name": "Printer",
|
| 74 |
+
"short_description": "A printer makes copies of what's on a computer. It uses paper and ink.",
|
| 75 |
+
"category": "Office",
|
| 76 |
+
"rarity": "super rare",
|
| 77 |
+
"fun_fact": "Printers make pictures from millions of tiny dots.",
|
| 78 |
+
"id": 7,
|
| 79 |
+
"prompt": "a photo of a document printer (inkjet or laser printer)",
|
| 80 |
+
"slug": "printer"
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"name": "Coffee Mug",
|
| 84 |
+
"short_description": "A coffee mug is a cup with a handle to drink from. It's great for hot drinks!",
|
| 85 |
+
"category": "Office",
|
| 86 |
+
"rarity": "super rare",
|
| 87 |
+
"fun_fact": "Some mugs can change color or show pictures when they get hot.",
|
| 88 |
+
"id": 8,
|
| 89 |
+
"prompt": "a photo of a coffee mug (cup with a handle)",
|
| 90 |
+
"slug": "coffee_mug"
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"name": "Pencil",
|
| 94 |
+
"short_description": "A pencil lets you draw and write. It's long, thin, and has an eraser.",
|
| 95 |
+
"category": "School",
|
| 96 |
+
"rarity": "common",
|
| 97 |
+
"fun_fact": "A single pencil can draw a line about 35 miles long.",
|
| 98 |
+
"id": 9,
|
| 99 |
+
"prompt": "a photo of a wooden graphite pencil (for writing)",
|
| 100 |
+
"slug": "pencil"
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"name": "Backpack",
|
| 104 |
+
"short_description": "A backpack is a bag worn on your back. It holds all your school supplies!",
|
| 105 |
+
"category": "School",
|
| 106 |
+
"rarity": "common",
|
| 107 |
+
"fun_fact": "Some backpacks have hidden pockets for secret treasures.",
|
| 108 |
+
"id": 10,
|
| 109 |
+
"prompt": "a photo of a backpack (school bag)",
|
| 110 |
+
"slug": "backpack"
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"name": "Book",
|
| 114 |
+
"short_description": "A book has pages with words and pictures. You can learn or enjoy stories.",
|
| 115 |
+
"category": "School",
|
| 116 |
+
"rarity": "common",
|
| 117 |
+
"fun_fact": "Book pages are called leaves, just like trees!",
|
| 118 |
+
"id": 11,
|
| 119 |
+
"prompt": "a photo of a printed book (paper book)",
|
| 120 |
+
"slug": "book"
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"name": "Crayon",
|
| 124 |
+
"short_description": "Crayons are colorful sticks used to draw and color. They're easy to hold!",
|
| 125 |
+
"category": "School",
|
| 126 |
+
"rarity": "common",
|
| 127 |
+
"fun_fact": "Crayons are wax sticks; your warm hands help them draw smoothly.",
|
| 128 |
+
"id": 12,
|
| 129 |
+
"prompt": "a photo of a wax crayon (drawing crayon)",
|
| 130 |
+
"slug": "crayon"
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"name": "Glue Stick",
|
| 134 |
+
"short_description": "A glue stick is used to stick paper and crafts. Just roll and glue!",
|
| 135 |
+
"category": "School",
|
| 136 |
+
"rarity": "common",
|
| 137 |
+
"fun_fact": "Some glue sticks glow in the dark for nighttime crafting fun!",
|
| 138 |
+
"id": 13,
|
| 139 |
+
"prompt": "a photo of a glue stick (solid glue)",
|
| 140 |
+
"slug": "glue_stick"
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"name": "Ruler",
|
| 144 |
+
"short_description": "A ruler is used to measure how long things are.",
|
| 145 |
+
"category": "School",
|
| 146 |
+
"rarity": "rare",
|
| 147 |
+
"fun_fact": "Most rulers are 12 inches long — exactly one foot, just like your shoe!",
|
| 148 |
+
"id": 14,
|
| 149 |
+
"prompt": "a photo of a measuring ruler (straightedge)",
|
| 150 |
+
"slug": "ruler"
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"name": "Lunchbox",
|
| 154 |
+
"short_description": "A lunchbox holds your yummy food for school. It keeps everything safe!",
|
| 155 |
+
"category": "School",
|
| 156 |
+
"rarity": "rare",
|
| 157 |
+
"fun_fact": "Thick, squishy walls help keep your lunch cool and fresh.",
|
| 158 |
+
"id": 15,
|
| 159 |
+
"prompt": "a photo of a lunchbox (school lunch box)",
|
| 160 |
+
"slug": "lunchbox"
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"name": "Eraser",
|
| 164 |
+
"short_description": "Erasers help remove pencil marks. They're like magic fixers for your writing!",
|
| 165 |
+
"category": "School",
|
| 166 |
+
"rarity": "rare",
|
| 167 |
+
"fun_fact": "Erasers were first made from bread crumbs in ancient Japan.",
|
| 168 |
+
"id": 16,
|
| 169 |
+
"prompt": "a photo of an eraser (rubber eraser)",
|
| 170 |
+
"slug": "eraser"
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"name": "Classroom Globe",
|
| 174 |
+
"short_description": "A globe shows us where countries and oceans are. It spins!",
|
| 175 |
+
"category": "School",
|
| 176 |
+
"rarity": "super rare",
|
| 177 |
+
"fun_fact": "A globe shows what our Earth looks like from space!",
|
| 178 |
+
"id": 17,
|
| 179 |
+
"prompt": "a photo of a classroom globe (world globe)",
|
| 180 |
+
"slug": "classroom_globe"
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"name": "Daisy",
|
| 184 |
+
"short_description": "A daisy is a cute flower with white petals and a yellow center.",
|
| 185 |
+
"category": "Garden",
|
| 186 |
+
"rarity": "common",
|
| 187 |
+
"fun_fact": "Daisy flowers close their petals at night and open again in the morning.",
|
| 188 |
+
"id": 18,
|
| 189 |
+
"prompt": "a photo of a daisy (flower)",
|
| 190 |
+
"slug": "daisy"
|
| 191 |
+
},
|
| 192 |
+
{
|
| 193 |
+
"name": "Rock",
|
| 194 |
+
"short_description": "Rocks are hard and come in different shapes and sizes. You can find them almost anywhere!",
|
| 195 |
+
"category": "Garden",
|
| 196 |
+
"rarity": "common",
|
| 197 |
+
"fun_fact": "Some rocks were once melted lava from volcanoes, then cooled and hardened.",
|
| 198 |
+
"id": 19,
|
| 199 |
+
"prompt": "a photo of a rock (stone)",
|
| 200 |
+
"slug": "rock"
|
| 201 |
+
},
|
| 202 |
+
{
|
| 203 |
+
"name": "Leaf",
|
| 204 |
+
"short_description": "Leaves grow on trees and plants. They can be green, red, or yellow!",
|
| 205 |
+
"category": "Garden",
|
| 206 |
+
"rarity": "common",
|
| 207 |
+
"fun_fact": "Leaves make food for plants using sunlight, water, and air.",
|
| 208 |
+
"id": 20,
|
| 209 |
+
"prompt": "a photo of a leaf (plant leaf)",
|
| 210 |
+
"slug": "leaf"
|
| 211 |
+
},
|
| 212 |
+
{
|
| 213 |
+
"name": "Stick",
|
| 214 |
+
"short_description": "Sticks are long and hard, great for building or playing pretend!",
|
| 215 |
+
"category": "Garden",
|
| 216 |
+
"rarity": "common",
|
| 217 |
+
"fun_fact": "Sticks make great homes for small bugs and spiders.",
|
| 218 |
+
"id": 21,
|
| 219 |
+
"prompt": "a photo of a stick (tree branch or twig)",
|
| 220 |
+
"slug": "stick"
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"name": "Watering Can",
|
| 224 |
+
"short_description": "A watering can helps us water plants! Pour water through the spout.",
|
| 225 |
+
"category": "Garden",
|
| 226 |
+
"rarity": "common",
|
| 227 |
+
"fun_fact": "The sprinkler cap at the end of the spout is called a 'rose'!",
|
| 228 |
+
"id": 22,
|
| 229 |
+
"prompt": "a photo of a watering can (garden watering can)",
|
| 230 |
+
"slug": "watering_can"
|
| 231 |
+
},
|
| 232 |
+
{
|
| 233 |
+
"name": "Snail",
|
| 234 |
+
"short_description": "Snails are little animals with shells on their backs. They move very slowly.",
|
| 235 |
+
"category": "Garden",
|
| 236 |
+
"rarity": "rare",
|
| 237 |
+
"fun_fact": "Snails make slippery slime so they can slide without scratching.",
|
| 238 |
+
"id": 23,
|
| 239 |
+
"prompt": "a photo of a snail (animal with a spiral shell)",
|
| 240 |
+
"slug": "snail"
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"name": "Ladybug",
|
| 244 |
+
"short_description": "Ladybugs are small, red insects with black spots. They love to eat pesky bugs.",
|
| 245 |
+
"category": "Garden",
|
| 246 |
+
"rarity": "rare",
|
| 247 |
+
"fun_fact": "Ladybugs can have up to seven spots — count them if you can!",
|
| 248 |
+
"id": 24,
|
| 249 |
+
"prompt": "a photo of a ladybug (also called a ladybird beetle)",
|
| 250 |
+
"slug": "ladybug"
|
| 251 |
+
},
|
| 252 |
+
{
|
| 253 |
+
"name": "Birdhouse",
|
| 254 |
+
"short_description": "A birdhouse is a little house for birds. They stay safe and cozy inside.",
|
| 255 |
+
"category": "Garden",
|
| 256 |
+
"rarity": "super rare",
|
| 257 |
+
"fun_fact": "Some birds like to decorate their houses with shiny objects!",
|
| 258 |
+
"id": 25,
|
| 259 |
+
"prompt": "a photo of a birdhouse (nesting box)",
|
| 260 |
+
"slug": "birdhouse"
|
| 261 |
+
},
|
| 262 |
+
{
|
| 263 |
+
"name": "Garden Gnome",
|
| 264 |
+
"short_description": "A garden gnome is a small statue that lives in gardens and brings fun.",
|
| 265 |
+
"category": "Garden",
|
| 266 |
+
"rarity": "super rare",
|
| 267 |
+
"fun_fact": "Garden gnomes were once believed to guard treasures buried underground.",
|
| 268 |
+
"id": 26,
|
| 269 |
+
"prompt": "a photo of a garden gnome (garden statue)",
|
| 270 |
+
"slug": "garden_gnome"
|
| 271 |
+
},
|
| 272 |
+
{
|
| 273 |
+
"name": "Swing",
|
| 274 |
+
"short_description": "A swing is a seat that hangs from ropes or chains. It's super fun to swing high!",
|
| 275 |
+
"category": "Playground",
|
| 276 |
+
"rarity": "common",
|
| 277 |
+
"fun_fact": "Pumping your legs makes you go higher — each pump gives the swing a push.",
|
| 278 |
+
"id": 27,
|
| 279 |
+
"prompt": "a photo of a swing (playground swing)",
|
| 280 |
+
"slug": "swing"
|
| 281 |
+
},
|
| 282 |
+
{
|
| 283 |
+
"name": "Slide",
|
| 284 |
+
"short_description": "Slides are fun surfaces to quickly slide down. They can be found in playgrounds.",
|
| 285 |
+
"category": "Playground",
|
| 286 |
+
"rarity": "common",
|
| 287 |
+
"fun_fact": "Slides can be found in shapes like spirals and waves.",
|
| 288 |
+
"id": 28,
|
| 289 |
+
"prompt": "a photo of a slide (playground slide)",
|
| 290 |
+
"slug": "slide"
|
| 291 |
+
},
|
| 292 |
+
{
|
| 293 |
+
"name": "Ball",
|
| 294 |
+
"short_description": "Balls are round toys you can throw, catch, or kick. They're lots of fun!",
|
| 295 |
+
"category": "Playground",
|
| 296 |
+
"rarity": "common",
|
| 297 |
+
"fun_fact": "Balls are spheres, so they roll easily in any direction.",
|
| 298 |
+
"id": 29,
|
| 299 |
+
"prompt": "a photo of a ball (toy ball)",
|
| 300 |
+
"slug": "ball"
|
| 301 |
+
},
|
| 302 |
+
{
|
| 303 |
+
"name": "Sandbox",
|
| 304 |
+
"short_description": "A sandbox is a fun place where kids can play with sand. You can build castles and dig holes!",
|
| 305 |
+
"category": "Playground",
|
| 306 |
+
"rarity": "common",
|
| 307 |
+
"fun_fact": "Sand can be different colors like yellow, white, and even pink!",
|
| 308 |
+
"id": 30,
|
| 309 |
+
"prompt": "a photo of a sandbox (box of play sand)",
|
| 310 |
+
"slug": "sandbox"
|
| 311 |
+
},
|
| 312 |
+
{
|
| 313 |
+
"name": "Seesaw",
|
| 314 |
+
"short_description": "A seesaw is a long board that goes up and down. Friends can play on it together!",
|
| 315 |
+
"category": "Playground",
|
| 316 |
+
"rarity": "common",
|
| 317 |
+
"fun_fact": "Playing on a seesaw helps improve your balance and teamwork.",
|
| 318 |
+
"id": 31,
|
| 319 |
+
"prompt": "a photo of a seesaw (teeter-totter)",
|
| 320 |
+
"slug": "seesaw"
|
| 321 |
+
},
|
| 322 |
+
{
|
| 323 |
+
"name": "Hula Hoop",
|
| 324 |
+
"short_description": "A hula hoop is a big round toy you can spin around your waist. It's super fun!",
|
| 325 |
+
"category": "Playground",
|
| 326 |
+
"rarity": "rare",
|
| 327 |
+
"fun_fact": "The world record is over 100 hours of hula hooping!",
|
| 328 |
+
"id": 32,
|
| 329 |
+
"prompt": "a photo of a hula hoop (toy hoop)",
|
| 330 |
+
"slug": "hula_hoop"
|
| 331 |
+
},
|
| 332 |
+
{
|
| 333 |
+
"name": "Frisbee",
|
| 334 |
+
"short_description": "A frisbee is a round, flying disc that you can throw and catch with friends.",
|
| 335 |
+
"category": "Playground",
|
| 336 |
+
"rarity": "rare",
|
| 337 |
+
"fun_fact": "There's a sport called Ultimate Frisbee, like soccer with Frisbees.",
|
| 338 |
+
"id": 33,
|
| 339 |
+
"prompt": "a photo of a frisbee (flying disc)",
|
| 340 |
+
"slug": "frisbee"
|
| 341 |
+
},
|
| 342 |
+
{
|
| 343 |
+
"name": "Climbing Frame",
|
| 344 |
+
"short_description": "A climbing frame is a structure to climb and play on. It's super fun!",
|
| 345 |
+
"category": "Playground",
|
| 346 |
+
"rarity": "super rare",
|
| 347 |
+
"fun_fact": "Climbing frames can be shaped like rockets or castles!",
|
| 348 |
+
"id": 34,
|
| 349 |
+
"prompt": "a photo of a climbing frame (jungle gym)",
|
| 350 |
+
"slug": "climbing_frame"
|
| 351 |
+
},
|
| 352 |
+
{
|
| 353 |
+
"name": "Bouncer",
|
| 354 |
+
"short_description": "A bouncer is a fun thing to jump on. It helps you bounce up and down!",
|
| 355 |
+
"category": "Playground",
|
| 356 |
+
"rarity": "super rare",
|
| 357 |
+
"fun_fact": "Springs or bouncy air push you back up after each jump—boing, boing!",
|
| 358 |
+
"id": 35,
|
| 359 |
+
"prompt": "a photo of a playground bouncer (spring rider toy)",
|
| 360 |
+
"slug": "bouncer"
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"name": "Dog",
|
| 364 |
+
"short_description": "A dog is a fun, furry friend. They love to play and wag their tails!",
|
| 365 |
+
"category": "Animals",
|
| 366 |
+
"rarity": "common",
|
| 367 |
+
"fun_fact": "Dogs have a super sense of smell, much better than ours!",
|
| 368 |
+
"id": 36,
|
| 369 |
+
"prompt": "a photo of a dog (domestic dog)",
|
| 370 |
+
"slug": "dog"
|
| 371 |
+
},
|
| 372 |
+
{
|
| 373 |
+
"name": "Cat",
|
| 374 |
+
"short_description": "Cats are furry animals with whiskers and tails. They love to purr and nap.",
|
| 375 |
+
"category": "Animals",
|
| 376 |
+
"rarity": "common",
|
| 377 |
+
"fun_fact": "Whiskers help cats feel around in the dark or tight spaces.",
|
| 378 |
+
"id": 37,
|
| 379 |
+
"prompt": "a photo of a cat (house cat)",
|
| 380 |
+
"slug": "cat"
|
| 381 |
+
},
|
| 382 |
+
{
|
| 383 |
+
"name": "Bird",
|
| 384 |
+
"short_description": "Birds have wings and feathers. They love to chirp and fly!",
|
| 385 |
+
"category": "Animals",
|
| 386 |
+
"rarity": "common",
|
| 387 |
+
"fun_fact": "Penguins are birds but can't fly; they are great swimmers!",
|
| 388 |
+
"id": 38,
|
| 389 |
+
"prompt": "a photo of a bird (animal with feathers)",
|
| 390 |
+
"slug": "bird"
|
| 391 |
+
},
|
| 392 |
+
{
|
| 393 |
+
"name": "Fly",
|
| 394 |
+
"short_description": "Flies are tiny insects that love to buzz around. They have wings and big eyes!",
|
| 395 |
+
"category": "Animals",
|
| 396 |
+
"rarity": "common",
|
| 397 |
+
"fun_fact": "A fly's wings beat over 200 times every second!",
|
| 398 |
+
"id": 39,
|
| 399 |
+
"prompt": "a photo of a fly (housefly insect)",
|
| 400 |
+
"slug": "fly"
|
| 401 |
+
},
|
| 402 |
+
{
|
| 403 |
+
"name": "Fish",
|
| 404 |
+
"short_description": "Fish live in water and come in many colors and sizes. They have fins to swim.",
|
| 405 |
+
"category": "Animals",
|
| 406 |
+
"rarity": "common",
|
| 407 |
+
"fun_fact": "Fish can sleep with their eyes open—because they have no eyelids!",
|
| 408 |
+
"id": 40,
|
| 409 |
+
"prompt": "a photo of a fish (aquarium or pond fish)",
|
| 410 |
+
"slug": "fish"
|
| 411 |
+
},
|
| 412 |
+
{
|
| 413 |
+
"name": "Rabbit",
|
| 414 |
+
"short_description": "Rabbits are fluffy animals with long ears and love to hop around.",
|
| 415 |
+
"category": "Animals",
|
| 416 |
+
"rarity": "rare",
|
| 417 |
+
"fun_fact": "A rabbit's teeth never stop growing, so they chew a lot!",
|
| 418 |
+
"id": 41,
|
| 419 |
+
"prompt": "a photo of a rabbit (bunny)",
|
| 420 |
+
"slug": "rabbit"
|
| 421 |
+
},
|
| 422 |
+
{
|
| 423 |
+
"name": "Frog",
|
| 424 |
+
"short_description": "Frogs are jumpy animals that live in ponds. They make funny ribbit sounds!",
|
| 425 |
+
"category": "Animals",
|
| 426 |
+
"rarity": "rare",
|
| 427 |
+
"fun_fact": "Frogs can jump over 20 times their own body length!",
|
| 428 |
+
"id": 42,
|
| 429 |
+
"prompt": "a photo of a frog (amphibian)",
|
| 430 |
+
"slug": "frog"
|
| 431 |
+
},
|
| 432 |
+
{
|
| 433 |
+
"name": "Duck",
|
| 434 |
+
"short_description": "Ducks are birds that love to swim and quack. They have webbed feet!",
|
| 435 |
+
"category": "Animals",
|
| 436 |
+
"rarity": "super rare",
|
| 437 |
+
"fun_fact": "Ducks have waterproof feathers thanks to special oils they produce.",
|
| 438 |
+
"id": 43,
|
| 439 |
+
"prompt": "a photo of a duck (water bird)",
|
| 440 |
+
"slug": "duck"
|
| 441 |
+
},
|
| 442 |
+
{
|
| 443 |
+
"name": "Squirrel",
|
| 444 |
+
"short_description": "Squirrels are small, bushy-tailed animals that love to climb trees. They gather nuts!",
|
| 445 |
+
"category": "Animals",
|
| 446 |
+
"rarity": "super rare",
|
| 447 |
+
"fun_fact": "Squirrels plant thousands of trees by forgetting where they buried nuts.",
|
| 448 |
+
"id": 44,
|
| 449 |
+
"prompt": "a photo of a squirrel (tree squirrel)",
|
| 450 |
+
"slug": "squirrel"
|
| 451 |
+
},
|
| 452 |
+
{
|
| 453 |
+
"name": "Car",
|
| 454 |
+
"short_description": "A car has wheels and a steering wheel. It can take you places!",
|
| 455 |
+
"category": "Transportation",
|
| 456 |
+
"rarity": "common",
|
| 457 |
+
"fun_fact": "The first cars had no steering wheels—drivers used levers instead.",
|
| 458 |
+
"id": 45,
|
| 459 |
+
"prompt": "a photo of a car (automobile)",
|
| 460 |
+
"slug": "car"
|
| 461 |
+
},
|
| 462 |
+
{
|
| 463 |
+
"name": "Bus",
|
| 464 |
+
"short_description": "A bus is a big vehicle that takes people to places. It's like a car, but much bigger!",
|
| 465 |
+
"category": "Transportation",
|
| 466 |
+
"rarity": "common",
|
| 467 |
+
"fun_fact": "The first buses were pulled by horses before engines were invented!",
|
| 468 |
+
"id": 46,
|
| 469 |
+
"prompt": "a photo of a bus (passenger bus)",
|
| 470 |
+
"slug": "bus"
|
| 471 |
+
},
|
| 472 |
+
{
|
| 473 |
+
"name": "Bicycle",
|
| 474 |
+
"short_description": "Bicycles have pedals and handlebars you use to ride around. They're great for fun and exercise!",
|
| 475 |
+
"category": "Transportation",
|
| 476 |
+
"rarity": "common",
|
| 477 |
+
"fun_fact": "The first bicycles were called \"velocipedes\" and had wooden wheels!",
|
| 478 |
+
"id": 47,
|
| 479 |
+
"prompt": "a photo of a bicycle (bike)",
|
| 480 |
+
"slug": "bicycle"
|
| 481 |
+
},
|
| 482 |
+
{
|
| 483 |
+
"name": "Scooter",
|
| 484 |
+
"short_description": "A scooter is a ride-on toy with two wheels. You can push with your feet!",
|
| 485 |
+
"category": "Transportation",
|
| 486 |
+
"rarity": "common",
|
| 487 |
+
"fun_fact": "Early scooters were made of wood with roller skate wheels.",
|
| 488 |
+
"id": 48,
|
| 489 |
+
"prompt": "a photo of a scooter (kick scooter)",
|
| 490 |
+
"slug": "scooter"
|
| 491 |
+
},
|
| 492 |
+
{
|
| 493 |
+
"name": "Train",
|
| 494 |
+
"short_description": "Trains are big, fast vehicles that travel on tracks. They carry people and goods.",
|
| 495 |
+
"category": "Transportation",
|
| 496 |
+
"rarity": "common",
|
| 497 |
+
"fun_fact": "The longest train ever was over 4.5 miles long!",
|
| 498 |
+
"id": 49,
|
| 499 |
+
"prompt": "a photo of a train (railway train)",
|
| 500 |
+
"slug": "train"
|
| 501 |
+
},
|
| 502 |
+
{
|
| 503 |
+
"name": "Fire Truck",
|
| 504 |
+
"short_description": "A fire truck helps put out fires. It carries firefighters and water.",
|
| 505 |
+
"category": "Transportation",
|
| 506 |
+
"rarity": "rare",
|
| 507 |
+
"fun_fact": "Some fire trucks have ladders that reach over 100 feet high.",
|
| 508 |
+
"id": 50,
|
| 509 |
+
"prompt": "a photo of a fire truck (fire engine)",
|
| 510 |
+
"slug": "fire_truck"
|
| 511 |
+
},
|
| 512 |
+
{
|
| 513 |
+
"name": "Tractor",
|
| 514 |
+
"short_description": "Tractors are big machines that help farmers. They pull heavy things and plow fields.",
|
| 515 |
+
"category": "Transportation",
|
| 516 |
+
"rarity": "rare",
|
| 517 |
+
"fun_fact": "Some tractors have wheels taller than a person!",
|
| 518 |
+
"id": 51,
|
| 519 |
+
"prompt": "a photo of a tractor (farm tractor)",
|
| 520 |
+
"slug": "tractor"
|
| 521 |
+
},
|
| 522 |
+
{
|
| 523 |
+
"name": "Plane",
|
| 524 |
+
"short_description": "Planes are big flying machines. They take us on long trips in the sky!",
|
| 525 |
+
"category": "Transportation",
|
| 526 |
+
"rarity": "super rare",
|
| 527 |
+
"fun_fact": "The fastest passenger plane flew faster than the speed of sound.",
|
| 528 |
+
"id": 52,
|
| 529 |
+
"prompt": "a photo of an airplane (passenger plane)",
|
| 530 |
+
"slug": "plane"
|
| 531 |
+
},
|
| 532 |
+
{
|
| 533 |
+
"name": "Boat",
|
| 534 |
+
"short_description": "A boat is a fun way to travel on water. It can be big or small!",
|
| 535 |
+
"category": "Transportation",
|
| 536 |
+
"rarity": "super rare",
|
| 537 |
+
"fun_fact": "Viking boats were called longships and had dragon heads!",
|
| 538 |
+
"id": 53,
|
| 539 |
+
"prompt": "a photo of a boat (watercraft)",
|
| 540 |
+
"slug": "boat"
|
| 541 |
+
},
|
| 542 |
+
{
|
| 543 |
+
"name": "Toothbrush",
|
| 544 |
+
"short_description": "A toothbrush helps clean your teeth. Use it with toothpaste for a shiny smile!",
|
| 545 |
+
"category": "House",
|
| 546 |
+
"rarity": "common",
|
| 547 |
+
"fun_fact": "The first toothbrushes were made from sticks and twigs!",
|
| 548 |
+
"id": 54,
|
| 549 |
+
"prompt": "a photo of a toothbrush (manual toothbrush)",
|
| 550 |
+
"slug": "toothbrush"
|
| 551 |
+
},
|
| 552 |
+
{
|
| 553 |
+
"name": "Towel",
|
| 554 |
+
"short_description": "A towel is soft and helps you dry off after a bath or swim.",
|
| 555 |
+
"category": "House",
|
| 556 |
+
"rarity": "common",
|
| 557 |
+
"fun_fact": "Towels soak up water with thousands of tiny fabric loops.",
|
| 558 |
+
"id": 55,
|
| 559 |
+
"prompt": "a photo of a towel (bath towel)",
|
| 560 |
+
"slug": "towel"
|
| 561 |
+
},
|
| 562 |
+
{
|
| 563 |
+
"name": "Lamp",
|
| 564 |
+
"short_description": "A lamp lights up a room. You turn it on with a switch!",
|
| 565 |
+
"category": "House",
|
| 566 |
+
"rarity": "common",
|
| 567 |
+
"fun_fact": "The first electric lamp was made over 140 years ago by Thomas Edison.",
|
| 568 |
+
"id": 56,
|
| 569 |
+
"prompt": "a photo of a lamp (table or floor lamp)",
|
| 570 |
+
"slug": "lamp"
|
| 571 |
+
},
|
| 572 |
+
{
|
| 573 |
+
"name": "Chair",
|
| 574 |
+
"short_description": "A chair is a seat with four legs and a back. It's great for sitting!",
|
| 575 |
+
"category": "House",
|
| 576 |
+
"rarity": "common",
|
| 577 |
+
"fun_fact": "In ancient Egypt, only kings and queens had chairs with backs.",
|
| 578 |
+
"id": 57,
|
| 579 |
+
"prompt": "a photo of a chair (wooden or cushioned chair)",
|
| 580 |
+
"slug": "chair"
|
| 581 |
+
},
|
| 582 |
+
{
|
| 583 |
+
"name": "Cushion",
|
| 584 |
+
"short_description": "A cushion is a soft pad you can sit on. It makes couches more comfy!",
|
| 585 |
+
"category": "House",
|
| 586 |
+
"rarity": "common",
|
| 587 |
+
"fun_fact": "In ancient times, cushions were signs of royalty and power.",
|
| 588 |
+
"id": 58,
|
| 589 |
+
"prompt": "a photo of a cushion (sofa cushion)",
|
| 590 |
+
"slug": "cushion"
|
| 591 |
+
},
|
| 592 |
+
{
|
| 593 |
+
"name": "Fan",
|
| 594 |
+
"short_description": "A fan makes the air move and keeps us cool. It's like a mini wind!",
|
| 595 |
+
"category": "House",
|
| 596 |
+
"rarity": "rare",
|
| 597 |
+
"fun_fact": "Electric fans were invented over 130 years ago!",
|
| 598 |
+
"id": 59,
|
| 599 |
+
"prompt": "a photo of a fan (electric fan)",
|
| 600 |
+
"slug": "fan"
|
| 601 |
+
},
|
| 602 |
+
{
|
| 603 |
+
"name": "Clock",
|
| 604 |
+
"short_description": "A clock tells the time. It has numbers and hands to point.",
|
| 605 |
+
"category": "House",
|
| 606 |
+
"rarity": "rare",
|
| 607 |
+
"fun_fact": "Big Ben is a famous clock tower in London, England.",
|
| 608 |
+
"id": 60,
|
| 609 |
+
"prompt": "a photo of a clock (analog or wall clock)",
|
| 610 |
+
"slug": "clock"
|
| 611 |
+
},
|
| 612 |
+
{
|
| 613 |
+
"name": "Painting",
|
| 614 |
+
"short_description": "Paintings are cool pictures made with colors on paper or canvas.",
|
| 615 |
+
"category": "House",
|
| 616 |
+
"rarity": "super rare",
|
| 617 |
+
"fun_fact": "Famous artist Leonardo da Vinci loved painting with eggs.",
|
| 618 |
+
"id": 61,
|
| 619 |
+
"prompt": "a photo of a painting (art on canvas or paper)",
|
| 620 |
+
"slug": "painting"
|
| 621 |
+
},
|
| 622 |
+
{
|
| 623 |
+
"name": "Plant Pot",
|
| 624 |
+
"short_description": "A plant pot holds soil for growing plants. It's like a tiny garden!",
|
| 625 |
+
"category": "House",
|
| 626 |
+
"rarity": "super rare",
|
| 627 |
+
"fun_fact": "Using big pots lets plants grow big and tall!",
|
| 628 |
+
"id": 62,
|
| 629 |
+
"prompt": "a photo of a plant pot (flower pot)",
|
| 630 |
+
"slug": "plant_pot"
|
| 631 |
+
},
|
| 632 |
+
{
|
| 633 |
+
"name": "T-Shirt",
|
| 634 |
+
"short_description": "A T-shirt is a piece of clothing with short sleeves. It's soft and comfy!",
|
| 635 |
+
"category": "Clothing",
|
| 636 |
+
"rarity": "common",
|
| 637 |
+
"fun_fact": "T-shirts got their name because of their T-shape.",
|
| 638 |
+
"id": 63,
|
| 639 |
+
"prompt": "a photo of a t-shirt (short-sleeved shirt)",
|
| 640 |
+
"slug": "t_shirt"
|
| 641 |
+
},
|
| 642 |
+
{
|
| 643 |
+
"name": "Socks",
|
| 644 |
+
"short_description": "Socks are soft cloth tubes for your feet. They come in lots of colors!",
|
| 645 |
+
"category": "Clothing",
|
| 646 |
+
"rarity": "common",
|
| 647 |
+
"fun_fact": "There are special socks for running with extra cushion.",
|
| 648 |
+
"id": 64,
|
| 649 |
+
"prompt": "a photo of socks (pair of socks)",
|
| 650 |
+
"slug": "socks"
|
| 651 |
+
},
|
| 652 |
+
{
|
| 653 |
+
"name": "Shoes",
|
| 654 |
+
"short_description": "Shoes protect your feet and look cool! They come in all colors.",
|
| 655 |
+
"category": "Clothing",
|
| 656 |
+
"rarity": "common",
|
| 657 |
+
"fun_fact": "Shoes were first invented over 5,000 years ago!",
|
| 658 |
+
"id": 65,
|
| 659 |
+
"prompt": "a photo of shoes (pair of shoes)",
|
| 660 |
+
"slug": "shoes"
|
| 661 |
+
},
|
| 662 |
+
{
|
| 663 |
+
"name": "Hat",
|
| 664 |
+
"short_description": "A hat is something you wear on your head. It can keep you warm or cool!",
|
| 665 |
+
"category": "Clothing",
|
| 666 |
+
"rarity": "common",
|
| 667 |
+
"fun_fact": "Some hats are made just for magic tricks!",
|
| 668 |
+
"id": 66,
|
| 669 |
+
"prompt": "a photo of a hat (headwear)",
|
| 670 |
+
"slug": "hat"
|
| 671 |
+
},
|
| 672 |
+
{
|
| 673 |
+
"name": "Jacket",
|
| 674 |
+
"short_description": "A jacket keeps you warm when it's chilly. It's like a cozy hug.",
|
| 675 |
+
"category": "Clothing",
|
| 676 |
+
"rarity": "common",
|
| 677 |
+
"fun_fact": "Winter jackets are often stuffed with fluffy feathers!",
|
| 678 |
+
"id": 67,
|
| 679 |
+
"prompt": "a photo of a jacket (outerwear)",
|
| 680 |
+
"slug": "jacket"
|
| 681 |
+
},
|
| 682 |
+
{
|
| 683 |
+
"name": "Scarf",
|
| 684 |
+
"short_description": "A scarf is a soft piece of clothing you wear around your neck. It keeps you warm.",
|
| 685 |
+
"category": "Clothing",
|
| 686 |
+
"rarity": "rare",
|
| 687 |
+
"fun_fact": "In ancient Rome, scarves were used to wipe sweat, not for warmth.",
|
| 688 |
+
"id": 68,
|
| 689 |
+
"prompt": "a photo of a scarf (neckwear)",
|
| 690 |
+
"slug": "scarf"
|
| 691 |
+
},
|
| 692 |
+
{
|
| 693 |
+
"name": "Gloves",
|
| 694 |
+
"short_description": "Gloves are like tiny jackets for your hands. They keep you warm or safe!",
|
| 695 |
+
"category": "Clothing",
|
| 696 |
+
"rarity": "rare",
|
| 697 |
+
"fun_fact": "Astronauts wear special gloves to hold tools in space!",
|
| 698 |
+
"id": 69,
|
| 699 |
+
"prompt": "a photo of gloves (pair of gloves)",
|
| 700 |
+
"slug": "gloves"
|
| 701 |
+
},
|
| 702 |
+
{
|
| 703 |
+
"name": "Sunglasses",
|
| 704 |
+
"short_description": "Sunglasses protect your eyes from the bright sun. They come in cool shapes!",
|
| 705 |
+
"category": "Clothing",
|
| 706 |
+
"rarity": "rare",
|
| 707 |
+
"fun_fact": "In ancient China, judges wore dark glasses to hide their eyes in court.",
|
| 708 |
+
"id": 70,
|
| 709 |
+
"prompt": "a photo of sunglasses (pair of sunglasses)",
|
| 710 |
+
"slug": "sunglasses"
|
| 711 |
+
},
|
| 712 |
+
{
|
| 713 |
+
"name": "Umbrella",
|
| 714 |
+
"short_description": "Umbrellas keep you dry in the rain. They open wide like a big flower!",
|
| 715 |
+
"category": "Clothing",
|
| 716 |
+
"rarity": "super rare",
|
| 717 |
+
"fun_fact": "Umbrellas were once used as sun protection, not for rain.",
|
| 718 |
+
"id": 71,
|
| 719 |
+
"prompt": "a photo of an umbrella (rain umbrella)",
|
| 720 |
+
"slug": "umbrella"
|
| 721 |
+
},
|
| 722 |
+
{
|
| 723 |
+
"name": "Sofa",
|
| 724 |
+
"short_description": "A sofa is a big, comfy seat. You can sit or lie on it.",
|
| 725 |
+
"category": "Living Room",
|
| 726 |
+
"rarity": "common",
|
| 727 |
+
"fun_fact": "The word 'sofa' comes from an Arabic word meaning 'bench with cushions'.",
|
| 728 |
+
"id": 72,
|
| 729 |
+
"prompt": "a photo of a sofa (couch)",
|
| 730 |
+
"slug": "sofa"
|
| 731 |
+
},
|
| 732 |
+
{
|
| 733 |
+
"name": "TV",
|
| 734 |
+
"short_description": "A TV shows your favorite cartoons and movies with sound and pictures.",
|
| 735 |
+
"category": "Living Room",
|
| 736 |
+
"rarity": "common",
|
| 737 |
+
"fun_fact": "The first TV shows were black and white.",
|
| 738 |
+
"id": 73,
|
| 739 |
+
"prompt": "a photo of a television (TV set)",
|
| 740 |
+
"slug": "tv"
|
| 741 |
+
},
|
| 742 |
+
{
|
| 743 |
+
"name": "Remote Control",
|
| 744 |
+
"short_description": "A remote helps you change channels on the TV. It has buttons to press!",
|
| 745 |
+
"category": "Living Room",
|
| 746 |
+
"rarity": "rare",
|
| 747 |
+
"fun_fact": "Remote controls use light beams called infrared to work.",
|
| 748 |
+
"id": 74,
|
| 749 |
+
"prompt": "a photo of a remote control (TV remote)",
|
| 750 |
+
"slug": "remote_control"
|
| 751 |
+
},
|
| 752 |
+
{
|
| 753 |
+
"name": "Rug",
|
| 754 |
+
"short_description": "Rugs are soft mats that cover the floor. They come in many colors and shapes.",
|
| 755 |
+
"category": "Living Room",
|
| 756 |
+
"rarity": "common",
|
| 757 |
+
"fun_fact": "Some handmade rugs can take years to finish.",
|
| 758 |
+
"id": 75,
|
| 759 |
+
"prompt": "a photo of a rug (floor carpet)",
|
| 760 |
+
"slug": "rug"
|
| 761 |
+
},
|
| 762 |
+
{
|
| 763 |
+
"name": "Bookshelf",
|
| 764 |
+
"short_description": "A bookshelf holds your books, keeping them tidy and easy to find.",
|
| 765 |
+
"category": "Living Room",
|
| 766 |
+
"rarity": "rare",
|
| 767 |
+
"fun_fact": "Some bookshelves have secret compartments for hiding small treasures.",
|
| 768 |
+
"id": 76,
|
| 769 |
+
"prompt": "a photo of a bookshelf (bookcase)",
|
| 770 |
+
"slug": "bookshelf"
|
| 771 |
+
},
|
| 772 |
+
{
|
| 773 |
+
"name": "Blanket",
|
| 774 |
+
"short_description": "A blanket is soft and keeps you warm. It's great for snuggling!",
|
| 775 |
+
"category": "Living Room",
|
| 776 |
+
"rarity": "rare",
|
| 777 |
+
"fun_fact": "Blankets keep you warm by trapping your body heat inside.",
|
| 778 |
+
"id": 77,
|
| 779 |
+
"prompt": "a photo of a blanket (throw blanket)",
|
| 780 |
+
"slug": "blanket"
|
| 781 |
+
},
|
| 782 |
+
{
|
| 783 |
+
"name": "Candle",
|
| 784 |
+
"short_description": "Candles are sticks of wax with a wick that can be lit to make light. They also smell nice!",
|
| 785 |
+
"category": "Living Room",
|
| 786 |
+
"rarity": "super rare",
|
| 787 |
+
"fun_fact": "The earliest candles were made from animal fat, not wax.",
|
| 788 |
+
"id": 78,
|
| 789 |
+
"prompt": "a photo of a candle (wax candle)",
|
| 790 |
+
"slug": "candle"
|
| 791 |
+
},
|
| 792 |
+
{
|
| 793 |
+
"name": "Record Player",
|
| 794 |
+
"short_description": "A record player plays music from vinyl records. It uses a needle to read grooves.",
|
| 795 |
+
"category": "Living Room",
|
| 796 |
+
"rarity": "super rare",
|
| 797 |
+
"fun_fact": "A needle travels in grooves to create the music we hear.",
|
| 798 |
+
"id": 79,
|
| 799 |
+
"prompt": "a photo of a record player (turntable)",
|
| 800 |
+
"slug": "record_player"
|
| 801 |
+
},
|
| 802 |
+
{
|
| 803 |
+
"name": "Coffee Table",
|
| 804 |
+
"short_description": "A coffee table sits in front of the sofa and holds drinks, snacks, or board games.",
|
| 805 |
+
"category": "Living Room",
|
| 806 |
+
"rarity": "common",
|
| 807 |
+
"fun_fact": "Coffee tables can be square, round, or even shaped like animals!",
|
| 808 |
+
"id": 80,
|
| 809 |
+
"prompt": "a photo of a coffee table (low table)",
|
| 810 |
+
"slug": "coffee_table"
|
| 811 |
+
},
|
| 812 |
+
{
|
| 813 |
+
"name": "Teddy Bear",
|
| 814 |
+
"short_description": "Teddy bears are soft, cuddly toys. They love hugs and snuggles!",
|
| 815 |
+
"category": "Toys & Crafts",
|
| 816 |
+
"rarity": "common",
|
| 817 |
+
"fun_fact": "Teddy bears are named after U.S. President Theodore Roosevelt.",
|
| 818 |
+
"id": 81,
|
| 819 |
+
"prompt": "a photo of a teddy bear (stuffed toy)",
|
| 820 |
+
"slug": "teddy_bear"
|
| 821 |
+
},
|
| 822 |
+
{
|
| 823 |
+
"name": "Toy Car",
|
| 824 |
+
"short_description": "A toy car is fun to zoom around. You can pretend it's on a racetrack!",
|
| 825 |
+
"category": "Toys & Crafts",
|
| 826 |
+
"rarity": "common",
|
| 827 |
+
"fun_fact": "Some tiny toy cars can roll over 20 feet on one push!",
|
| 828 |
+
"id": 82,
|
| 829 |
+
"prompt": "a photo of a toy car (miniature car)",
|
| 830 |
+
"slug": "toy_car"
|
| 831 |
+
},
|
| 832 |
+
{
|
| 833 |
+
"name": "LEGO",
|
| 834 |
+
"short_description": "LEGO bricks are colorful blocks that you can use to build anything you imagine!",
|
| 835 |
+
"category": "Toys & Crafts",
|
| 836 |
+
"rarity": "common",
|
| 837 |
+
"fun_fact": "The word LEGO means 'play well' in Danish.",
|
| 838 |
+
"id": 83,
|
| 839 |
+
"prompt": "a photo of lego bricks (building blocks)",
|
| 840 |
+
"slug": "lego"
|
| 841 |
+
},
|
| 842 |
+
{
|
| 843 |
+
"name": "Paintbrush",
|
| 844 |
+
"short_description": "A paintbrush is used to make art by spreading paint on surfaces. It has soft bristles!",
|
| 845 |
+
"category": "Toys & Crafts",
|
| 846 |
+
"rarity": "common",
|
| 847 |
+
"fun_fact": "The oldest known paintbrushes were made from animal hair tied to sticks.",
|
| 848 |
+
"id": 84,
|
| 849 |
+
"prompt": "a photo of a paintbrush (art brush)",
|
| 850 |
+
"slug": "paintbrush"
|
| 851 |
+
},
|
| 852 |
+
{
|
| 853 |
+
"name": "Doll",
|
| 854 |
+
"short_description": "A doll is a fun toy that looks like a person. You can dress it up and play with it!",
|
| 855 |
+
"category": "Toys & Crafts",
|
| 856 |
+
"rarity": "rare",
|
| 857 |
+
"fun_fact": "Ancient dolls were made from clay, wood, and even ivory.",
|
| 858 |
+
"id": 85,
|
| 859 |
+
"prompt": "a photo of a doll (toy doll)",
|
| 860 |
+
"slug": "doll"
|
| 861 |
+
},
|
| 862 |
+
{
|
| 863 |
+
"name": "Videogame Console",
|
| 864 |
+
"short_description": "A console lets you play video games on your TV. How fun!",
|
| 865 |
+
"category": "Toys & Crafts",
|
| 866 |
+
"rarity": "common",
|
| 867 |
+
"fun_fact": "The first home console was called the Magnavox Odyssey.",
|
| 868 |
+
"id": 86,
|
| 869 |
+
"prompt": "a photo of a video game console (gaming console)",
|
| 870 |
+
"slug": "videogame_console"
|
| 871 |
+
},
|
| 872 |
+
{
|
| 873 |
+
"name": "Puzzle Piece",
|
| 874 |
+
"short_description": "A puzzle piece fits with others to make a big picture. It's like a fun mystery!",
|
| 875 |
+
"category": "Toys & Crafts",
|
| 876 |
+
"rarity": "rare",
|
| 877 |
+
"fun_fact": "The first jigsaw puzzles were made from maps to teach kids geography.",
|
| 878 |
+
"id": 87,
|
| 879 |
+
"prompt": "a photo of a puzzle piece (jigsaw puzzle piece)",
|
| 880 |
+
"slug": "puzzle_piece"
|
| 881 |
+
},
|
| 882 |
+
{
|
| 883 |
+
"name": "Stickers",
|
| 884 |
+
"short_description": "Stickers are like colorful, sticky friends you can put on things! They make everything fun.",
|
| 885 |
+
"category": "Toys & Crafts",
|
| 886 |
+
"rarity": "rare",
|
| 887 |
+
"fun_fact": "Some artists use thousands of tiny stickers to create huge, amazing pictures.",
|
| 888 |
+
"id": 88,
|
| 889 |
+
"prompt": "a photo of stickers (adhesive stickers)",
|
| 890 |
+
"slug": "stickers"
|
| 891 |
+
},
|
| 892 |
+
{
|
| 893 |
+
"name": "Magic Wand",
|
| 894 |
+
"short_description": "A magic wand is a stick that wizards use to do magic! It sparkles and shines.",
|
| 895 |
+
"category": "Toys & Crafts",
|
| 896 |
+
"rarity": "super rare",
|
| 897 |
+
"fun_fact": "Wizards wave their wands in special ways for magic to work!",
|
| 898 |
+
"id": 89,
|
| 899 |
+
"prompt": "a photo of a magic wand (toy wand)",
|
| 900 |
+
"slug": "magic_wand"
|
| 901 |
+
}
|
| 902 |
+
]
|
reparam.py
ADDED
|
@@ -0,0 +1,341 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#
|
| 2 |
+
# For licensing see accompanying LICENSE file.
|
| 3 |
+
# Copyright (C) 2024 Apple Inc. All Rights Reserved.
|
| 4 |
+
#
|
| 5 |
+
from typing import Union, Tuple
|
| 6 |
+
|
| 7 |
+
import copy
|
| 8 |
+
import torch
|
| 9 |
+
import torch.nn as nn
|
| 10 |
+
import torch.nn.functional as F
|
| 11 |
+
|
| 12 |
+
__all__ = ["MobileOneBlock", "reparameterize_model"]
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class SEBlock(nn.Module):
    """Squeeze-and-Excitation channel gate.

    PyTorch implementation of `Squeeze-and-Excitation Networks` -
    https://arxiv.org/pdf/1709.01507.pdf
    """

    def __init__(self, in_channels: int, rd_ratio: float = 0.0625) -> None:
        """Create the two 1x1 convolutions that form the gate.

        Args:
            in_channels: Number of input channels.
            rd_ratio: Input channel reduction ratio; the bottleneck width is
                ``int(in_channels * rd_ratio)``.
        """
        super().__init__()
        squeezed_channels = int(in_channels * rd_ratio)
        # ``reduce`` is built before ``expand`` so weight initialisation
        # consumes the RNG in the same order as before.
        self.reduce = nn.Conv2d(
            in_channels, squeezed_channels, kernel_size=1, stride=1, bias=True
        )
        self.expand = nn.Conv2d(
            squeezed_channels, in_channels, kernel_size=1, stride=1, bias=True
        )

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        """Scale every channel of ``inputs`` by its learned gate value."""
        _, channels, height, width = inputs.size()
        # Squeeze: average over the full spatial extent.
        pooled = F.avg_pool2d(inputs, kernel_size=[height, width])
        # Excite: bottleneck -> ReLU -> expand -> sigmoid gate in (0, 1).
        gate = torch.sigmoid(self.expand(F.relu(self.reduce(pooled))))
        return inputs * gate.view(-1, channels, 1, 1)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class MobileOneBlock(nn.Module):
    """MobileOne building block.

    This block has a multi-branched architecture at train-time
    and plain-CNN style architecture at inference time
    For more details, please refer to our paper:
    `An Improved One millisecond Mobile Backbone` -
    https://arxiv.org/pdf/2206.04040.pdf
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[int, Tuple[int, int]],
        stride: int = 1,
        padding: int = 0,
        dilation: int = 1,
        groups: int = 1,
        inference_mode: bool = False,
        use_se: bool = False,
        use_act: bool = True,
        use_scale_branch: bool = True,
        num_conv_branches: int = 1,
        activation: nn.Module = nn.GELU(),
    ) -> None:
        """Construct a MobileOneBlock module.

        Args:
            in_channels: Number of channels in the input.
            out_channels: Number of channels produced by the block.
            kernel_size: Size of the convolution kernel (int or ``(h, w)``).
            stride: Stride size.
            padding: Zero-padding size.
            dilation: Kernel dilation factor.
            groups: Group number.
            inference_mode: If True, instantiates model in inference mode
                (a single fused convolution, no train-time branches).
            use_se: Whether to apply an ``SEBlock`` to the block output.
            use_act: Whether to use activation. Default: ``True``
            use_scale_branch: Whether to use scale branch. Default: ``True``
            num_conv_branches: Number of linear conv branches.
            activation: Activation module applied when ``use_act`` is True.
        """
        super().__init__()
        self.inference_mode = inference_mode
        self.groups = groups
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.kernel_size = kernel_size
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_conv_branches = num_conv_branches

        # Optional squeeze-and-excite on the block output.
        self.se = SEBlock(out_channels) if use_se else nn.Identity()
        self.activation = activation if use_act else nn.Identity()

        if inference_mode:
            self.reparam_conv = nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                dilation=dilation,
                groups=groups,
                bias=True,
            )
            return

        # Re-parameterizable skip connection (only when shapes are preserved).
        self.rbr_skip = (
            nn.BatchNorm2d(num_features=in_channels)
            if out_channels == in_channels and stride == 1
            else None
        )

        # Re-parameterizable conv branches
        if num_conv_branches > 0:
            self.rbr_conv = nn.ModuleList(
                [
                    self._conv_bn(kernel_size=kernel_size, padding=padding)
                    for _ in range(self.num_conv_branches)
                ]
            )
        else:
            self.rbr_conv = None

        # Re-parameterizable scale branch: a 1x1 conv, only meaningful when
        # the main kernel is larger than 1x1.
        self.rbr_scale = None
        if (self._kernel_hw()[0] > 1) and use_scale_branch:
            self.rbr_scale = self._conv_bn(kernel_size=1, padding=0)

    def _kernel_hw(self) -> Tuple[int, int]:
        """Return the kernel size as an explicit ``(height, width)`` pair."""
        if isinstance(self.kernel_size, int):
            return self.kernel_size, self.kernel_size
        return self.kernel_size[0], self.kernel_size[1]

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply forward pass."""
        # Inference mode forward pass.
        if self.inference_mode:
            return self.activation(self.se(self.reparam_conv(x)))

        # Multi-branched train-time forward pass.
        # Skip branch output
        identity_out = 0
        if self.rbr_skip is not None:
            identity_out = self.rbr_skip(x)

        # Scale branch output
        scale_out = 0
        if self.rbr_scale is not None:
            scale_out = self.rbr_scale(x)

        # Other branches
        out = scale_out + identity_out
        if self.rbr_conv is not None:
            for branch in self.rbr_conv:
                out += branch(x)

        return self.activation(self.se(out))

    def reparameterize(self):
        """Following works like `RepVGG: Making VGG-style ConvNets Great Again` -
        https://arxiv.org/pdf/2101.03697.pdf. We re-parameterize multi-branched
        architecture used at training time to obtain a plain CNN-like structure
        for inference.

        The call is a no-op when the block is already in inference mode.
        """
        if self.inference_mode:
            return
        kernel, bias = self._get_kernel_bias()
        self.reparam_conv = nn.Conv2d(
            in_channels=self.in_channels,
            out_channels=self.out_channels,
            kernel_size=self.kernel_size,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups,
            bias=True,
        )
        self.reparam_conv.weight.data = kernel
        self.reparam_conv.bias.data = bias

        # Delete un-used branches
        for para in self.parameters():
            para.detach_()
        del self.rbr_conv
        del self.rbr_scale
        if hasattr(self, "rbr_skip"):
            del self.rbr_skip

        self.inference_mode = True

    def _get_kernel_bias(self) -> Tuple[torch.Tensor, torch.Tensor]:
        """Method to obtain re-parameterized kernel and bias.
        Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L83

        Returns:
            Tuple of (kernel, bias) after fusing branches.
        """
        # get weights and bias of scale branch
        kernel_scale = 0
        bias_scale = 0
        if self.rbr_scale is not None:
            kernel_scale, bias_scale = self._fuse_bn_tensor(self.rbr_scale)
            # Pad the 1x1 scale kernel to the conv-branch kernel size. The
            # kernel size may be an int or an (h, w) pair; F.pad takes the
            # last dimension (width) first.
            # NOTE(review): centring like this only lines up for odd kernel
            # sizes; even sizes are not handled here.
            kernel_h, kernel_w = self._kernel_hw()
            pad_h, pad_w = kernel_h // 2, kernel_w // 2
            kernel_scale = F.pad(kernel_scale, [pad_w, pad_w, pad_h, pad_h])

        # get weights and bias of skip branch
        kernel_identity = 0
        bias_identity = 0
        if self.rbr_skip is not None:
            kernel_identity, bias_identity = self._fuse_bn_tensor(self.rbr_skip)

        # get weights and bias of conv branches
        kernel_conv = 0
        bias_conv = 0
        if self.rbr_conv is not None:
            for branch in self.rbr_conv:
                _kernel, _bias = self._fuse_bn_tensor(branch)
                kernel_conv += _kernel
                bias_conv += _bias

        kernel_final = kernel_conv + kernel_scale + kernel_identity
        bias_final = bias_conv + bias_scale + bias_identity
        return kernel_final, bias_final

    def _fuse_bn_tensor(
        self, branch: Union[nn.Sequential, nn.BatchNorm2d]
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Method to fuse batchnorm layer with preceding conv layer.
        Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L95

        Args:
            branch: Either a conv-bn ``nn.Sequential`` (with ``conv`` and
                ``bn`` children) or a bare ``nn.BatchNorm2d`` skip branch.

        Returns:
            Tuple of (kernel, bias) after fusing batchnorm.

        Raises:
            TypeError: If ``branch`` is neither of the supported types.
        """
        if isinstance(branch, nn.Sequential):
            kernel = branch.conv.weight
            bn = branch.bn
        elif isinstance(branch, nn.BatchNorm2d):
            if not hasattr(self, "id_tensor"):
                # Express the identity mapping as a (grouped) conv kernel:
                # a single 1 at the spatial centre, wiring each output
                # channel to the matching input channel inside its group.
                input_dim = self.in_channels // self.groups
                kernel_h, kernel_w = self._kernel_hw()
                kernel_value = torch.zeros(
                    (self.in_channels, input_dim, kernel_h, kernel_w),
                    dtype=branch.weight.dtype,
                    device=branch.weight.device,
                )
                channels = torch.arange(
                    self.in_channels, device=branch.weight.device
                )
                kernel_value[
                    channels, channels % input_dim, kernel_h // 2, kernel_w // 2
                ] = 1
                self.id_tensor = kernel_value
            kernel = self.id_tensor
            bn = branch
        else:
            raise TypeError(
                "branch must be nn.Sequential or nn.BatchNorm2d, "
                f"got {type(branch).__name__}"
            )

        # Fold BN(y) = gamma * (y - mean) / std + beta into kernel and bias.
        std = (bn.running_var + bn.eps).sqrt()
        t = (bn.weight / std).reshape(-1, 1, 1, 1)
        return kernel * t, bn.bias - bn.running_mean * bn.weight / std

    def _conv_bn(
        self, kernel_size: Union[int, Tuple[int, int]], padding: int
    ) -> nn.Sequential:
        """Helper method to construct conv-batchnorm layers.

        Args:
            kernel_size: Size of the convolution kernel.
            padding: Zero-padding size.

        Returns:
            Conv-BN module.
        """
        mod_list = nn.Sequential()
        mod_list.add_module(
            "conv",
            nn.Conv2d(
                in_channels=self.in_channels,
                out_channels=self.out_channels,
                kernel_size=kernel_size,
                stride=self.stride,
                padding=padding,
                # Must match ``reparam_conv``; otherwise the fused conv is
                # not equivalent to the train-time branches when dilation > 1.
                dilation=self.dilation,
                groups=self.groups,
                bias=False,
            ),
        )
        mod_list.add_module("bn", nn.BatchNorm2d(num_features=self.out_channels))
        return mod_list
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
def reparameterize_model(model: torch.nn.Module) -> nn.Module:
    """Return an inference-ready copy of ``model``.

    The input is deep-copied, then every sub-module that exposes a
    ``reparameterize`` method has it called, so the multi-branched
    train-time structure is folded into a single branch.

    Args:
        model: MobileOne model in train mode.

    Returns:
        MobileOne model in inference mode.
    """
    # Work on a copy so the caller's graph stays untouched.
    fused = copy.deepcopy(model)
    # Keep the traversal lazy: modules rewrite their own children while
    # being re-parameterized.
    reparameterizable = (
        sub for sub in fused.modules() if hasattr(sub, "reparameterize")
    )
    for sub in reparameterizable:
        sub.reparameterize()
    return fused
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.0.0
|
| 2 |
+
Pillow
|
| 3 |
+
torch>=2.0.0
|
| 4 |
+
torchvision
|
| 5 |
+
open_clip_torch
|
| 6 |
+
huggingface_hub>=0.23.0
|
| 7 |
+
safetensors>=0.4.3
|