finhdev committed
Commit 407a13c · verified · 1 Parent(s): 048809c

Update handler.py

Files changed (1)
  1. handler.py +126 -39
handler.py CHANGED
@@ -69,61 +69,148 @@ class EndpointHandler:
             key=lambda x: x["score"],
             reverse=True
         )
-# import contextlib, io, base64, torch
+
+
+
+# """
+# MobileCLIP-B Zero-Shot Image Classifier (Hugging Face Inference Endpoint)
+# ===========================================================================
+
+# * One container instance is created per replica; the `EndpointHandler`
+#   object below is instantiated exactly **once** at start-up.
+
+# * At request time (`__call__`) we receive a base-64-encoded image, run a
+#   **single forward pass**, and return class probabilities.
+
+# Design choices
+# --------------
+
+# 1. **Model & transform come from OpenCLIP**
+#    This guarantees we apply **identical preprocessing** to what the model
+#    was trained with (224 × 224 crop + mean/std normalisation).
+
+# 2. **Re-parameterisation for inference**
+#    MobileCLIP uses MobileOne blocks that have extra convolution branches
+#    for training; `reparameterize_model` fuses them so inference is fast
+#    and deterministic.
+
+# 3. **Text embeddings are cached**
+#    The class "prompts" (e.g. `"a photo of a cat"`) are encoded **once at
+#    start-up**. Each request therefore encodes *only* the image and
+#    performs a single matrix multiplication.
+
+# 4. **Mixed precision on GPU**
+#    If the container has CUDA, we cast the model **and** inputs to
+#    `float16`. That halves memory and roughly doubles throughput on most
+#    modern GPUs. On CPU we stay in `float32` for numerical stability.
+# """
+
+# import contextlib, io, base64, json
+# from pathlib import Path
+# from typing import Any, Dict, List
+
+# import torch
 # from PIL import Image
 # import open_clip
-# from reparam import reparameterize_model
+
+# from reparam import reparameterize_model  # local copy (~60 LoC) of Apple's helper
+
 
 # class EndpointHandler:
-#     def __init__(self, path: str = ""):
+#     """
+#     Hugging Face entry-point. The toolkit will instantiate this class
+#     once and call it for every HTTP request.
+
+#     Parameters
+#     ----------
+#     path : str, optional
+#         Root directory of the repository. HF mounts the code under
+#         `/repository`; we use this path to locate `items.json`.
+#     """
+
+#     # ------------------------------------------------------------------ #
+#     #  INITIALISATION (runs **once**)                                     #
+#     # ------------------------------------------------------------------ #
+#     def __init__(self, path: str = "") -> None:
 #         self.device = "cuda" if torch.cuda.is_available() else "cpu"
 
-#         # Fix 1: Load weights directly from the web, just like local script
-#         # This guarantees the weights are identical.
+#         # 1️⃣ Load MobileCLIP-B weights & transforms -------------------
+#         #    `pretrained="datacompdr"` makes OpenCLIP download the
+#         #    official checkpoint from the Hub (cached in the image layer).
 #         model, _, self.preprocess = open_clip.create_model_and_transforms(
-#             "MobileCLIP-B", pretrained='datacompdr'
+#             "MobileCLIP-B", pretrained="datacompdr"
 #         )
-#         model.eval()
-#         self.model = reparameterize_model(model)  # fuse branches
-
-#         self.tokenizer = open_clip.get_tokenizer("MobileCLIP-B")
-#         self.model.to(self.device)
-
-#         # Fix 2: Explicitly set model to half-precision if on CUDA
-#         # This matches the behavior of torch.set_default_dtype(torch.float16)
+#         model.eval()                         # disable dropout / BN updates
+#         model = reparameterize_model(model)  # fuse MobileOne branches
+#         model.to(self.device)
 #         if self.device == "cuda":
-#             self.model.to(torch.float16)
+#             model = model.to(torch.float16)  # FP16 for throughput
+#         self.model = model                   # hold a reference
 
-#     def __call__(self, data):
-#         payload = data.get("inputs", data)
-#         img_b64 = payload["image"]
-#         labels = payload.get("candidate_labels", [])
-#         if not labels:
-#             return {"error": "candidate_labels list is empty"}
+#         # 2️⃣ Build the tokenizer once --------------------------------
+#         tokenizer = open_clip.get_tokenizer("MobileCLIP-B")
 
-#         # ---------------- decode inputs ----------------
-#         image = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
-#         img_tensor = self.preprocess(image).unsqueeze(0).to(self.device)
+#         # 3️⃣ Load class metadata -------------------------------------
+#         #    Expect JSON file: [{"id": 3, "name": "cat", "prompt": "cat"}, …]
+#         items_path = Path(path) / "items.json"
+#         with items_path.open("r", encoding="utf-8") as f:
+#             class_defs: List[Dict[str, Any]] = json.load(f)
 
-#         # The preprocessor might output float32, so ensure tensor matches model dtype
+#         # Extract the bits we need later
+#         prompts = [item["prompt"] for item in class_defs]
+#         self.class_ids: List[int] = [item["id"] for item in class_defs]
+#         self.class_names: List[str] = [item["name"] for item in class_defs]
+
+#         # 4️⃣ Encode all prompts once ---------------------------------
+#         with torch.no_grad():
+#             text_tokens = tokenizer(prompts).to(self.device)
+#             text_feats = self.model.encode_text(text_tokens)
+#             text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
+#         self.text_features = text_feats  # [num_classes, 512]
+
+#     # ------------------------------------------------------------------ #
+#     #  INFERENCE CALL                                                     #
+#     # ------------------------------------------------------------------ #
+#     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+#         """
+#         Parameters
+#         ----------
+#         data : dict
+#             Either the raw payload `{"image": "<base64>"}` **or** the
+#             Hugging Face convention `{"inputs": {...}}`.
+
+#         Returns
+#         -------
+#         list of dict
+#             Sorted list of `{"id": int, "label": str, "score": float}`.
+#             Scores are the softmax probabilities over the *provided*
+#             class list (they sum to 1.0).
+#         """
+#         # 1️⃣ Unpack the request payload ------------------------------
+#         payload: Dict[str, Any] = data.get("inputs", data)
+#         img_b64: str = payload["image"]
+
+#         # 2️⃣ Decode + preprocess -------------------------------------
+#         image = Image.open(io.BytesIO(base64.b64decode(img_b64))).convert("RGB")
+#         img_tensor = self.preprocess(image).unsqueeze(0).to(self.device)  # [1, 3, 224, 224]
 #         if self.device == "cuda":
 #             img_tensor = img_tensor.to(torch.float16)
 
-#         text_tokens = self.tokenizer(labels).to(self.device)
-
-#         # ---------------- forward pass -----------------
-#         # No need for autocast if everything is already float16
-#         with torch.no_grad():
-#             img_feat = self.model.encode_image(img_tensor)
-#             txt_feat = self.model.encode_text(text_tokens)
-#             img_feat /= img_feat.norm(dim=-1, keepdim=True)
-#             txt_feat /= txt_feat.norm(dim=-1, keepdim=True)
-#             probs = (100 * img_feat @ txt_feat.T).softmax(dim=-1)[0].cpu().tolist()
-
-#         return [
-#             {"label": l, "score": float(p)}
-#             for l, p in sorted(zip(labels, probs), key=lambda x: x[1], reverse=True)
-#         ]
+#         # 3️⃣ Forward pass (image only) -------------------------------
+#         with torch.no_grad():                               # no autograd graph
+#             img_feat = self.model.encode_image(img_tensor)  # [1, 512]
+#             img_feat = img_feat / img_feat.norm(dim=-1, keepdim=True)  # L2-normalise
+
+#         # cosine similarity logits → softmax probabilities
+#         probs = (100 * img_feat @ self.text_features.T).softmax(dim=-1)[0]  # [num_classes]
+
+#         # 4️⃣ Assemble JSON-serialisable response ---------------------
+#         results = zip(self.class_ids, self.class_names, probs.cpu().tolist())
+#         return sorted(
+#             [{"id": cid, "label": name, "score": float(p)} for cid, name, p in results],
+#             key=lambda x: x["score"],
+#             reverse=True,
+#         )
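The commit references `reparam.py` only as a local copy of Apple's helper; the file itself is not part of this diff. The basic building block behind this kind of inference-time re-parameterisation is folding BatchNorm statistics into the preceding convolution (MobileOne additionally merges parallel branches). A minimal illustrative sketch of that folding step, not Apple's actual helper:

```python
import torch
import torch.nn as nn

def fuse_conv_bn(conv: nn.Conv2d, bn: nn.BatchNorm2d) -> nn.Conv2d:
    """Fold BatchNorm statistics into the preceding conv (inference only)."""
    fused = nn.Conv2d(
        conv.in_channels, conv.out_channels,
        kernel_size=conv.kernel_size, stride=conv.stride,
        padding=conv.padding, groups=conv.groups, bias=True,
    )
    # BN(y) = gamma * (y - mean) / sqrt(var + eps) + beta
    scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)  # per-channel gamma / std
    fused.weight.data = conv.weight.data * scale.reshape(-1, 1, 1, 1)
    conv_bias = conv.bias.data if conv.bias is not None else torch.zeros(conv.out_channels)
    fused.bias.data = bn.bias.data + (conv_bias - bn.running_mean) * scale
    return fused

# Sanity check: the fused conv matches conv -> BN in eval mode.
conv, bn = nn.Conv2d(3, 8, 3, padding=1), nn.BatchNorm2d(8).eval()
bn.running_mean.normal_()
bn.running_var.uniform_(0.5, 1.5)
x = torch.randn(1, 3, 32, 32)
assert torch.allclose(bn(conv(x)), fuse_conv_bn(conv, bn)(x), atol=1e-5)
```

Because the fused convolution computes exactly what conv followed by BN computed in eval mode, the training-time structure disappears and inference runs one conv per block, which is why the handler calls `reparameterize_model` before moving the model to the device.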
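For local testing, the draft class can be exercised without deploying. A hedged sketch, assuming the commented-out code above is activated as `handler.py`, `reparam.py` sits beside it, and `cat.jpg` is an arbitrary local image; the printed scores are illustrative only:

```python
import base64
import json
from pathlib import Path

from handler import EndpointHandler  # assumes the draft class above is uncommented

# items.json in the format the handler documents:
# [{"id": ..., "name": ..., "prompt": ...}, ...]
Path("items.json").write_text(json.dumps([
    {"id": 1, "name": "cat", "prompt": "a photo of a cat"},
    {"id": 2, "name": "dog", "prompt": "a photo of a dog"},
]), encoding="utf-8")

handler = EndpointHandler(path=".")  # loads weights and caches text features once

img_b64 = base64.b64encode(Path("cat.jpg").read_bytes()).decode("utf-8")
print(handler({"inputs": {"image": img_b64}}))
# e.g. [{"id": 1, "label": "cat", "score": 0.98}, {"id": 2, "label": "dog", "score": 0.02}]
```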
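Once deployed, the endpoint is called over HTTP with the JSON payload the handler documents. A minimal client sketch; the endpoint URL and token are placeholders, not values from this repository:

```python
import base64
import requests

ENDPOINT_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"  # placeholder
HF_TOKEN = "hf_..."  # placeholder access token

with open("cat.jpg", "rb") as f:
    img_b64 = base64.b64encode(f.read()).decode("utf-8")

response = requests.post(
    ENDPOINT_URL,
    headers={
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    },
    json={"inputs": {"image": img_b64}},
)
response.raise_for_status()
print(response.json())  # sorted [{"id": ..., "label": ..., "score": ...}]; scores sum to 1.0
```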