Upload 3 files

Browse files

Files changed (3) hide show

camie_tagger_initial.onnx +3 -0
infer.py +80 -0
metadata.json +0 -0

camie_tagger_initial.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1ed6e97bf389857516873416affedc0572bb15c1a39531db2e8f92dfd5abdf0d
+size 855879045

infer.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import onnxruntime as ort
+import numpy as np
+import json
+from PIL import Image
# 1) Load ONNX model
# The session is built once at import time and shared by every inference call.
# Only the CPU execution provider is requested.
session = ort.InferenceSession(
    "camie_tagger_initial.onnx",
    providers=["CPUExecutionProvider"],
)
# 2) Preprocess your image (512x512, etc.)
def preprocess_image(img_path):
    """Load an image file and turn it into the model's input tensor.

    The image is decoded with Pillow, converted to 3-channel RGB, resized to
    512x512, scaled from 8-bit values to float32 in [0, 1], and rearranged
    from HWC to NCHW layout.

    Args:
        img_path: Path of the image to load (passed to ``PIL.Image.open``).

    Returns:
        A float32 NumPy array of shape (1, 3, 512, 512).
    """
    # Image.open is lazy and keeps the file handle open; the context manager
    # releases it deterministically. convert()/resize() return new in-memory
    # images, so the result stays valid after the file is closed.
    with Image.open(img_path) as source:
        img = source.convert("RGB").resize((512, 512))

    x = np.asarray(img, dtype=np.float32) / 255.0  # (512, 512, 3) in [0, 1]
    x = x.transpose(2, 0, 1)                       # HWC -> CHW
    return x[np.newaxis, ...]                      # add batch dim -> (1, 3, 512, 512)
# 3) Inference: run the model, threshold per category, report the tags.

# Parsed metadata keyed by file path, so the (large) JSON file is read and
# decoded only once per process instead of on every inference call.
_METADATA_CACHE = {}


def _load_metadata(path="metadata.json"):
    """Return the parsed metadata JSON for *path*, reading it only on first use."""
    if path not in _METADATA_CACHE:
        with open(path, "r", encoding="utf-8") as f:
            _METADATA_CACHE[path] = json.load(f)
    return _METADATA_CACHE[path]


def _sigmoid(logits):
    """Element-wise logistic sigmoid that does not overflow for large |logits|."""
    logits = np.asarray(logits)
    probs = np.empty_like(logits)
    non_negative = logits >= 0
    # For x >= 0, exp(-x) <= 1, so the textbook form is safe.
    probs[non_negative] = 1 / (1 + np.exp(-logits[non_negative]))
    # For x < 0, use exp(x) / (1 + exp(x)) so exp() never sees a large
    # positive argument (which would overflow and emit a RuntimeWarning).
    exp_x = np.exp(logits[~non_negative])
    probs[~non_negative] = exp_x / (1 + exp_x)
    return probs


def _group_tags_by_category(probs, metadata, default_threshold=0.325):
    """Group tags whose probability meets their category's threshold.

    Args:
        probs: 1-D array of per-tag probabilities, indexed by tag index.
        metadata: Parsed metadata dict. ``idx_to_tag`` (string index -> tag
            name) is required; ``tag_to_category`` and ``category_thresholds``
            are optional.
        default_threshold: Threshold used for categories that have no entry
            in ``category_thresholds``.

    Returns:
        Dict mapping category name to a list of ``(tag_name, probability)``
        tuples, in tag-index order.

    Raises:
        KeyError: If ``idx_to_tag`` has no entry for one of the tag indices.
    """
    idx_to_tag = metadata["idx_to_tag"]
    tag_to_category = metadata.get("tag_to_category", {})
    category_thresholds = metadata.get("category_thresholds", {})

    results_by_category = {}
    # tolist() converts the array to plain Python floats in one pass.
    for i, prob in enumerate(probs.tolist()):
        tag_name = idx_to_tag[str(i)]  # keys in idx_to_tag are strings
        # Tags without a known category are grouped under "unknown".
        category = tag_to_category.get(tag_name, "unknown")
        if prob >= category_thresholds.get(category, default_threshold):
            results_by_category.setdefault(category, []).append((tag_name, prob))
    return results_by_category


def inference(input_path):
    """Tag one image and print the predicted tags grouped by category.

    The image is preprocessed, run through the module-level ONNX ``session``,
    and the refined logits (the model's second output) are converted to
    probabilities. A tag is kept when its probability meets the threshold of
    its category from ``metadata.json`` (0.325 when the category has none).

    Args:
        input_path: Path of the image to tag.

    Returns:
        Dict mapping category name to a list of ``(tag_name, probability)``
        tuples sorted by descending probability.

    Raises:
        ValueError: If the model does not return exactly two outputs.
        KeyError: If the metadata lacks ``idx_to_tag`` or an index entry.
    """
    input_tensor = preprocess_image(input_path)

    # Run the model; passing None as the output list requests every output.
    input_name = session.get_inputs()[0].name
    outputs = session.run(None, {input_name: input_tensor})
    # Two outputs are expected (the original author notes shape (1, 70527)
    # each); only the refined logits are used for the final predictions.
    _initial_logits, refined_logits = outputs

    refined_probs = _sigmoid(refined_logits)
    # Index 0 selects the single image in the batch.
    results_by_category = _group_tags_by_category(refined_probs[0], _load_metadata())

    print("Predicted Tags by Category:\n")
    for cat, tags_list in results_by_category.items():
        # Stable in-place sort: most confident tags first, ties keep index order.
        tags_list.sort(key=lambda item: item[1], reverse=True)
        print(f"Category: {cat} | Predicted {len(tags_list)} tags")
        for tname, tprob in tags_list:
            print(f"  Tag: {tname:30s}  Prob: {tprob:.4f}")
        print()

    return results_by_category
if __name__ == "__main__":
    import sys  # only needed when run as a script

    # An optional first command-line argument selects the image to tag;
    # without it the original default file name is used.
    inference(sys.argv[1] if len(sys.argv) > 1 else "example_image.jpg")

metadata.json ADDED Viewed

The diff for this file is too large to render. See raw diff