<!--
IJEPA-WEB / index.html
lorien-danger's picture
Update index.html
a2463da verified
-->
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>I‑JEPA Web (ViT‑H/14)</title>
  <!-- Tailwind build loaded from CDN; it supplies the utility classes used throughout the markup. -->
  <script src="https://cdn.tailwindcss.com"></script>
  <!-- Open the font connections early so the Inter stylesheet and font files are not discovered late. -->
  <link rel="preconnect" href="https://fonts.googleapis.com">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
  <style>
    body {
      font-family: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
    }

    /* Custom-styled scale slider. */
    input[type="range"] {
      -webkit-appearance: none;
      appearance: none;
      width: 100%;
      height: .5rem;
      background: #4a5568;
      border-radius: .25rem;
      outline: none;
      opacity: .7;
      transition: opacity .2s;
    }
    input[type="range"]:hover {
      opacity: 1;
    }
    /* The default outline is removed above, so keyboard focus needs an explicit replacement. */
    input[type="range"]:focus-visible {
      outline: 2px solid #90cdf4;
      outline-offset: 4px;
      opacity: 1;
    }
    input[type="range"]::-webkit-slider-thumb {
      -webkit-appearance: none;
      appearance: none;
      width: 1.25rem;
      height: 1.25rem;
      background: #90cdf4;
      cursor: pointer;
      border-radius: 50%;
    }
    input[type="range"]::-moz-range-thumb {
      width: 1.25rem;
      height: 1.25rem;
      background: #90cdf4;
      cursor: pointer;
      border-radius: 50%;
    }

    /* Overlay/heatmap switch: the checkbox itself is visually hidden, its siblings draw the control. */
    #modeToggle:checked ~ .dot {
      transform: translateX(1.5rem);
    }
    #modeToggle:checked ~ .block {
      background-color: #3b82f6;
    }
    /* Show keyboard focus on the visible track because the real checkbox is not visible. */
    #modeToggle:focus-visible ~ .block {
      outline: 2px solid #90cdf4;
      outline-offset: 2px;
    }
  </style>
</head>
<body class="bg-gray-900 text-gray-300 flex flex-col items-center justify-center min-h-screen p-4 sm:p-6 lg:p-8">
<div class="w-full max-w-3xl bg-gray-800/50 backdrop-blur-sm rounded-2xl shadow-2xl shadow-black/30 border border-gray-700 p-6 sm:p-8 text-center">
<h1 class="text-3xl sm:text-4xl font-bold text-transparent bg-clip-text bg-gradient-to-r from-blue-400 to-purple-500 mb-2">
I‑JEPA Web (ViT‑H/14)
</h1>
<p class="text-gray-400 mb-8 max-w-xl mx-auto">Explore dense patch‑level similarities from <span class="font-semibold">onnx-community/ijepa_vith14_1k</span> entirely in your browser.</p>
<div class="space-y-6">
<!-- Upload area: the label overlay opens the file picker; the example button sits above it (z-10). -->
<div id="dropZone" class="relative flex flex-col items-center justify-center bg-gray-900/50 border-2 border-dashed border-gray-600 rounded-xl p-6 text-center group hover:border-blue-500 focus-within:border-blue-500 transition-colors duration-300">
  <svg class="w-12 h-12 mb-4 text-gray-500 group-hover:text-blue-500 transition-colors duration-300" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 20 16">
    <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5" d="M13 13h3a3 3 0 0 0 0-6h-.025A5.56 5.56 0 0 0 16 6.5 5.5 5.5 0 0 0 5.207 5.021C5.137 5.017 5.071 5 5 5a4 4 0 0 0 0 8h2.167M10 15V6m0 0L8 8m2-2 2 2"/>
  </svg>
  <p class="font-semibold text-gray-300">Click to upload or drag &amp; drop</p>
  <p class="text-xs text-gray-500 mb-2">PNG, JPG, or other image formats</p>
  <p class="text-sm text-gray-400">Or <button type="button" id="exampleBtn" class="relative z-10 text-blue-400 hover:text-blue-300 font-semibold underline bg-transparent border-none cursor-pointer p-0">try an example</button>.</p>
  <!-- The visually hidden text gives the file input an accessible name through its label. -->
  <label for="imageLoader" class="absolute inset-0 cursor-pointer z-0"><span class="sr-only">Upload an image</span></label>
  <!-- sr-only rather than hidden: display:none would remove the input from the keyboard tab order. -->
  <input type="file" id="imageLoader" accept="image/*" class="sr-only">
</div>
<!-- Controls: processing scale slider and overlay/heatmap switch. -->
<div class="bg-gray-900/50 p-4 rounded-xl border border-gray-700 space-y-4">
  <div class="grid grid-cols-1 sm:grid-cols-2 gap-4 items-center">
    <div class="flex items-center justify-center w-full space-x-3">
      <label for="scaleSlider" class="text-sm font-medium text-gray-400 whitespace-nowrap">Scale:</label>
      <input id="scaleSlider" type="range" min="0.25" max="4" step="0.25" value="1" class="w-full">
      <span id="scaleValue" class="text-sm font-medium text-gray-400 w-12 text-right">1.00x</span>
    </div>
    <!-- Unchecked = overlay mode, checked = heatmap mode. -->
    <div class="flex items-center justify-center space-x-3">
      <span class="text-sm font-medium text-gray-400">Overlay</span>
      <label for="modeToggle" class="flex items-center cursor-pointer">
        <!-- Spans instead of divs: a label may only contain phrasing content. -->
        <span class="relative">
          <!-- The wrapping label has no text, so the checkbox carries its own accessible name. -->
          <input type="checkbox" id="modeToggle" class="sr-only" aria-label="Show heatmap instead of overlay">
          <span class="block bg-gray-600 w-14 h-8 rounded-full"></span>
          <span class="dot absolute left-1 top-1 bg-white w-6 h-6 rounded-full transition transform"></span>
        </span>
      </label>
      <span class="text-sm font-medium text-gray-400">Heatmap</span>
    </div>
  </div>
</div>
<!-- Status line: a polite live region so text written into #statusText is announced by screen readers. -->
<div id="status" class="flex items-center justify-center w-full font-medium text-gray-400 h-6" role="status" aria-live="polite">
  <!-- Decorative spinner; the adjacent text carries the message. -->
  <svg id="spinner" class="animate-spin mr-3 h-5 w-5 text-blue-400 hidden" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
    <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
    <path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
  </svg>
  <span id="statusText"></span>
</div>
<!-- Output area: the placeholder is shown until the script reveals the canvas via an inline display style. -->
<div id="canvasContainer" class="w-full bg-gray-900/50 rounded-lg border border-gray-700 shadow-inner overflow-hidden min-h-[250px] flex items-center justify-center p-2">
  <!-- Starts hidden; the contradictory "block" utility was dropped because the script sets display itself. -->
  <canvas id="imageCanvas" class="hidden rounded-lg cursor-crosshair max-w-full h-auto" role="img" aria-label="Selected image with patch similarity highlights"></canvas>
  <div id="canvasPlaceholder" class="text-gray-500">Your image will appear here</div>
</div>
</div>
</div>
<script type="module">
import { pipeline, RawImage, matmul } from "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]";
// ===== 1) Config =====
// Model repository id handed to pipeline() in initialize().
const MODEL_ID = "onnx-community/ijepa_vith14_1k";
// Image fetched when the "try an example" button is clicked.
const EXAMPLE_IMAGE_URL = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cats.png";
// Square canvas side lengths (in pixels) the image may be rendered at before analysis;
// findClosestSupportedResolution() picks the nearest one.
const SUPPORTED_RESOLUTIONS = [224, 336, 448];
// DOM handles, looked up once when the module is evaluated.
const byId = (id) => document.getElementById(id);

// Upload / example inputs
const imageLoader = byId("imageLoader");
const exampleBtn = byId("exampleBtn");
const dropZone = byId("dropZone");

// Output canvas and its 2D drawing context
const imageCanvas = byId("imageCanvas");
const ctx = imageCanvas.getContext("2d");
const canvasContainer = byId("canvasContainer");
const canvasPlaceholder = byId("canvasPlaceholder");

// Status line
const spinner = byId("spinner");
const statusText = byId("statusText");

// Controls
const modeToggle = byId("modeToggle");
const scaleSlider = byId("scaleSlider");
const scaleValue = byId("scaleValue");
// State (module-level, shared by all handlers below)
// Feature-extraction pipeline; stays null until initialize() has loaded the model.
let extractor = null;
// Nested array of patch-to-patch similarities from the last analysis (row i = patch i vs. all patches); null while none is available.
let similarityScores = null;
// Image element currently drawn on the canvas.
let originalImage = null;
// URL last passed to loadImageOntoCanvas(); reused when the scale slider changes.
let currentImageUrl = null;
// true = overlay mode, false = heatmap mode (see handleModeChange).
let isOverlayMode = true;
// { queryIndex, allPatches } of the last hovered patch, kept so a mode change can redraw it.
let lastHoverData = null;
// Current value of the scale slider.
let imageScale = 1.0;
// Id of the pending requestAnimationFrame callback for drawLoop, or null when none is scheduled.
let animationFrameId = null;
// Most recent mouse event (or touch point) seen over the canvas.
let lastMouseEvent = null;
// Pixel budget for the scaled crop; assigned in initialize() depending on the user agent.
let maxPixels = null;
// Centered square source rectangle { sx, sy, sWidth, sHeight } used when drawing originalImage.
let imageCropParams = null;
// Number of patches along one side of the model's patch grid; 0 while unknown.
let modelPatchesPerRow = 0;
/**
 * Show a message in the status line and toggle the spinner.
 * @param {string} text - Message written into the status text element.
 * @param {boolean} [isLoading=false] - When truthy the spinner is displayed, otherwise it is hidden.
 */
function updateStatus(text, isLoading = false) {
  let spinnerDisplay = "none";
  if (isLoading) {
    spinnerDisplay = "block";
  }
  spinner.style.display = spinnerDisplay;
  statusText.textContent = text;
}
/**
 * Wire up the UI and load the feature-extraction model.
 *
 * Event listeners are attached before the model download starts. Previously they were attached
 * only afterwards, so during the download a dropped file made the browser navigate away from
 * the page (no preventDefault on dragover/drop yet) and a file picked in the dialog was lost.
 * If an image was chosen while the model was still loading, it is analyzed as soon as the
 * model is ready.
 */
async function initialize() {
  const isWebGpuSupported = !!navigator.gpu;
  const isMobile = /Mobi|Android|webOS|iPhone|iPad|iPod|BlackBerry|IEMobile|Opera Mini/i.test(navigator.userAgent);
  // Must be set before any handler can run: loadImageOntoCanvas() reads it.
  maxPixels = isMobile ? 1048576 : 2097152;

  imageLoader.addEventListener("change", handleImageUpload);
  exampleBtn.addEventListener("click", handleExample);
  imageCanvas.addEventListener("mousemove", handleMouseMove);
  imageCanvas.addEventListener("mouseleave", clearHighlights);
  imageCanvas.addEventListener("touchmove", handleTouchMove);
  imageCanvas.addEventListener("touchend", clearHighlights);
  dropZone.addEventListener("dragover", handleDragOver);
  dropZone.addEventListener("dragleave", handleDragLeave);
  dropZone.addEventListener("drop", handleDrop);
  modeToggle.addEventListener("change", handleModeChange);
  scaleSlider.addEventListener("input", handleSliderInput);
  scaleSlider.addEventListener("change", handleSliderChange);

  const device = isWebGpuSupported ? "webgpu" : "wasm";
  const dtype = isWebGpuSupported ? "q4" : "q8";
  updateStatus(`Loading I-JEPA (${device.toUpperCase()})...`, true);
  try {
    extractor = await pipeline("image-feature-extraction", MODEL_ID, { device, dtype });
    // The canvas is already rendered at the chosen resolution, so the processor's own resize is switched off.
    if (extractor?.processor?.image_processor) extractor.processor.image_processor.do_resize = false;
  } catch (e) {
    console.error(e);
    updateStatus("Failed to load the model. Please refresh.");
    return;
  }

  if (currentImageUrl) {
    // An image was selected during the download; run it through the now-available model.
    loadImageOntoCanvas(currentImageUrl);
  } else {
    updateStatus("Ready. Please select an image.");
  }
}
/**
 * Fetch the bundled example image and display it.
 * The image is loaded through a blob URL created from the fetched bytes.
 */
async function handleExample() {
  updateStatus("Loading example image...", true);
  try {
    const res = await fetch(EXAMPLE_IMAGE_URL);
    // fetch() only rejects on network failure; an HTTP error page must not be treated as an image.
    if (!res.ok) throw new Error(`Example image request failed with HTTP ${res.status}`);
    const blob = await res.blob();
    loadImageOntoCanvas(URL.createObjectURL(blob));
  } catch (e) {
    console.error(e);
    updateStatus("Failed to load example image.");
  }
}
/**
 * "change" handler of the file input: display the first selected file.
 * @param {Event} e - Change event whose target is the file input.
 */
function handleImageUpload(e) {
  const file = e.target.files?.[0];
  if (!file) return;
  loadImageOntoCanvas(URL.createObjectURL(file));
  // Clear the selection so choosing the same file again later still fires "change"
  // (for example after the example image was shown in between).
  e.target.value = "";
}
/** "dragover" handler: mark the event as handled and highlight the drop zone. */
function handleDragOver(e) {
  e.preventDefault();
  const { classList } = dropZone;
  classList.add("border-blue-500", "bg-gray-800");
}
/** "dragleave" handler: mark the event as handled and remove the drop-zone highlight. */
function handleDragLeave(e) {
  e.preventDefault();
  const { classList } = dropZone;
  classList.remove("border-blue-500", "bg-gray-800");
}
/**
 * "drop" handler: display the first dropped file if its MIME type is an image type,
 * otherwise ask for an image in the status line.
 */
function handleDrop(e) {
  e.preventDefault();
  dropZone.classList.remove("border-blue-500", "bg-gray-800");

  const dropped = e.dataTransfer.files?.[0];
  if (!dropped || !dropped.type.startsWith("image/")) {
    updateStatus("Please drop an image file.");
    return;
  }
  loadImageOntoCanvas(URL.createObjectURL(dropped));
}
/**
 * "change" handler of the mode switch. A checked switch selects heatmap mode,
 * unchecked selects overlay mode; the canvas is refreshed to match.
 */
function handleModeChange(e) {
  isOverlayMode = !e.target.checked;
  if (!lastHoverData) {
    // Nothing hovered: just restore the plain image.
    clearHighlights();
    return;
  }
  const { queryIndex, allPatches } = lastHoverData;
  drawHighlights(queryIndex, allPatches);
}
/** "input" handler of the scale slider: store the new scale and update its numeric label. */
function handleSliderInput(e) {
  const parsedScale = parseFloat(e.target.value);
  imageScale = parsedScale;
  scaleValue.textContent = `${parsedScale.toFixed(2)}x`;
}
/** "change" handler of the scale slider: reload the current image (if any) at the new scale. */
function handleSliderChange() {
  if (!currentImageUrl) return;
  loadImageOntoCanvas(currentImageUrl);
}
/**
 * Pick the candidate resolution numerically closest to a target size.
 * On a tie the candidate that appears first in the list wins.
 * @param {number} targetDim - Desired side length in pixels.
 * @param {number[]} [resolutions=SUPPORTED_RESOLUTIONS] - Non-empty list of candidate side lengths.
 * @returns {number} The closest candidate.
 */
function findClosestSupportedResolution(targetDim, resolutions = SUPPORTED_RESOLUTIONS) {
  return resolutions.reduce((best, candidate) =>
    Math.abs(candidate - targetDim) < Math.abs(best - targetDim) ? candidate : best
  );
}
/**
 * Paint the stored crop of the current image over the whole canvas.
 * Does nothing until both an image and its crop rectangle are available.
 */
function redrawOriginalImage() {
  if (!(originalImage && imageCropParams)) return;
  const { sx, sy, sWidth, sHeight } = imageCropParams;
  const { width: destWidth, height: destHeight } = imageCanvas;
  ctx.drawImage(originalImage, sx, sy, sWidth, sHeight, 0, 0, destWidth, destHeight);
}
// Id of the most recent load request; callbacks of older requests compare against it and bail out.
let latestImageLoadId = 0;

/**
 * Load an image URL, draw its centered square crop on the canvas at the closest
 * supported resolution, and run the analysis on it.
 *
 * Fixes over the previous version:
 *  - the load callback works on its own image object instead of the shared `originalImage`,
 *    so two overlapping loads can no longer read each other's dimensions;
 *  - callbacks of a superseded load are ignored;
 *  - the blob URL of the previously shown image is released once it is replaced;
 *  - an image without usable dimensions is reported as a load failure.
 *
 * @param {string} url - Image URL (typically a blob URL created by the upload/example handlers).
 */
function loadImageOntoCanvas(url) {
  const loadId = ++latestImageLoadId;
  const previousUrl = currentImageUrl;
  currentImageUrl = url;

  // The old URL is no longer reachable (the slider only reloads currentImageUrl). An image that
  // has already been decoded stays drawable after its blob URL is revoked.
  if (previousUrl && previousUrl !== url && previousUrl.startsWith("blob:")) {
    URL.revokeObjectURL(previousUrl);
  }

  const showLoadFailure = () => {
    updateStatus("Failed to load image.");
    canvasPlaceholder.style.display = "block";
    imageCanvas.style.display = "none";
  };

  const img = new Image();
  img.onload = async () => {
    if (loadId !== latestImageLoadId) return; // a newer request replaced this one

    const { naturalWidth: w, naturalHeight: h } = img;
    const cropSize = Math.min(w, h);
    if (!(cropSize > 0)) {
      showLoadFailure();
      return;
    }

    originalImage = img;
    canvasPlaceholder.style.display = "none";
    imageCanvas.style.display = "block";

    // Centered square crop of the source image.
    const sx = (w - cropSize) / 2;
    const sy = (h - cropSize) / 2;
    imageCropParams = { sx, sy, sWidth: cropSize, sHeight: cropSize };

    // Apply the slider scale, cap by the pixel budget (when set), then snap to a supported size.
    let scaledCropSize = cropSize * imageScale;
    if (maxPixels && scaledCropSize * scaledCropSize > maxPixels) {
      scaledCropSize = Math.sqrt(maxPixels);
    }
    const chosenResolution = findClosestSupportedResolution(scaledCropSize);
    imageCanvas.width = chosenResolution;
    imageCanvas.height = chosenResolution;

    redrawOriginalImage();
    await processImage(chosenResolution);
    setTimeout(() => { canvasContainer.scrollIntoView({ behavior: "smooth", block: "center" }); }, 100);
  };
  img.onerror = () => {
    if (loadId !== latestImageLoadId) return;
    showLoadFailure();
  };
  img.src = url;
}
// Id of the most recent analysis; an older run that finishes later must not overwrite newer results.
let latestAnalysisId = 0;

/**
 * Run the model on the current canvas content and store the patch-to-patch
 * cosine similarity matrix in `similarityScores`.
 *
 * Fixes over the previous version:
 *  - the patch slice used an exclusive end index of `nPatches`, which silently dropped the last patch;
 *  - a single leading CLS token was assumed unconditionally. The number of leading non-patch
 *    tokens is now derived from the token count: the canvas is square, so the patch tokens form
 *    the largest square grid that fits, and whatever precedes them is skipped.
 *    NOTE(review): this relies on any non-patch tokens being at the start of the sequence and
 *    being fewer than 2 * gridSide + 1 — confirm against the exported model;
 *  - results of a run that was superseded by a newer one are discarded.
 *
 * @param {number} chosenResolution - Side length of the canvas, used in the status message.
 */
async function processImage(chosenResolution) {
  if (!extractor) return;
  const analysisId = ++latestAnalysisId;
  const isStale = () => analysisId !== latestAnalysisId;

  updateStatus("Analyzing with I‑JEPA... 🧠", true);
  similarityScores = null; lastHoverData = null; modelPatchesPerRow = 0;
  try {
    const imageData = await RawImage.fromCanvas(imageCanvas);
    const features = await extractor(imageData, { pooling: "none" });
    if (isStale()) return;

    const totalTokens = features.dims[1];
    const gridSide = Math.floor(Math.sqrt(totalTokens));
    const nPatches = gridSide * gridSide;
    const nPrefixTokens = totalTokens - nPatches;
    if (nPatches === 0) throw new Error("Model returned no tokens.");
    if (nPrefixTokens > 0) {
      console.info(`Skipping ${nPrefixTokens} leading non-patch token(s); patch grid is ${gridSide}×${gridSide}.`);
    }

    // Slice ranges are [start, end) — the end index is exclusive.
    const patchFeatures = features.slice(null, [nPrefixTokens, totalTokens]);
    // L2-normalize each patch vector so the matrix product below yields cosine similarities.
    const normalized = patchFeatures.normalize(2, -1);
    const sims = await matmul(normalized, normalized.permute(0, 2, 1));
    const scores = (await sims.tolist())[0];
    if (isStale()) return;

    modelPatchesPerRow = gridSide;
    similarityScores = scores;
    updateStatus(`Image processed at ${chosenResolution}×${chosenResolution}. Hover to explore. ✨`);
  } catch (err) {
    console.error("I‑JEPA processing error:", err);
    // Do not clobber the status of a newer run that is still in progress.
    if (!isStale()) updateStatus("An error occurred during processing. The image size might be unsupported.");
  }
}
/** "touchmove" handler: suppress the default action and treat the first touch point like a mouse move. */
function handleTouchMove(e) {
  e.preventDefault();
  if (e.touches.length === 0) return;
  const firstTouch = e.touches[0];
  handleMouseMove(firstTouch);
}
/**
 * Remember the latest pointer position and schedule one drawLoop run for the next
 * animation frame, unless one is already pending.
 */
function handleMouseMove(e) {
  lastMouseEvent = e;
  if (animationFrameId) return;
  animationFrameId = requestAnimationFrame(drawLoop);
}
/**
 * Animation-frame callback: map the latest pointer position to a patch of the model's
 * grid and draw that patch's similarities.
 *
 * The pending-frame marker is cleared up front. Previously it was cleared only on the normal
 * exit paths, so an exception while drawing left it set and handleMouseMove() never scheduled
 * another frame.
 */
function drawLoop() {
  animationFrameId = null;
  if (!lastMouseEvent || !similarityScores || !modelPatchesPerRow) return;

  const rect = imageCanvas.getBoundingClientRect();
  // A canvas without layout size would turn the coordinates below into NaN.
  if (!(rect.width > 0 && rect.height > 0)) return;

  // Convert from CSS pixels to canvas pixels (the canvas may be displayed scaled).
  const scaleX = imageCanvas.width / rect.width;
  const scaleY = imageCanvas.height / rect.height;
  const x = (lastMouseEvent.clientX - rect.left) * scaleX;
  const y = (lastMouseEvent.clientY - rect.top) * scaleY;
  if (x < 0 || x >= imageCanvas.width || y < 0 || y >= imageCanvas.height) return;

  // Locate the hovered cell in the patch grid and flatten to a row-major index.
  const patchDrawSize = imageCanvas.width / modelPatchesPerRow;
  const patchX = Math.floor(x / patchDrawSize);
  const patchY = Math.floor(y / patchDrawSize);
  const qIdx = patchY * modelPatchesPerRow + patchX;
  if (qIdx < 0 || qIdx >= similarityScores.length) return;

  const allPatches = Array.from(similarityScores[qIdx]).map((score, index) => ({ score, index }));
  lastHoverData = { queryIndex: qIdx, allPatches };
  drawHighlights(qIdx, allPatches);
}
// Color stops of the "inferno" colormap: [position, [r, g, b]], ordered by ascending position.
const INFERNO_COLORMAP = [
  [0.0, [0, 0, 4]],
  [0.1, [39, 12, 69]],
  [0.2, [84, 15, 104]],
  [0.3, [128, 31, 103]],
  [0.4, [170, 48, 88]],
  [0.5, [209, 70, 68]],
  [0.6, [240, 97, 47]],
  [0.7, [253, 138, 28]],
  [0.8, [252, 185, 26]],
  [0.9, [240, 231, 56]],
  [1.0, [252, 255, 160]],
];

/**
 * Convert a scalar into a CSS rgb() string by linear interpolation between the
 * two neighbouring color stops.
 * @param {number} t - Position on the colormap; the stops cover 0 to 1.
 * @returns {string} Interpolated color, or the last stop's color when no stop position is >= t.
 */
function getInfernoColor(t) {
  // First stop (other than stop 0) whose position is at or above t.
  const upperIdx = INFERNO_COLORMAP.findIndex((stop, i) => i > 0 && t <= stop[0]);
  if (upperIdx === -1) {
    const [, lastRgb] = INFERNO_COLORMAP[INFERNO_COLORMAP.length - 1];
    return `rgb(${lastRgb.join(",")})`;
  }

  const [lowerPos, lowerRgb] = INFERNO_COLORMAP[upperIdx - 1];
  const [upperPos, upperRgb] = INFERNO_COLORMAP[upperIdx];
  const fraction = (t - lowerPos) / (upperPos - lowerPos);
  const channels = lowerRgb.map((value, k) => Math.round(value + fraction * (upperRgb[k] - value)));
  return `rgb(${channels.join(", ")})`;
}
/**
 * Render the similarity of every patch to the query patch.
 * Overlay mode dims the image and brightens similar patches; heatmap mode paints
 * each patch with a colormap color. The query patch itself is only outlined.
 * @param {number} queryIndex - Row-major index of the hovered patch.
 * @param {{score: number, index: number}[]} allPatches - Similarity of each patch to the query patch.
 */
function drawHighlights(queryIndex, allPatches) {
  if (!modelPatchesPerRow) return;

  const patchDrawSize = imageCanvas.width / modelPatchesPerRow;
  const columnOf = (patchIndex) => patchIndex % modelPatchesPerRow;
  const rowOf = (patchIndex) => Math.floor(patchIndex / modelPatchesPerRow);

  // Background: darkened image (overlay) or the colormap's lowest color (heatmap).
  if (isOverlayMode) {
    redrawOriginalImage();
    ctx.fillStyle = "rgba(0,0,0,0.6)";
  } else {
    ctx.fillStyle = getInfernoColor(0);
  }
  ctx.fillRect(0, 0, imageCanvas.width, imageCanvas.height);

  if (allPatches.length > 0) {
    // Rescale scores to 0..1 over this query's own min/max.
    const scores = allPatches.map(({ score }) => score);
    const lowest = Math.min(...scores);
    const highest = Math.max(...scores);
    const spread = highest - lowest;

    allPatches.forEach(({ score, index }) => {
      if (index === queryIndex) return;
      const t = spread > 1e-4 ? (score - lowest) / spread : 1;
      if (isOverlayMode) {
        // Squaring suppresses weak similarities.
        const alpha = Math.pow(t, 2) * 0.8;
        ctx.fillStyle = `rgba(255,255,255,${alpha})`;
      } else {
        ctx.fillStyle = getInfernoColor(t);
      }
      ctx.fillRect(columnOf(index) * patchDrawSize, rowOf(index) * patchDrawSize, patchDrawSize, patchDrawSize);
    });
  }

  // Outline the query patch.
  ctx.strokeStyle = isOverlayMode ? "rgba(129,188,255,0.9)" : "cyan";
  ctx.lineWidth = 2;
  ctx.strokeRect(columnOf(queryIndex) * patchDrawSize, rowOf(queryIndex) * patchDrawSize, patchDrawSize, patchDrawSize);
}
/**
 * Cancel any pending hover frame, forget the hover state, and restore the plain image.
 */
function clearHighlights() {
  const pendingFrame = animationFrameId;
  if (pendingFrame) {
    cancelAnimationFrame(pendingFrame);
    animationFrameId = null;
  }
  lastHoverData = null;
  lastMouseEvent = null;
  if (!originalImage) return;
  redrawOriginalImage();
}
// Entry point: start model loading and attach the UI event listeners.
initialize();
</script>
</body>
</html>