
Quantization and tracing

#1 opened by dreidizzle

Hi! Is there any example code to quantize this model and trace it? I'm getting some errors when trying to trace it, so I suspect the versions I have are not compatible, and I'm wondering whether anyone has tested or done this.
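
For reference, here is roughly what I would expect a quantized load to look like. This is an untested sketch that assumes bitsandbytes 4-bit quantization works for this architecture; model_name is whatever local path or repo id you use:

import torch
from transformers import BitsAndBytesConfig, ColQwen2ForRetrieval, ColQwen2Processor

model_name = "colqwen2-v1.0-hf"  # assumption: substitute your own local path or repo id

# 4-bit NF4 quantization via bitsandbytes (requires a CUDA GPU and the bitsandbytes package)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = ColQwen2ForRetrieval.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)
processor = ColQwen2Processor.from_pretrained(model_name)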

I'm getting NaN values in both the image and query embeddings using the example described above, with the only change being the first URL (url1):

import requests
import torch
from PIL import Image
import io

from transformers import ColQwen2ForRetrieval, ColQwen2Processor
from transformers.utils.import_utils import is_flash_attn_2_available


# Load the model and the processor
model_name = "colqwen2-v1.0-hf"

model = ColQwen2ForRetrieval.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",  # "cpu", "cuda", or "mps" for Apple Silicon
    attn_implementation="flash_attention_2" if is_flash_attn_2_available() else "sdpa",
    local_files_only=True,
    trust_remote_code=True,
)
processor = ColQwen2Processor.from_pretrained(model_name)


# The document page screenshots from your corpus
url1 = "https://encyclopediavirginia.org/wp-content/uploads/2020/11/1704_7bb4330869fd374.jpg"
url2 = "https://upload.wikimedia.org/wikipedia/commons/thumb/4/4c/Romeoandjuliet1597.jpg/500px-Romeoandjuliet1597.jpg"

images = [
    Image.open(io.BytesIO(requests.get(url1).content)),
    Image.open(io.BytesIO(requests.get(url2).content)),
]

# The queries you want to retrieve documents for
queries = [
    "When was the United States Declaration of Independence proclaimed?",
    "Who printed the edition of Romeo and Juliet?",
]

# Process the inputs
inputs_images = processor(images=images).to(model.device)
inputs_text = processor(text=queries).to(model.device)

# Forward pass
with torch.no_grad():
    image_embeddings = model(**inputs_images).embeddings
    query_embeddings = model(**inputs_text).embeddings

# Score the queries against the images
scores = processor.score_retrieval(query_embeddings, image_embeddings)

print("Retrieval scores (query x image):")
print(scores)

Here is the output of the embeddings:

Image embeddings

tensor([[[ 0.0199, -0.0850, -0.0713,  ...,  0.0535, -0.0033,  0.1099],
         [ 0.0194, -0.1084, -0.0272,  ...,  0.0762, -0.0449, -0.1396],
         [-0.0012, -0.0996, -0.0198,  ...,  0.0762, -0.0334, -0.1299],
         ...,
         [ 0.0503, -0.1069, -0.0835,  ...,  0.0559,  0.0200,  0.0476],
         [ 0.0267, -0.0918, -0.0317,  ...,  0.0496, -0.0342, -0.1436],
         [ 0.0513, -0.0366, -0.0205,  ...,  0.0464,  0.0007, -0.1943]],

        [[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         ...,
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan]]],
       device='mps:0', dtype=torch.bfloat16)

Query embeddings

tensor([[[ 0.0240, -0.0845, -0.0752,  ...,  0.0547,  0.0081,  0.0913],
         [-0.0079, -0.1021, -0.0198,  ...,  0.0713, -0.0315, -0.1357],
         [-0.1123, -0.0167,  0.0126,  ...,  0.0859,  0.0244,  0.0383],
         ...,
         [ 0.0132,  0.0581,  0.0096,  ...,  0.0049, -0.0244, -0.0820],
         [ 0.0251,  0.0518,  0.0103,  ..., -0.0028, -0.0327, -0.0947],
         [ 0.0430,  0.0479,  0.0155,  ..., -0.0122, -0.0330, -0.1123]],

        [[    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         ...,
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan]]],
       device='mps:0', dtype=torch.bfloat16)
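
If it helps narrow this down, a minimal sanity check would be to reload the model in float32 on CPU and test the outputs for NaNs, since bfloat16 on MPS seems like a plausible culprit. A sketch, reusing the same model_name, processor, images, and queries as above:

import torch
from transformers import ColQwen2ForRetrieval

# Reload in float32 on CPU to rule out bfloat16/MPS numerics
# (model_name, processor, images, and queries come from the snippet above)
model_fp32 = ColQwen2ForRetrieval.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    device_map="cpu",
)

inputs_images = processor(images=images).to(model_fp32.device)
inputs_text = processor(text=queries).to(model_fp32.device)

with torch.no_grad():
    image_embeddings = model_fp32(**inputs_images).embeddings
    query_embeddings = model_fp32(**inputs_text).embeddings

print("NaNs in image embeddings:", torch.isnan(image_embeddings).any().item())
print("NaNs in query embeddings:", torch.isnan(query_embeddings).any().item())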
