Quantization and tracing
#1 opened by dreidizzle
Hi! Is there any example code for quantizing this model and tracing it? I'm getting some errors when trying to trace it, so I suspect the versions I have are not compatible, and I wonder whether anyone has tested or done this.
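Something along these lines is what I have in mind for the quantization part (just a sketch; the bitsandbytes 4-bit settings are my own guesses and I have not confirmed that this path works for ColQwen2ForRetrieval, or on non-CUDA devices):

import torch
from transformers import BitsAndBytesConfig, ColQwen2ForRetrieval

# Assumption: bitsandbytes is installed and a CUDA device is available.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = ColQwen2ForRetrieval.from_pretrained(
    "colqwen2-v1.0-hf",  # local checkpoint path in my setup
    quantization_config=bnb_config,
    device_map="auto",
    local_files_only=True,
)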
I'm getting NaN values in both the image and query embeddings when running the example described above, with the only change being the first URL (url1):
import requests
import torch
from PIL import Image
import io

from transformers import ColQwen2ForRetrieval, ColQwen2Processor
from transformers.utils.import_utils import is_flash_attn_2_available

# Load the model and the processor
model_name = "colqwen2-v1.0-hf"

model = ColQwen2ForRetrieval.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",  # "cpu", "cuda", or "mps" for Apple Silicon
    attn_implementation="flash_attention_2" if is_flash_attn_2_available() else "sdpa",
    local_files_only=True,
    trust_remote_code=True,
)

processor = ColQwen2Processor.from_pretrained(model_name)

# The document page screenshots from your corpus
url1 = "https://encyclopediavirginia.org/wp-content/uploads/2020/11/1704_7bb4330869fd374.jpg"
url2 = "https://upload.wikimedia.org/wikipedia/commons/thumb/4/4c/Romeoandjuliet1597.jpg/500px-Romeoandjuliet1597.jpg"

images = [
    Image.open(io.BytesIO(requests.get(url1).content)),
    Image.open(io.BytesIO(requests.get(url2).content)),
]

# The queries you want to retrieve documents for
queries = [
    "When was the United States Declaration of Independence proclaimed?",
    "Who printed the edition of Romeo and Juliet?",
]

# Process the inputs
inputs_images = processor(images=images).to(model.device)
inputs_text = processor(text=queries).to(model.device)

# Forward pass
with torch.no_grad():
    image_embeddings = model(**inputs_images).embeddings
    query_embeddings = model(**inputs_text).embeddings

# Score the queries against the images
scores = processor.score_retrieval(query_embeddings, image_embeddings)

print("Retrieval scores (query x image):")
print(scores)
Here is the output of the embeddings:
Image embeddings:
tensor([[[ 0.0199, -0.0850, -0.0713, ..., 0.0535, -0.0033, 0.1099],
[ 0.0194, -0.1084, -0.0272, ..., 0.0762, -0.0449, -0.1396],
[-0.0012, -0.0996, -0.0198, ..., 0.0762, -0.0334, -0.1299],
...,
[ 0.0503, -0.1069, -0.0835, ..., 0.0559, 0.0200, 0.0476],
[ 0.0267, -0.0918, -0.0317, ..., 0.0496, -0.0342, -0.1436],
[ 0.0513, -0.0366, -0.0205, ..., 0.0464, 0.0007, -0.1943]],
[[ nan, nan, nan, ..., nan, nan, nan],
[ nan, nan, nan, ..., nan, nan, nan],
[ nan, nan, nan, ..., nan, nan, nan],
...,
[ nan, nan, nan, ..., nan, nan, nan],
[ nan, nan, nan, ..., nan, nan, nan],
[ nan, nan, nan, ..., nan, nan, nan]]],
device='mps:0', dtype=torch.bfloat16)
Query embeddings:
tensor([[[ 0.0240, -0.0845, -0.0752, ..., 0.0547, 0.0081, 0.0913],
[-0.0079, -0.1021, -0.0198, ..., 0.0713, -0.0315, -0.1357],
[-0.1123, -0.0167, 0.0126, ..., 0.0859, 0.0244, 0.0383],
...,
[ 0.0132, 0.0581, 0.0096, ..., 0.0049, -0.0244, -0.0820],
[ 0.0251, 0.0518, 0.0103, ..., -0.0028, -0.0327, -0.0947],
[ 0.0430, 0.0479, 0.0155, ..., -0.0122, -0.0330, -0.1123]],
[[ nan, nan, nan, ..., nan, nan, nan],
[ nan, nan, nan, ..., nan, nan, nan],
[ nan, nan, nan, ..., nan, nan, nan],
...,
[ nan, nan, nan, ..., nan, nan, nan],
[ nan, nan, nan, ..., nan, nan, nan],
[ nan, nan, nan, ..., nan, nan, nan]]],
device='mps:0', dtype=torch.bfloat16)
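Since the NaNs only show up in the second element of each batch, this is the kind of sanity check I'm planning to run next (continuing from the snippet above; just a guess at where to look, not a verified fix):

# Run each image through the model on its own to see whether the NaNs
# follow a specific input (pure guesswork on my side).
with torch.no_grad():
    for i, image in enumerate(images):
        single = processor(images=[image]).to(model.device)
        emb = model(**single).embeddings
        print(f"image {i}: contains NaN = {torch.isnan(emb).any().item()}")

# Reload in float32 on the same device, in case bfloat16 on MPS is the issue
# (an assumption on my part, not confirmed).
model_fp32 = ColQwen2ForRetrieval.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    device_map="mps",
    local_files_only=True,
    trust_remote_code=True,
)
with torch.no_grad():
    emb_fp32 = model_fp32(**inputs_images).embeddings
print("float32 contains NaN =", torch.isnan(emb_fp32).any().item())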