Spaces:
Running
Running
updated instructions and interface
Browse files- app.py +20 -10
- rankings/image_ranking.csv +62 -0
- rankings/image_ranking.jsonl +61 -0
- rankings/mmeb_ranking.csv +25 -0
- rankings/mmeb_ranking.jsonl +24 -0
- rankings/video_ranking.csv +25 -0
- rankings/video_ranking.jsonl +24 -0
- rankings/visdoc_ranking.csv +25 -0
- rankings/visdoc_ranking.jsonl +24 -0
- utils.py +25 -25
- utils_v2.py +11 -1
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
from utils import *
|
| 2 |
import utils_v2 as v2
|
|
|
|
| 3 |
|
| 4 |
global data_component
|
| 5 |
|
|
@@ -21,7 +22,7 @@ with gr.Blocks() as block:
|
|
| 21 |
|
| 22 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 23 |
# Table 1, the main leaderboard of overall scores
|
| 24 |
-
with gr.TabItem("📊 MMEB (V2)", elem_id="
|
| 25 |
with gr.Row():
|
| 26 |
with gr.Accordion("Citation", open=False):
|
| 27 |
citation_button2 = gr.Textbox(
|
|
@@ -58,8 +59,9 @@ with gr.Blocks() as block:
|
|
| 58 |
step=0.1,
|
| 59 |
label="Maximum number of parameters (B)",
|
| 60 |
)
|
|
|
|
| 61 |
data_component2 = gr.components.Dataframe(
|
| 62 |
-
value=
|
| 63 |
headers=v2.COLUMN_NAMES,
|
| 64 |
type="pandas",
|
| 65 |
datatype=v2.DATA_TITLE_TYPE,
|
|
@@ -69,6 +71,9 @@ with gr.Blocks() as block:
|
|
| 69 |
)
|
| 70 |
|
| 71 |
refresh_button2 = gr.Button("Refresh")
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
def update_with_tasks_v2(*args):
|
| 74 |
return update_table_v2(*args)
|
|
@@ -104,7 +109,7 @@ with gr.Blocks() as block:
|
|
| 104 |
return combined_df[v2.COLUMN_NAMES_I]
|
| 105 |
|
| 106 |
# table 2, image scores only
|
| 107 |
-
with gr.TabItem("🖼️ Image (Previously MMEB-V1)", elem_id="
|
| 108 |
gr.Markdown(v2.TABLE_INTRODUCTION_I)
|
| 109 |
df2_i = get_special_processed_df2()
|
| 110 |
data_component3 = gr.components.Dataframe(
|
|
@@ -116,12 +121,14 @@ with gr.Blocks() as block:
|
|
| 116 |
visible=True,
|
| 117 |
max_height=2400,
|
| 118 |
)
|
|
|
|
| 119 |
|
| 120 |
# table 3, video scores only
|
| 121 |
-
with gr.TabItem("💽 Video", elem_id="
|
| 122 |
gr.Markdown(v2.TABLE_INTRODUCTION_V)
|
|
|
|
| 123 |
data_component4 = gr.components.Dataframe(
|
| 124 |
-
value=
|
| 125 |
headers=v2.COLUMN_NAMES_V,
|
| 126 |
type="pandas",
|
| 127 |
datatype=v2.DATA_TITLE_TYPE_V,
|
|
@@ -129,12 +136,14 @@ with gr.Blocks() as block:
|
|
| 129 |
visible=True,
|
| 130 |
max_height=2400,
|
| 131 |
)
|
|
|
|
| 132 |
|
| 133 |
# table 4, visual document scores only
|
| 134 |
-
with gr.TabItem("📑 Visual Doc", elem_id="
|
| 135 |
gr.Markdown(v2.TABLE_INTRODUCTION_D)
|
|
|
|
| 136 |
data_component5 = gr.components.Dataframe(
|
| 137 |
-
value=
|
| 138 |
headers=v2.COLUMN_NAMES_D,
|
| 139 |
type="pandas",
|
| 140 |
datatype=v2.DATA_TITLE_TYPE_D,
|
|
@@ -142,14 +151,15 @@ with gr.Blocks() as block:
|
|
| 142 |
visible=True,
|
| 143 |
max_height=2400,
|
| 144 |
)
|
|
|
|
| 145 |
|
| 146 |
# table 5
|
| 147 |
-
with gr.TabItem("📝 About", elem_id="
|
| 148 |
gr.Markdown(LEADERBOARD_INFO, elem_classes="markdown-text")
|
| 149 |
-
gr.Image("overview.png", width=900, label="Dataset Overview")
|
| 150 |
|
| 151 |
# table 6
|
| 152 |
-
with gr.TabItem("🚀 Submit here! ", elem_id="submit
|
| 153 |
with gr.Row():
|
| 154 |
gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
|
| 155 |
|
|
|
|
| 1 |
from utils import *
|
| 2 |
import utils_v2 as v2
|
| 3 |
+
import gradio as gr
|
| 4 |
|
| 5 |
global data_component
|
| 6 |
|
|
|
|
| 22 |
|
| 23 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 24 |
# Table 1, the main leaderboard of overall scores
|
| 25 |
+
with gr.TabItem("📊 MMEB (V2)", elem_id="tab-overall", id=1):
|
| 26 |
with gr.Row():
|
| 27 |
with gr.Accordion("Citation", open=False):
|
| 28 |
citation_button2 = gr.Textbox(
|
|
|
|
| 59 |
step=0.1,
|
| 60 |
label="Maximum number of parameters (B)",
|
| 61 |
)
|
| 62 |
+
df2_all = df2[v2.COLUMN_NAMES]
|
| 63 |
data_component2 = gr.components.Dataframe(
|
| 64 |
+
value=df2_all,
|
| 65 |
headers=v2.COLUMN_NAMES,
|
| 66 |
type="pandas",
|
| 67 |
datatype=v2.DATA_TITLE_TYPE,
|
|
|
|
| 71 |
)
|
| 72 |
|
| 73 |
refresh_button2 = gr.Button("Refresh")
|
| 74 |
+
|
| 75 |
+
# save a summary of rankings
|
| 76 |
+
v2.save_ranking_summary(df2[v2.COLUMN_NAMES], 'mmeb_ranking')
|
| 77 |
|
| 78 |
def update_with_tasks_v2(*args):
|
| 79 |
return update_table_v2(*args)
|
|
|
|
| 109 |
return combined_df[v2.COLUMN_NAMES_I]
|
| 110 |
|
| 111 |
# table 2, image scores only
|
| 112 |
+
with gr.TabItem("🖼️ Image (Previously MMEB-V1)", elem_id="tab-image", id=2):
|
| 113 |
gr.Markdown(v2.TABLE_INTRODUCTION_I)
|
| 114 |
df2_i = get_special_processed_df2()
|
| 115 |
data_component3 = gr.components.Dataframe(
|
|
|
|
| 121 |
visible=True,
|
| 122 |
max_height=2400,
|
| 123 |
)
|
| 124 |
+
v2.save_ranking_summary(df2_i, 'image_ranking')
|
| 125 |
|
| 126 |
# table 3, video scores only
|
| 127 |
+
with gr.TabItem("💽 Video", elem_id="tab-video", id=3):
|
| 128 |
gr.Markdown(v2.TABLE_INTRODUCTION_V)
|
| 129 |
+
df2_v = v2.rank_models(df2[v2.COLUMN_NAMES_V], 'Video-Overall')
|
| 130 |
data_component4 = gr.components.Dataframe(
|
| 131 |
+
value=df2_v,
|
| 132 |
headers=v2.COLUMN_NAMES_V,
|
| 133 |
type="pandas",
|
| 134 |
datatype=v2.DATA_TITLE_TYPE_V,
|
|
|
|
| 136 |
visible=True,
|
| 137 |
max_height=2400,
|
| 138 |
)
|
| 139 |
+
v2.save_ranking_summary(df2_v, 'video_ranking')
|
| 140 |
|
| 141 |
# table 4, visual document scores only
|
| 142 |
+
with gr.TabItem("📑 Visual Doc", elem_id="tab-visdoc", id=4):
|
| 143 |
gr.Markdown(v2.TABLE_INTRODUCTION_D)
|
| 144 |
+
df2_d = v2.rank_models(df2[v2.COLUMN_NAMES_D], 'Visdoc-Overall')
|
| 145 |
data_component5 = gr.components.Dataframe(
|
| 146 |
+
value=df2_d,
|
| 147 |
headers=v2.COLUMN_NAMES_D,
|
| 148 |
type="pandas",
|
| 149 |
datatype=v2.DATA_TITLE_TYPE_D,
|
|
|
|
| 151 |
visible=True,
|
| 152 |
max_height=2400,
|
| 153 |
)
|
| 154 |
+
v2.save_ranking_summary(df2_d, 'visdoc_ranking')
|
| 155 |
|
| 156 |
# table 5
|
| 157 |
+
with gr.TabItem("📝 About", elem_id="tab-about", id=5):
|
| 158 |
gr.Markdown(LEADERBOARD_INFO, elem_classes="markdown-text")
|
| 159 |
+
# gr.Image("overview.png", width=900, label="Dataset Overview")
|
| 160 |
|
| 161 |
# table 6
|
| 162 |
+
with gr.TabItem("🚀 Submit here! ", elem_id="tab-submit", id=6):
|
| 163 |
with gr.Row():
|
| 164 |
gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
|
| 165 |
|
rankings/image_ranking.csv
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Rank,Models,Model Size(B),Image-Overall,I-CLS,I-QA,I-RET,I-VG,VOC2007,N24News,SUN397,ObjectNet,Country211,Place365,ImageNet-1K,HatefulMemes,ImageNet-A,ImageNet-R,OK-VQA,A-OKVQA,DocVQA,InfographicsVQA,ChartQA,Visual7W,ScienceQA,GQA,TextVQA,VizWiz,VisDial,CIRR,VisualNews_t2i,VisualNews_i2t,MSCOCO_t2i,MSCOCO_i2t,NIGHTS,WebQA,FashionIQ,Wiki-SS-NQ,OVEN,EDIS,MSCOCO,RefCOCO,RefCOCO-Matching,Visual7W-Pointing
|
| 2 |
+
1,"<a href=""https://interestfm-tte.github.io/"">IFM-TTE-7B</a>",8.29,77.9,76.74,78.49,74.58,89.3,83.4,82.1,78.8,74.0,60.5,65.5,84.0,75.1,73.6,90.4,83.4,77.3,95.5,82.3,83.2,73.8,67.3,79.1,87.9,55.1,84.6,66.4,79.2,82.5,77.7,72.6,69.4,91.1,32.4,73.6,69.9,95.5,85.4,91.7,91.9,88.2
|
| 3 |
+
2,"<a href=""https://seed1-6-embedding.github.io"">seed-1.6-embedding</a>",unknown,77.78,76.06,73.97,77.9,91.25,91.9,82.9,80.8,85.4,47.7,52.8,84.2,73.7,73.1,88.1,74.2,64.6,96.2,82.9,69.2,59.6,79.2,74.7,85.1,54.0,84.6,65.0,83.4,84.5,78.8,77.4,72.1,91.7,49.0,79.6,77.2,91.5,82.1,95.5,93.0,94.4
|
| 4 |
+
3,"<a href=""https://github.com/360CVGroup/RzenEmbed"">RzenEmbed-v2-7B</a>",8.29,75.92,70.61,71.67,78.5,92.1,91.4,84.0,82.7,72.0,29.7,48.1,84.1,62.4,61.5,90.2,73.1,63.1,94.6,76.8,69.2,63.2,60.7,74.6,86.3,55.1,85.6,67.2,82.6,85.8,81.1,78.0,69.0,92.4,41.5,79.8,81.8,97.2,83.5,94.9,93.2,96.8
|
| 5 |
+
4,"<a href=""https://github.com/QQ-MM/QQMM-embed"">QQMM-embed-v2</a>",8.29,75.28,72.97,71.85,76.01,87.42,92.8,82.4,83.7,74.2,30.8,48.8,83.5,79.6,62.7,91.2,73.8,71.1,96.0,74.0,68.1,65.3,62.7,62.4,88.2,56.9,85.5,69.5,80.5,83.9,82.6,80.1,67.0,92.7,32.2,74.2,72.5,91.4,81.5,93.5,92.9,81.8
|
| 6 |
+
5,OEmbedding-v1-7B,8.29,74.05,70.56,70.02,74.67,90.97,89.2,82.0,79.0,72.6,31.2,46.0,84.5,70.5,59.9,90.7,73.2,63.3,95.2,74.4,69.4,60.5,54.8,68.9,86.2,54.3,85.8,65.9,82.5,86.0,81.7,76.8,68.2,90.8,25.6,69.9,73.5,89.4,81.8,95.6,94.1,92.4
|
| 7 |
+
6,ReCo-7B,8.29,73.87,70.95,71.52,73.66,87.7,88.8,83.8,81.2,74.1,28.0,47.4,84.2,73.6,58.3,90.1,74.1,61.8,95.1,76.3,66.7,67.2,54.5,76.8,87.3,55.4,85.3,60.7,81.4,84.3,79.5,74.0,68.7,90.7,20.6,72.1,74.0,92.6,74.1,93.5,94.1,89.1
|
| 8 |
+
7,RzenEmbed-v1-7B,8.29,73.6,69.78,68.72,76.83,85.67,91.6,81.7,82.4,68.5,29.0,43.5,82.8,70.0,59.0,89.3,69.6,61.3,94.8,73.6,64.0,60.5,58.2,65.5,86.2,53.5,83.1,66.3,82.6,85.8,78.6,76.0,68.6,91.3,38.7,74.8,79.6,96.5,78.3,88.3,85.1,91.0
|
| 9 |
+
8,"<a href=""https://huggingface.co/OpenSearch-AI/Ops-MM-embedding-v1-7B"">Ops-MM-embedding-v1-7B</a>",8.29,72.72,69.65,69.58,73.09,87.15,84.8,82.1,81.0,69.7,28.5,45.6,81.1,75.7,57.8,90.2,70.6,60.0,94.7,73.6,65.2,58.4,49.9,79.9,86.9,56.6,81.8,55.2,80.1,84.3,79.3,72.1,66.2,91.9,24.3,74.3,73.2,94.4,73.9,90.4,92.7,91.6
|
| 10 |
+
9,TCE-v1,8.0,72.36,67.89,70.28,72.31,88.85,91.6,81.7,78.8,53.8,20.2,46.4,81.3,75.4,59.6,90.1,71.5,61.4,95.0,81.0,72.0,57.8,58.7,69.4,83.6,52.4,86.7,57.8,77.9,81.8,79.0,77.3,68.1,89.3,25.6,66.2,65.4,92.6,85.2,93.7,91.4,85.1
|
| 11 |
+
10,"<a href=""https://github.com/QQ-MM/QQMM-embed"">QQMM-embed</a>",8.297,72.175,70.07,69.52,71.175,87.075,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 12 |
+
11,"<a href=""https://huggingface.co/raghavlite/B3_Qwen2_7B"">B3_Qwen2_7B</a>",8.29,72.0,70.0,66.5,74.1,84.6,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 13 |
+
12,"<a href=""https://github.com/GaryGuTC/UniME-v2"">UniME-V2-LLaVA-OneVision-7B</a>",8.03,71.77,65.64,68.66,73.1,90.85,92.5,66.0,78.8,73.2,19.0,43.8,78.9,66.1,49.0,89.1,71.7,71.5,92.4,67.1,59.2,62.7,55.2,69.0,84.4,53.4,84.8,67.0,77.3,80.1,80.0,74.6,68.3,90.2,27.0,70.9,68.5,88.5,81.3,95.3,92.8,94.0
|
| 14 |
+
13,"<a href=""https://huggingface.co/zhibinlan/UME-R1-7B"">UME-R1-7B</a>",8.29,71.25,67.09,69.18,71.9,84.85,90.8,82.3,80.3,42.3,25.0,46.8,80.4,79.0,53.9,90.1,71.7,58.7,93.8,79.2,75.1,55.2,53.7,69.3,83.5,51.6,80.7,55.3,76.8,82.0,78.3,71.4,68.1,90.9,23.4,72.5,71.4,92.0,72.7,91.4,91.1,84.2
|
| 15 |
+
14,"<a href=""https://huggingface.co/DeepGlint-AI/UniME-LLaVA-OneVision-7B"">UniME(LLaVA-OneVision-7B-LoRA-Res336)</a>",8.03,70.7,66.8,66.6,70.5,90.9,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 16 |
+
15,"<a href=""https://huggingface.co/zhibinlan/LLaVE-7B"">LLaVE-7B</a>",8.03,70.3,65.7,65.4,70.9,91.9,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 17 |
+
16,"<a href=""https://huggingface.co/friedrichor/Unite-Instruct-Qwen2-VL-7B"">UNITE-Instruct-7B</a>",8.29,70.3,68.3,65.1,71.6,84.8,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 18 |
+
17,"<a href=""https://huggingface.co/intfloat/mmE5-mllama-11b-instruct"">mmE5-mllama-11b-instruct</a>",10.6,69.8,67.6,62.6,71.0,89.6,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 19 |
+
18,"<a href=""https://arxiv.org/pdf/2503.19900"">interestFM-UIR-CAFe-7B</a>",8.03,69.8,65.2,65.6,70.0,91.2,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 20 |
+
19,"<a href=""https://huggingface.co/BAAI/BGE-VL-v1.5-mmeb"">BGE-VL-v1.5 (FT; LlaVA-1.6-Mistral)</a>",7.57,69.4,63.7,64.9,72.2,86.6,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 21 |
+
20,"<a href=""https://huggingface.co/OpenSearch-AI/Ops-MM-embedding-v1-2B"">Ops-MM-embedding-v1-2B</a>",2.21,69.03,68.07,65.11,69.17,80.85,81.3,78.4,80.7,68.4,28.6,43.9,81.1,74.0,53.1,91.2,64.1,54.0,93.0,65.3,56.1,58.5,47.3,74.4,83.1,55.3,79.4,47.3,76.2,79.1,74.1,69.7,66.1,91.1,18.5,68.8,69.1,90.6,66.8,84.1,87.9,84.6
|
| 22 |
+
21,RzenEmbed-v1-2B,2.21,68.53,65.29,61.7,73.81,77.85,89.6,74.0,78.4,71.3,24.5,41.3,80.9,58.6,49.5,84.8,60.0,53.2,90.5,59.3,51.8,57.1,47.5,60.4,83.8,53.4,79.1,62.7,78.0,81.4,76.7,73.6,67.9,91.0,33.8,74.9,74.6,92.0,68.5,86.0,74.8,82.1
|
| 23 |
+
22,"<a href=""https://huggingface.co/raghavlite/B3_Qwen2_2B"">B3_Qwen2_2B</a>",2.21,68.1,67.0,61.19,70.85,79.88,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 24 |
+
23,"<a href=""https://arxiv.org/abs/2503.19900"">interestFM-UIR-CAFe-7B</a>",8.03,67.56,63.63,61.66,69.07,87.58,89.8,83.2,79.9,22.5,16.7,45.0,77.3,78.7,55.2,88.0,67.3,63.8,79.2,53.3,48.8,52.5,65.4,65.7,76.8,43.8,82.7,60.4,69.5,79.4,75.4,73.1,66.7,89.3,39.0,61.2,60.8,71.3,84.7,89.4,83.0,93.2
|
| 25 |
+
24,"<a href=""https://huggingface.co/DeepGlint-AI/UniME-LLaVA-1.6-7B"">UniME(LLaVA-1.6-7B-LoRA-LowRes)</a>",7.57,66.6,60.6,52.9,67.9,85.1,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 26 |
+
25,"<a href=""https://huggingface.co/zhibinlan/UME-R1-2B"">UME-R1-2B</a>",2.21,66.56,64.81,62.78,67.62,77.17,80.0,81.1,79.4,52.0,23.4,42.6,75.3,75.2,50.4,88.7,62.4,51.1,92.2,67.7,64.9,54.1,42.7,67.3,78.6,46.8,76.6,53.7,71.7,74.2,75.1,68.9,67.2,90.0,17.1,62.0,66.9,88.0,69.5,83.3,84.4,71.5
|
| 27 |
+
26,"<a href=""https://huggingface.co/TIGER-Lab/VLM2Vec-Qwen2VL-7B"">VLM2Vec (Qwen2-VL-7B-LoRA-HighRes)</a>",8.29,65.8,62.6,57.8,69.9,81.7,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 28 |
+
27,"<a href=""https://huggingface.co/TIGER-Lab/VLM2Vec-Qwen2VL-7B"">VLM2Vec-V1-Qwen2VL-7B</a>",8.29,65.49,62.69,56.85,69.44,82.22,80.7,79.7,77.4,40.1,29.8,37.4,80.1,69.7,58.1,73.9,56.8,47.3,89.7,60.0,56.9,52.7,38.5,55.1,71.6,39.9,81.9,51.1,80.5,81.2,77.2,73.9,67.6,88.3,17.1,62.3,66.5,85.7,75.7,87.6,84.6,81.0
|
| 29 |
+
28,"<a href=""https://huggingface.co/zhibinlan/LLaVE-2B"">LLaVE-2B</a>",1.95,65.2,62.1,60.2,65.2,84.9,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 30 |
+
29,"<a href=""https://huggingface.co/VLM2Vec/VLM2Vec-V2.0"">VLM2Vec-V2.0-Qwen2VL-2B</a>",2.21,64.85,62.9,56.29,69.47,77.3,85.0,72.9,71.0,65.2,25.2,35.9,80.8,56.3,47.4,89.3,51.5,43.6,90.1,58.8,47.4,52.9,38.2,64.9,72.2,43.3,82.7,57.5,74.5,78.2,75.3,71.4,68.6,90.6,19.5,66.9,64.3,84.1,67.1,87.1,85.8,69.2
|
| 31 |
+
30,"<a href=""https://huggingface.co/DeepGlint-AI/UniME-Phi3.5-V-4.2B"">UniME(Phi-3.5-V-LoRA)</a>",4.2,64.2,54.8,55.9,64.5,81.8,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 32 |
+
31,"<a href=""https://huggingface.co/JUNJIE99/MMRet-large"">MMRet-MLLM (FT)</a>",7.57,64.1,56.0,57.4,69.9,83.6,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 33 |
+
32,"<a href=""https://huggingface.co/friedrichor/Unite-Instruct-Qwen2-VL-2B"">UNITE-Instruct-2B</a>",2.21,63.3,63.2,55.9,65.4,75.6,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 34 |
+
33,"<a href=""https://huggingface.co/TIGER-Lab/VLM2Vec-LLaVa-Next"">VLM2Vec (LLaVA-1.6-LoRA-HighRes)</a>",7.57,62.9,61.2,49.9,67.4,86.1,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 35 |
+
34,"<a href=""https://huggingface.co/BAAI/BGE-VL-v1.5-zs"">BGE-VL-v1.5 (zeroshot; LlaVA-1.6-Mistral)</a>",7.57,60.1,56.1,55.3,63.9,70.8,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 36 |
+
35,"<a href=""https://huggingface.co/TIGER-Lab/VLM2Vec-Full"">VLM2Vec (Phi-3.5-V-LoRA)</a>",4.15,60.1,54.8,54.9,62.3,79.5,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 37 |
+
36,"<a href=""https://huggingface.co/TIGER-Lab/VLM2Vec-Qwen2VL-2B"">VLM2Vec-V1-Qwen2VL-2B</a>",2.21,59.74,58.71,49.26,64.98,72.85,74.3,73.7,73.8,37.1,21.5,35.3,77.5,58.3,50.9,84.7,48.5,39.5,82.5,47.7,42.3,51.2,30.7,48.3,63.3,38.6,74.3,46.8,73.1,73.7,73.4,68.5,66.3,85.9,14.0,54.2,68.3,81.2,66.5,80.9,75.7,68.3
|
| 38 |
+
37,"<a href=""https://arxiv.org/pdf/2503.19900"">interestFM-UIR-CAFe-0.5B</a>",0.894,59.6,59.1,49.1,61.0,83.0,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 39 |
+
38,"<a href=""https://huggingface.co/TIGER-Lab/VLM2Vec-Qwen2VL-2B"">VLM2Vec (Qwen2-VL-2B-LoRA-HighRes)</a>",2.21,59.3,59.0,49.4,65.4,73.4,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 40 |
+
39,"<a href=""https://huggingface.co/zhibinlan/LLaVE-0.5B"">LLaVE-0.5B</a>",0.894,59.1,57.4,50.3,59.8,82.9,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 41 |
+
40,"<a href=""https://huggingface.co/intfloat/mmE5-mllama-11b-instruct"">mmE5 (w/ 560K synthetic data)</a>",10.6,58.6,60.6,55.7,54.7,72.4,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 42 |
+
41,"<a href=""https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-7B-Instruct"">gme-Qwen2-VL-7B-Instruct</a>",8.29,55.95,57.65,34.66,71.17,59.3,80.3,50.5,69.5,69.0,24.8,39.1,64.6,53.6,41.2,83.9,33.2,21.0,41.4,20.3,17.8,22.2,28.0,76.9,46.8,39.0,60.8,54.9,79.7,83.6,71.2,57.7,67.6,91.4,37.8,78.2,75.1,96.0,31.4,60.9,78.4,66.5
|
| 43 |
+
42,"<a href=""https://huggingface.co/TIGER-Lab/VLM2Vec-Full"">VLM2Vec (Phi-3.5-V-FT)</a>",4.15,55.9,52.8,50.3,57.8,72.3,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 44 |
+
43,"<a href=""https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-2B-Instruct"">gme-Qwen2-VL-2B-Instruct</a>",2.21,55.8,56.9,41.2,67.8,53.4,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 45 |
+
44,"<a href=""https://arxiv.org/abs/2503.19900"">interestFM-UIR-CAFe-0.5B</a>",0.894,55.43,56.44,45.28,57.55,71.95,81.9,69.6,68.8,51.0,11.3,37.4,64.6,55.3,38.1,86.4,47.1,43.0,60.6,30.2,32.7,48.9,39.7,52.1,59.6,38.9,68.4,43.4,58.4,61.0,66.4,63.2,63.8,80.0,23.3,46.4,52.4,63.9,63.1,76.6,70.6,77.5
|
| 46 |
+
45,"<a href=""https://huggingface.co/TIGER-Lab/VLM2Vec-LLaVa-Next"">VLM2Vec (LLaVA-1.6-LoRA-LowRes)</a>",7.57,55.0,54.7,50.3,56.2,64.0,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 47 |
+
46,"<a href=""https://huggingface.co/code-kunkun/LamRA-Ret"">LamRA-Ret</a>",8.29,54.08,59.2,26.47,69.95,62.65,80.1,51.3,68.5,66.4,28.3,40.6,72.3,49.0,47.0,88.5,37.8,27.0,22.3,16.5,11.7,19.6,26.3,38.5,33.0,32.0,61.3,51.7,70.4,83.9,72.2,73.7,65.6,81.0,42.0,69.7,82.0,85.9,44.8,62.8,75.7,67.3
|
| 48 |
+
47,"<a href=""https://huggingface.co/code-kunkun/LamRA-Ret-Qwen2.5VL-7b"">LamRA-Ret-Qwen2.5VL-7b</a>",8.29,52.43,51.7,34.12,66.86,56.73,78.7,29.8,66.5,59.4,21.7,37.4,58.9,51.3,36.3,77.0,39.9,34.1,37.1,23.7,15.0,24.6,31.3,57.4,46.1,32.0,62.5,44.7,70.1,74.2,65.7,71.1,64.4,85.7,33.4,67.0,84.8,78.7,36.0,57.1,82.6,51.2
|
| 49 |
+
48,"<a href=""https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-2B-Instruct"">gme-Qwen2-VL-2B-Instruct</a>",2.21,51.89,54.44,29.86,66.93,55.47,75.9,50.1,67.3,70.6,26.5,35.8,58.3,52.5,28.8,78.6,29.9,18.6,29.8,11.6,13.4,16.2,27.3,75.1,39.7,37.0,48.1,44.2,74.7,78.3,68.1,63.1,67.0,88.8,32.9,73.9,72.3,91.8,28.6,55.9,73.3,64.1
|
| 50 |
+
49,"<a href=""https://huggingface.co/nvidia/MM-Embed"">MM-Embed</a>",8.18,50.0,48.1,32.3,63.8,57.8,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 51 |
+
50,"<a href=""https://doi.org/10.48550/arXiv.2212.07143"">OpenCLIP-FT</a>",0.428,47.2,56.0,21.9,55.4,64.1,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 52 |
+
51,"<a href=""https://doi.org/10.48550/arXiv.2103.00020"">CLIP-FT</a>",0.428,45.4,55.2,19.7,53.2,62.2,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 53 |
+
52,"<a href=""https://huggingface.co/TIGER-Lab/UniIR"">UniIR (CLIP_SF)</a>",0.428,44.7,44.3,16.2,61.8,65.3,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 54 |
+
53,"<a href=""https://huggingface.co/JUNJIE99/MMRet-large"">MMRet-MLLM (LLaVA-1.6)</a>",7.57,44.0,47.2,18.4,56.5,62.2,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 55 |
+
54,"<a href=""https://huggingface.co/TIGER-Lab/UniIR"">UniIR (BLIP_FF)</a>",0.247,42.8,42.1,15.0,60.1,62.2,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 56 |
+
55,"<a href=""https://github.com/mlfoundations/open_clip"">open_clip-ViT-L/14</a>",0.428,39.7,47.8,10.9,52.3,53.3,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 57 |
+
56,"<a href=""https://huggingface.co/openai/clip-vit-large-patch14"">clip-vit-large-patch14</a>",0.428,37.8,42.8,9.1,53.0,51.8,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 58 |
+
57,"<a href=""https://huggingface.co/vidore/colpali-v1.3"">colpali-v1.3</a>",2.92,34.89,40.3,11.51,48.05,40.3,69.8,25.5,56.1,45.6,6.0,27.5,42.4,50.6,14.9,64.6,9.4,6.6,11.3,5.0,5.7,6.1,16.3,8.3,18.8,27.6,41.2,8.2,50.1,47.6,59.2,49.9,65.5,53.8,5.9,80.5,50.0,64.7,36.7,64.5,3.9,56.1
|
| 59 |
+
58,"<a href=""https://huggingface.co/google/siglip-base-patch16-224"">siglip-base-patch16-224</a>",0.203,34.8,40.3,8.4,31.6,59.5,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 60 |
+
59,"<a href=""https://github.com/google-deepmind/magiclens"">Magiclens</a>",0.428,27.8,38.8,8.3,35.4,26.0,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 61 |
+
60,"<a href=""https://huggingface.co/Salesforce/blip2-opt-2.7b"">blip2-opt-2.7b</a>",3.74,25.2,27.0,4.2,33.9,47.0,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
| 62 |
+
61,"<a href=""https://huggingface.co/royokong/e5-v"">e5-v</a>",8.36,13.3,21.8,4.9,11.5,19.0,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
|
rankings/image_ranking.jsonl
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"Rank":1,"Models":"<a href=\"https:\/\/interestfm-tte.github.io\/\">IFM-TTE-7B<\/a>","Model Size(B)":8.29,"Image-Overall":77.9,"I-CLS":76.74,"I-QA":78.49,"I-RET":74.58,"I-VG":89.3,"VOC2007":83.4,"N24News":82.1,"SUN397":78.8,"ObjectNet":74.0,"Country211":60.5,"Place365":65.5,"ImageNet-1K":84.0,"HatefulMemes":75.1,"ImageNet-A":73.6,"ImageNet-R":90.4,"OK-VQA":83.4,"A-OKVQA":77.3,"DocVQA":95.5,"InfographicsVQA":82.3,"ChartQA":83.2,"Visual7W":73.8,"ScienceQA":67.3,"GQA":79.1,"TextVQA":87.9,"VizWiz":55.1,"VisDial":84.6,"CIRR":66.4,"VisualNews_t2i":79.2,"VisualNews_i2t":82.5,"MSCOCO_t2i":77.7,"MSCOCO_i2t":72.6,"NIGHTS":69.4,"WebQA":91.1,"FashionIQ":32.4,"Wiki-SS-NQ":73.6,"OVEN":69.9,"EDIS":95.5,"MSCOCO":85.4,"RefCOCO":91.7,"RefCOCO-Matching":91.9,"Visual7W-Pointing":88.2}
|
| 2 |
+
{"Rank":2,"Models":"<a href=\"https:\/\/seed1-6-embedding.github.io\">seed-1.6-embedding<\/a>","Model Size(B)":"unknown","Image-Overall":77.78,"I-CLS":76.06,"I-QA":73.97,"I-RET":77.9,"I-VG":91.25,"VOC2007":91.9,"N24News":82.9,"SUN397":80.8,"ObjectNet":85.4,"Country211":47.7,"Place365":52.8,"ImageNet-1K":84.2,"HatefulMemes":73.7,"ImageNet-A":73.1,"ImageNet-R":88.1,"OK-VQA":74.2,"A-OKVQA":64.6,"DocVQA":96.2,"InfographicsVQA":82.9,"ChartQA":69.2,"Visual7W":59.6,"ScienceQA":79.2,"GQA":74.7,"TextVQA":85.1,"VizWiz":54.0,"VisDial":84.6,"CIRR":65.0,"VisualNews_t2i":83.4,"VisualNews_i2t":84.5,"MSCOCO_t2i":78.8,"MSCOCO_i2t":77.4,"NIGHTS":72.1,"WebQA":91.7,"FashionIQ":49.0,"Wiki-SS-NQ":79.6,"OVEN":77.2,"EDIS":91.5,"MSCOCO":82.1,"RefCOCO":95.5,"RefCOCO-Matching":93.0,"Visual7W-Pointing":94.4}
|
| 3 |
+
{"Rank":3,"Models":"<a href=\"https:\/\/github.com\/360CVGroup\/RzenEmbed\">RzenEmbed-v2-7B<\/a>","Model Size(B)":8.29,"Image-Overall":75.92,"I-CLS":70.61,"I-QA":71.67,"I-RET":78.5,"I-VG":92.1,"VOC2007":91.4,"N24News":84.0,"SUN397":82.7,"ObjectNet":72.0,"Country211":29.7,"Place365":48.1,"ImageNet-1K":84.1,"HatefulMemes":62.4,"ImageNet-A":61.5,"ImageNet-R":90.2,"OK-VQA":73.1,"A-OKVQA":63.1,"DocVQA":94.6,"InfographicsVQA":76.8,"ChartQA":69.2,"Visual7W":63.2,"ScienceQA":60.7,"GQA":74.6,"TextVQA":86.3,"VizWiz":55.1,"VisDial":85.6,"CIRR":67.2,"VisualNews_t2i":82.6,"VisualNews_i2t":85.8,"MSCOCO_t2i":81.1,"MSCOCO_i2t":78.0,"NIGHTS":69.0,"WebQA":92.4,"FashionIQ":41.5,"Wiki-SS-NQ":79.8,"OVEN":81.8,"EDIS":97.2,"MSCOCO":83.5,"RefCOCO":94.9,"RefCOCO-Matching":93.2,"Visual7W-Pointing":96.8}
|
| 4 |
+
{"Rank":4,"Models":"<a href=\"https:\/\/github.com\/QQ-MM\/QQMM-embed\">QQMM-embed-v2<\/a>","Model Size(B)":8.29,"Image-Overall":75.28,"I-CLS":72.97,"I-QA":71.85,"I-RET":76.01,"I-VG":87.42,"VOC2007":92.8,"N24News":82.4,"SUN397":83.7,"ObjectNet":74.2,"Country211":30.8,"Place365":48.8,"ImageNet-1K":83.5,"HatefulMemes":79.6,"ImageNet-A":62.7,"ImageNet-R":91.2,"OK-VQA":73.8,"A-OKVQA":71.1,"DocVQA":96.0,"InfographicsVQA":74.0,"ChartQA":68.1,"Visual7W":65.3,"ScienceQA":62.7,"GQA":62.4,"TextVQA":88.2,"VizWiz":56.9,"VisDial":85.5,"CIRR":69.5,"VisualNews_t2i":80.5,"VisualNews_i2t":83.9,"MSCOCO_t2i":82.6,"MSCOCO_i2t":80.1,"NIGHTS":67.0,"WebQA":92.7,"FashionIQ":32.2,"Wiki-SS-NQ":74.2,"OVEN":72.5,"EDIS":91.4,"MSCOCO":81.5,"RefCOCO":93.5,"RefCOCO-Matching":92.9,"Visual7W-Pointing":81.8}
|
| 5 |
+
{"Rank":5,"Models":"OEmbedding-v1-7B","Model Size(B)":8.29,"Image-Overall":74.05,"I-CLS":70.56,"I-QA":70.02,"I-RET":74.67,"I-VG":90.97,"VOC2007":89.2,"N24News":82.0,"SUN397":79.0,"ObjectNet":72.6,"Country211":31.2,"Place365":46.0,"ImageNet-1K":84.5,"HatefulMemes":70.5,"ImageNet-A":59.9,"ImageNet-R":90.7,"OK-VQA":73.2,"A-OKVQA":63.3,"DocVQA":95.2,"InfographicsVQA":74.4,"ChartQA":69.4,"Visual7W":60.5,"ScienceQA":54.8,"GQA":68.9,"TextVQA":86.2,"VizWiz":54.3,"VisDial":85.8,"CIRR":65.9,"VisualNews_t2i":82.5,"VisualNews_i2t":86.0,"MSCOCO_t2i":81.7,"MSCOCO_i2t":76.8,"NIGHTS":68.2,"WebQA":90.8,"FashionIQ":25.6,"Wiki-SS-NQ":69.9,"OVEN":73.5,"EDIS":89.4,"MSCOCO":81.8,"RefCOCO":95.6,"RefCOCO-Matching":94.1,"Visual7W-Pointing":92.4}
|
| 6 |
+
{"Rank":6,"Models":"ReCo-7B","Model Size(B)":8.29,"Image-Overall":73.87,"I-CLS":70.95,"I-QA":71.52,"I-RET":73.66,"I-VG":87.7,"VOC2007":88.8,"N24News":83.8,"SUN397":81.2,"ObjectNet":74.1,"Country211":28.0,"Place365":47.4,"ImageNet-1K":84.2,"HatefulMemes":73.6,"ImageNet-A":58.3,"ImageNet-R":90.1,"OK-VQA":74.1,"A-OKVQA":61.8,"DocVQA":95.1,"InfographicsVQA":76.3,"ChartQA":66.7,"Visual7W":67.2,"ScienceQA":54.5,"GQA":76.8,"TextVQA":87.3,"VizWiz":55.4,"VisDial":85.3,"CIRR":60.7,"VisualNews_t2i":81.4,"VisualNews_i2t":84.3,"MSCOCO_t2i":79.5,"MSCOCO_i2t":74.0,"NIGHTS":68.7,"WebQA":90.7,"FashionIQ":20.6,"Wiki-SS-NQ":72.1,"OVEN":74.0,"EDIS":92.6,"MSCOCO":74.1,"RefCOCO":93.5,"RefCOCO-Matching":94.1,"Visual7W-Pointing":89.1}
|
| 7 |
+
{"Rank":7,"Models":"RzenEmbed-v1-7B","Model Size(B)":8.29,"Image-Overall":73.6,"I-CLS":69.78,"I-QA":68.72,"I-RET":76.83,"I-VG":85.67,"VOC2007":91.6,"N24News":81.7,"SUN397":82.4,"ObjectNet":68.5,"Country211":29.0,"Place365":43.5,"ImageNet-1K":82.8,"HatefulMemes":70.0,"ImageNet-A":59.0,"ImageNet-R":89.3,"OK-VQA":69.6,"A-OKVQA":61.3,"DocVQA":94.8,"InfographicsVQA":73.6,"ChartQA":64.0,"Visual7W":60.5,"ScienceQA":58.2,"GQA":65.5,"TextVQA":86.2,"VizWiz":53.5,"VisDial":83.1,"CIRR":66.3,"VisualNews_t2i":82.6,"VisualNews_i2t":85.8,"MSCOCO_t2i":78.6,"MSCOCO_i2t":76.0,"NIGHTS":68.6,"WebQA":91.3,"FashionIQ":38.7,"Wiki-SS-NQ":74.8,"OVEN":79.6,"EDIS":96.5,"MSCOCO":78.3,"RefCOCO":88.3,"RefCOCO-Matching":85.1,"Visual7W-Pointing":91.0}
|
| 8 |
+
{"Rank":8,"Models":"<a href=\"https:\/\/huggingface.co\/OpenSearch-AI\/Ops-MM-embedding-v1-7B\">Ops-MM-embedding-v1-7B<\/a>","Model Size(B)":8.29,"Image-Overall":72.72,"I-CLS":69.65,"I-QA":69.58,"I-RET":73.09,"I-VG":87.15,"VOC2007":84.8,"N24News":82.1,"SUN397":81.0,"ObjectNet":69.7,"Country211":28.5,"Place365":45.6,"ImageNet-1K":81.1,"HatefulMemes":75.7,"ImageNet-A":57.8,"ImageNet-R":90.2,"OK-VQA":70.6,"A-OKVQA":60.0,"DocVQA":94.7,"InfographicsVQA":73.6,"ChartQA":65.2,"Visual7W":58.4,"ScienceQA":49.9,"GQA":79.9,"TextVQA":86.9,"VizWiz":56.6,"VisDial":81.8,"CIRR":55.2,"VisualNews_t2i":80.1,"VisualNews_i2t":84.3,"MSCOCO_t2i":79.3,"MSCOCO_i2t":72.1,"NIGHTS":66.2,"WebQA":91.9,"FashionIQ":24.3,"Wiki-SS-NQ":74.3,"OVEN":73.2,"EDIS":94.4,"MSCOCO":73.9,"RefCOCO":90.4,"RefCOCO-Matching":92.7,"Visual7W-Pointing":91.6}
|
| 9 |
+
{"Rank":9,"Models":"TCE-v1","Model Size(B)":8.0,"Image-Overall":72.36,"I-CLS":67.89,"I-QA":70.28,"I-RET":72.31,"I-VG":88.85,"VOC2007":91.6,"N24News":81.7,"SUN397":78.8,"ObjectNet":53.8,"Country211":20.2,"Place365":46.4,"ImageNet-1K":81.3,"HatefulMemes":75.4,"ImageNet-A":59.6,"ImageNet-R":90.1,"OK-VQA":71.5,"A-OKVQA":61.4,"DocVQA":95.0,"InfographicsVQA":81.0,"ChartQA":72.0,"Visual7W":57.8,"ScienceQA":58.7,"GQA":69.4,"TextVQA":83.6,"VizWiz":52.4,"VisDial":86.7,"CIRR":57.8,"VisualNews_t2i":77.9,"VisualNews_i2t":81.8,"MSCOCO_t2i":79.0,"MSCOCO_i2t":77.3,"NIGHTS":68.1,"WebQA":89.3,"FashionIQ":25.6,"Wiki-SS-NQ":66.2,"OVEN":65.4,"EDIS":92.6,"MSCOCO":85.2,"RefCOCO":93.7,"RefCOCO-Matching":91.4,"Visual7W-Pointing":85.1}
|
| 10 |
+
{"Rank":10,"Models":"<a href=\"https:\/\/github.com\/QQ-MM\/QQMM-embed\">QQMM-embed<\/a>","Model Size(B)":8.297,"Image-Overall":72.175,"I-CLS":70.07,"I-QA":69.52,"I-RET":71.175,"I-VG":87.075,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 11 |
+
{"Rank":11,"Models":"<a href=\"https:\/\/huggingface.co\/raghavlite\/B3_Qwen2_7B\">B3_Qwen2_7B<\/a>","Model Size(B)":8.29,"Image-Overall":72.0,"I-CLS":70.0,"I-QA":66.5,"I-RET":74.1,"I-VG":84.6,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 12 |
+
{"Rank":12,"Models":"<a href=\"https:\/\/github.com\/GaryGuTC\/UniME-v2\">UniME-V2-LLaVA-OneVision-7B<\/a>","Model Size(B)":8.03,"Image-Overall":71.77,"I-CLS":65.64,"I-QA":68.66,"I-RET":73.1,"I-VG":90.85,"VOC2007":92.5,"N24News":66.0,"SUN397":78.8,"ObjectNet":73.2,"Country211":19.0,"Place365":43.8,"ImageNet-1K":78.9,"HatefulMemes":66.1,"ImageNet-A":49.0,"ImageNet-R":89.1,"OK-VQA":71.7,"A-OKVQA":71.5,"DocVQA":92.4,"InfographicsVQA":67.1,"ChartQA":59.2,"Visual7W":62.7,"ScienceQA":55.2,"GQA":69.0,"TextVQA":84.4,"VizWiz":53.4,"VisDial":84.8,"CIRR":67.0,"VisualNews_t2i":77.3,"VisualNews_i2t":80.1,"MSCOCO_t2i":80.0,"MSCOCO_i2t":74.6,"NIGHTS":68.3,"WebQA":90.2,"FashionIQ":27.0,"Wiki-SS-NQ":70.9,"OVEN":68.5,"EDIS":88.5,"MSCOCO":81.3,"RefCOCO":95.3,"RefCOCO-Matching":92.8,"Visual7W-Pointing":94.0}
|
| 13 |
+
{"Rank":13,"Models":"<a href=\"https:\/\/huggingface.co\/zhibinlan\/UME-R1-7B\">UME-R1-7B<\/a>","Model Size(B)":8.29,"Image-Overall":71.25,"I-CLS":67.09,"I-QA":69.18,"I-RET":71.9,"I-VG":84.85,"VOC2007":90.8,"N24News":82.3,"SUN397":80.3,"ObjectNet":42.3,"Country211":25.0,"Place365":46.8,"ImageNet-1K":80.4,"HatefulMemes":79.0,"ImageNet-A":53.9,"ImageNet-R":90.1,"OK-VQA":71.7,"A-OKVQA":58.7,"DocVQA":93.8,"InfographicsVQA":79.2,"ChartQA":75.1,"Visual7W":55.2,"ScienceQA":53.7,"GQA":69.3,"TextVQA":83.5,"VizWiz":51.6,"VisDial":80.7,"CIRR":55.3,"VisualNews_t2i":76.8,"VisualNews_i2t":82.0,"MSCOCO_t2i":78.3,"MSCOCO_i2t":71.4,"NIGHTS":68.1,"WebQA":90.9,"FashionIQ":23.4,"Wiki-SS-NQ":72.5,"OVEN":71.4,"EDIS":92.0,"MSCOCO":72.7,"RefCOCO":91.4,"RefCOCO-Matching":91.1,"Visual7W-Pointing":84.2}
|
| 14 |
+
{"Rank":14,"Models":"<a href=\"https:\/\/huggingface.co\/DeepGlint-AI\/UniME-LLaVA-OneVision-7B\">UniME(LLaVA-OneVision-7B-LoRA-Res336)<\/a>","Model Size(B)":8.03,"Image-Overall":70.7,"I-CLS":66.8,"I-QA":66.6,"I-RET":70.5,"I-VG":90.9,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 15 |
+
{"Rank":15,"Models":"<a href=\"https:\/\/huggingface.co\/zhibinlan\/LLaVE-7B\">LLaVE-7B<\/a>","Model Size(B)":8.03,"Image-Overall":70.3,"I-CLS":65.7,"I-QA":65.4,"I-RET":70.9,"I-VG":91.9,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 16 |
+
{"Rank":16,"Models":"<a href=\"https:\/\/huggingface.co\/friedrichor\/Unite-Instruct-Qwen2-VL-7B\">UNITE-Instruct-7B<\/a>","Model Size(B)":8.29,"Image-Overall":70.3,"I-CLS":68.3,"I-QA":65.1,"I-RET":71.6,"I-VG":84.8,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 17 |
+
{"Rank":17,"Models":"<a href=\"https:\/\/huggingface.co\/intfloat\/mmE5-mllama-11b-instruct\">mmE5-mllama-11b-instruct<\/a>","Model Size(B)":10.6,"Image-Overall":69.8,"I-CLS":67.6,"I-QA":62.6,"I-RET":71.0,"I-VG":89.6,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 18 |
+
{"Rank":18,"Models":"<a href=\"https:\/\/arxiv.org\/pdf\/2503.19900\">interestFM-UIR-CAFe-7B<\/a>","Model Size(B)":8.03,"Image-Overall":69.8,"I-CLS":65.2,"I-QA":65.6,"I-RET":70.0,"I-VG":91.2,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 19 |
+
{"Rank":19,"Models":"<a href=\"https:\/\/huggingface.co\/BAAI\/BGE-VL-v1.5-mmeb\">BGE-VL-v1.5 (FT; LlaVA-1.6-Mistral)<\/a>","Model Size(B)":7.57,"Image-Overall":69.4,"I-CLS":63.7,"I-QA":64.9,"I-RET":72.2,"I-VG":86.6,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 20 |
+
{"Rank":20,"Models":"<a href=\"https:\/\/huggingface.co\/OpenSearch-AI\/Ops-MM-embedding-v1-2B\">Ops-MM-embedding-v1-2B<\/a>","Model Size(B)":2.21,"Image-Overall":69.03,"I-CLS":68.07,"I-QA":65.11,"I-RET":69.17,"I-VG":80.85,"VOC2007":81.3,"N24News":78.4,"SUN397":80.7,"ObjectNet":68.4,"Country211":28.6,"Place365":43.9,"ImageNet-1K":81.1,"HatefulMemes":74.0,"ImageNet-A":53.1,"ImageNet-R":91.2,"OK-VQA":64.1,"A-OKVQA":54.0,"DocVQA":93.0,"InfographicsVQA":65.3,"ChartQA":56.1,"Visual7W":58.5,"ScienceQA":47.3,"GQA":74.4,"TextVQA":83.1,"VizWiz":55.3,"VisDial":79.4,"CIRR":47.3,"VisualNews_t2i":76.2,"VisualNews_i2t":79.1,"MSCOCO_t2i":74.1,"MSCOCO_i2t":69.7,"NIGHTS":66.1,"WebQA":91.1,"FashionIQ":18.5,"Wiki-SS-NQ":68.8,"OVEN":69.1,"EDIS":90.6,"MSCOCO":66.8,"RefCOCO":84.1,"RefCOCO-Matching":87.9,"Visual7W-Pointing":84.6}
|
| 21 |
+
{"Rank":21,"Models":"RzenEmbed-v1-2B","Model Size(B)":2.21,"Image-Overall":68.53,"I-CLS":65.29,"I-QA":61.7,"I-RET":73.81,"I-VG":77.85,"VOC2007":89.6,"N24News":74.0,"SUN397":78.4,"ObjectNet":71.3,"Country211":24.5,"Place365":41.3,"ImageNet-1K":80.9,"HatefulMemes":58.6,"ImageNet-A":49.5,"ImageNet-R":84.8,"OK-VQA":60.0,"A-OKVQA":53.2,"DocVQA":90.5,"InfographicsVQA":59.3,"ChartQA":51.8,"Visual7W":57.1,"ScienceQA":47.5,"GQA":60.4,"TextVQA":83.8,"VizWiz":53.4,"VisDial":79.1,"CIRR":62.7,"VisualNews_t2i":78.0,"VisualNews_i2t":81.4,"MSCOCO_t2i":76.7,"MSCOCO_i2t":73.6,"NIGHTS":67.9,"WebQA":91.0,"FashionIQ":33.8,"Wiki-SS-NQ":74.9,"OVEN":74.6,"EDIS":92.0,"MSCOCO":68.5,"RefCOCO":86.0,"RefCOCO-Matching":74.8,"Visual7W-Pointing":82.1}
|
| 22 |
+
{"Rank":22,"Models":"<a href=\"https:\/\/huggingface.co\/raghavlite\/B3_Qwen2_2B\">B3_Qwen2_2B<\/a>","Model Size(B)":2.21,"Image-Overall":68.1,"I-CLS":67.0,"I-QA":61.19,"I-RET":70.85,"I-VG":79.88,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 23 |
+
{"Rank":23,"Models":"<a href=\"https:\/\/arxiv.org\/abs\/2503.19900\">interestFM-UIR-CAFe-7B<\/a>","Model Size(B)":8.03,"Image-Overall":67.56,"I-CLS":63.63,"I-QA":61.66,"I-RET":69.07,"I-VG":87.58,"VOC2007":89.8,"N24News":83.2,"SUN397":79.9,"ObjectNet":22.5,"Country211":16.7,"Place365":45.0,"ImageNet-1K":77.3,"HatefulMemes":78.7,"ImageNet-A":55.2,"ImageNet-R":88.0,"OK-VQA":67.3,"A-OKVQA":63.8,"DocVQA":79.2,"InfographicsVQA":53.3,"ChartQA":48.8,"Visual7W":52.5,"ScienceQA":65.4,"GQA":65.7,"TextVQA":76.8,"VizWiz":43.8,"VisDial":82.7,"CIRR":60.4,"VisualNews_t2i":69.5,"VisualNews_i2t":79.4,"MSCOCO_t2i":75.4,"MSCOCO_i2t":73.1,"NIGHTS":66.7,"WebQA":89.3,"FashionIQ":39.0,"Wiki-SS-NQ":61.2,"OVEN":60.8,"EDIS":71.3,"MSCOCO":84.7,"RefCOCO":89.4,"RefCOCO-Matching":83.0,"Visual7W-Pointing":93.2}
|
| 24 |
+
{"Rank":24,"Models":"<a href=\"https:\/\/huggingface.co\/DeepGlint-AI\/UniME-LLaVA-1.6-7B\">UniME(LLaVA-1.6-7B-LoRA-LowRes)<\/a>","Model Size(B)":7.57,"Image-Overall":66.6,"I-CLS":60.6,"I-QA":52.9,"I-RET":67.9,"I-VG":85.1,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 25 |
+
{"Rank":25,"Models":"<a href=\"https:\/\/huggingface.co\/zhibinlan\/UME-R1-2B\">UME-R1-2B<\/a>","Model Size(B)":2.21,"Image-Overall":66.56,"I-CLS":64.81,"I-QA":62.78,"I-RET":67.62,"I-VG":77.17,"VOC2007":80.0,"N24News":81.1,"SUN397":79.4,"ObjectNet":52.0,"Country211":23.4,"Place365":42.6,"ImageNet-1K":75.3,"HatefulMemes":75.2,"ImageNet-A":50.4,"ImageNet-R":88.7,"OK-VQA":62.4,"A-OKVQA":51.1,"DocVQA":92.2,"InfographicsVQA":67.7,"ChartQA":64.9,"Visual7W":54.1,"ScienceQA":42.7,"GQA":67.3,"TextVQA":78.6,"VizWiz":46.8,"VisDial":76.6,"CIRR":53.7,"VisualNews_t2i":71.7,"VisualNews_i2t":74.2,"MSCOCO_t2i":75.1,"MSCOCO_i2t":68.9,"NIGHTS":67.2,"WebQA":90.0,"FashionIQ":17.1,"Wiki-SS-NQ":62.0,"OVEN":66.9,"EDIS":88.0,"MSCOCO":69.5,"RefCOCO":83.3,"RefCOCO-Matching":84.4,"Visual7W-Pointing":71.5}
|
| 26 |
+
{"Rank":26,"Models":"<a href=\"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Qwen2VL-7B\">VLM2Vec (Qwen2-VL-7B-LoRA-HighRes)<\/a>","Model Size(B)":8.29,"Image-Overall":65.8,"I-CLS":62.6,"I-QA":57.8,"I-RET":69.9,"I-VG":81.7,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 27 |
+
{"Rank":27,"Models":"<a href=\"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Qwen2VL-7B\">VLM2Vec-V1-Qwen2VL-7B<\/a>","Model Size(B)":8.29,"Image-Overall":65.49,"I-CLS":62.69,"I-QA":56.85,"I-RET":69.44,"I-VG":82.22,"VOC2007":80.7,"N24News":79.7,"SUN397":77.4,"ObjectNet":40.1,"Country211":29.8,"Place365":37.4,"ImageNet-1K":80.1,"HatefulMemes":69.7,"ImageNet-A":58.1,"ImageNet-R":73.9,"OK-VQA":56.8,"A-OKVQA":47.3,"DocVQA":89.7,"InfographicsVQA":60.0,"ChartQA":56.9,"Visual7W":52.7,"ScienceQA":38.5,"GQA":55.1,"TextVQA":71.6,"VizWiz":39.9,"VisDial":81.9,"CIRR":51.1,"VisualNews_t2i":80.5,"VisualNews_i2t":81.2,"MSCOCO_t2i":77.2,"MSCOCO_i2t":73.9,"NIGHTS":67.6,"WebQA":88.3,"FashionIQ":17.1,"Wiki-SS-NQ":62.3,"OVEN":66.5,"EDIS":85.7,"MSCOCO":75.7,"RefCOCO":87.6,"RefCOCO-Matching":84.6,"Visual7W-Pointing":81.0}
|
| 28 |
+
{"Rank":28,"Models":"<a href=\"https:\/\/huggingface.co\/zhibinlan\/LLaVE-2B\">LLaVE-2B<\/a>","Model Size(B)":1.95,"Image-Overall":65.2,"I-CLS":62.1,"I-QA":60.2,"I-RET":65.2,"I-VG":84.9,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 29 |
+
{"Rank":29,"Models":"<a href=\"https:\/\/huggingface.co\/VLM2Vec\/VLM2Vec-V2.0\">VLM2Vec-V2.0-Qwen2VL-2B<\/a>","Model Size(B)":2.21,"Image-Overall":64.85,"I-CLS":62.9,"I-QA":56.29,"I-RET":69.47,"I-VG":77.3,"VOC2007":85.0,"N24News":72.9,"SUN397":71.0,"ObjectNet":65.2,"Country211":25.2,"Place365":35.9,"ImageNet-1K":80.8,"HatefulMemes":56.3,"ImageNet-A":47.4,"ImageNet-R":89.3,"OK-VQA":51.5,"A-OKVQA":43.6,"DocVQA":90.1,"InfographicsVQA":58.8,"ChartQA":47.4,"Visual7W":52.9,"ScienceQA":38.2,"GQA":64.9,"TextVQA":72.2,"VizWiz":43.3,"VisDial":82.7,"CIRR":57.5,"VisualNews_t2i":74.5,"VisualNews_i2t":78.2,"MSCOCO_t2i":75.3,"MSCOCO_i2t":71.4,"NIGHTS":68.6,"WebQA":90.6,"FashionIQ":19.5,"Wiki-SS-NQ":66.9,"OVEN":64.3,"EDIS":84.1,"MSCOCO":67.1,"RefCOCO":87.1,"RefCOCO-Matching":85.8,"Visual7W-Pointing":69.2}
|
| 30 |
+
{"Rank":30,"Models":"<a href=\"https:\/\/huggingface.co\/DeepGlint-AI\/UniME-Phi3.5-V-4.2B\">UniME(Phi-3.5-V-LoRA)<\/a>","Model Size(B)":4.2,"Image-Overall":64.2,"I-CLS":54.8,"I-QA":55.9,"I-RET":64.5,"I-VG":81.8,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 31 |
+
{"Rank":31,"Models":"<a href=\"https:\/\/huggingface.co\/JUNJIE99\/MMRet-large\">MMRet-MLLM (FT)<\/a>","Model Size(B)":7.57,"Image-Overall":64.1,"I-CLS":56.0,"I-QA":57.4,"I-RET":69.9,"I-VG":83.6,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 32 |
+
{"Rank":32,"Models":"<a href=\"https:\/\/huggingface.co\/friedrichor\/Unite-Instruct-Qwen2-VL-2B\">UNITE-Instruct-2B<\/a>","Model Size(B)":2.21,"Image-Overall":63.3,"I-CLS":63.2,"I-QA":55.9,"I-RET":65.4,"I-VG":75.6,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 33 |
+
{"Rank":33,"Models":"<a href=\"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-LLaVa-Next\">VLM2Vec (LLaVA-1.6-LoRA-HighRes)<\/a>","Model Size(B)":7.57,"Image-Overall":62.9,"I-CLS":61.2,"I-QA":49.9,"I-RET":67.4,"I-VG":86.1,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 34 |
+
{"Rank":34,"Models":"<a href=\"https:\/\/huggingface.co\/BAAI\/BGE-VL-v1.5-zs\">BGE-VL-v1.5 (zeroshot; LlaVA-1.6-Mistral)<\/a>","Model Size(B)":7.57,"Image-Overall":60.1,"I-CLS":56.1,"I-QA":55.3,"I-RET":63.9,"I-VG":70.8,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 35 |
+
{"Rank":35,"Models":"<a href=\"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Full\">VLM2Vec (Phi-3.5-V-LoRA)<\/a>","Model Size(B)":4.15,"Image-Overall":60.1,"I-CLS":54.8,"I-QA":54.9,"I-RET":62.3,"I-VG":79.5,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 36 |
+
{"Rank":36,"Models":"<a href=\"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Qwen2VL-2B\">VLM2Vec-V1-Qwen2VL-2B<\/a>","Model Size(B)":2.21,"Image-Overall":59.74,"I-CLS":58.71,"I-QA":49.26,"I-RET":64.98,"I-VG":72.85,"VOC2007":74.3,"N24News":73.7,"SUN397":73.8,"ObjectNet":37.1,"Country211":21.5,"Place365":35.3,"ImageNet-1K":77.5,"HatefulMemes":58.3,"ImageNet-A":50.9,"ImageNet-R":84.7,"OK-VQA":48.5,"A-OKVQA":39.5,"DocVQA":82.5,"InfographicsVQA":47.7,"ChartQA":42.3,"Visual7W":51.2,"ScienceQA":30.7,"GQA":48.3,"TextVQA":63.3,"VizWiz":38.6,"VisDial":74.3,"CIRR":46.8,"VisualNews_t2i":73.1,"VisualNews_i2t":73.7,"MSCOCO_t2i":73.4,"MSCOCO_i2t":68.5,"NIGHTS":66.3,"WebQA":85.9,"FashionIQ":14.0,"Wiki-SS-NQ":54.2,"OVEN":68.3,"EDIS":81.2,"MSCOCO":66.5,"RefCOCO":80.9,"RefCOCO-Matching":75.7,"Visual7W-Pointing":68.3}
|
| 37 |
+
{"Rank":37,"Models":"<a href=\"https:\/\/arxiv.org\/pdf\/2503.19900\">interestFM-UIR-CAFe-0.5B<\/a>","Model Size(B)":0.894,"Image-Overall":59.6,"I-CLS":59.1,"I-QA":49.1,"I-RET":61.0,"I-VG":83.0,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 38 |
+
{"Rank":38,"Models":"<a href=\"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Qwen2VL-2B\">VLM2Vec (Qwen2-VL-2B-LoRA-HighRes)<\/a>","Model Size(B)":2.21,"Image-Overall":59.3,"I-CLS":59.0,"I-QA":49.4,"I-RET":65.4,"I-VG":73.4,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 39 |
+
{"Rank":39,"Models":"<a href=\"https:\/\/huggingface.co\/zhibinlan\/LLaVE-0.5B\">LLaVE-0.5B<\/a>","Model Size(B)":0.894,"Image-Overall":59.1,"I-CLS":57.4,"I-QA":50.3,"I-RET":59.8,"I-VG":82.9,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 40 |
+
{"Rank":40,"Models":"<a href=\"https:\/\/huggingface.co\/intfloat\/mmE5-mllama-11b-instruct\">mmE5 (w\/ 560K synthetic data)<\/a>","Model Size(B)":10.6,"Image-Overall":58.6,"I-CLS":60.6,"I-QA":55.7,"I-RET":54.7,"I-VG":72.4,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 41 |
+
{"Rank":41,"Models":"<a href=\"https:\/\/huggingface.co\/Alibaba-NLP\/gme-Qwen2-VL-7B-Instruct\">gme-Qwen2-VL-7B-Instruct<\/a>","Model Size(B)":8.29,"Image-Overall":55.95,"I-CLS":57.65,"I-QA":34.66,"I-RET":71.17,"I-VG":59.3,"VOC2007":80.3,"N24News":50.5,"SUN397":69.5,"ObjectNet":69.0,"Country211":24.8,"Place365":39.1,"ImageNet-1K":64.6,"HatefulMemes":53.6,"ImageNet-A":41.2,"ImageNet-R":83.9,"OK-VQA":33.2,"A-OKVQA":21.0,"DocVQA":41.4,"InfographicsVQA":20.3,"ChartQA":17.8,"Visual7W":22.2,"ScienceQA":28.0,"GQA":76.9,"TextVQA":46.8,"VizWiz":39.0,"VisDial":60.8,"CIRR":54.9,"VisualNews_t2i":79.7,"VisualNews_i2t":83.6,"MSCOCO_t2i":71.2,"MSCOCO_i2t":57.7,"NIGHTS":67.6,"WebQA":91.4,"FashionIQ":37.8,"Wiki-SS-NQ":78.2,"OVEN":75.1,"EDIS":96.0,"MSCOCO":31.4,"RefCOCO":60.9,"RefCOCO-Matching":78.4,"Visual7W-Pointing":66.5}
|
| 42 |
+
{"Rank":42,"Models":"<a href=\"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Full\">VLM2Vec (Phi-3.5-V-FT)<\/a>","Model Size(B)":4.15,"Image-Overall":55.9,"I-CLS":52.8,"I-QA":50.3,"I-RET":57.8,"I-VG":72.3,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 43 |
+
{"Rank":43,"Models":"<a href=\"https:\/\/huggingface.co\/Alibaba-NLP\/gme-Qwen2-VL-2B-Instruct\">gme-Qwen2-VL-2B-Instruct<\/a>","Model Size(B)":2.21,"Image-Overall":55.8,"I-CLS":56.9,"I-QA":41.2,"I-RET":67.8,"I-VG":53.4,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 44 |
+
{"Rank":44,"Models":"<a href=\"https:\/\/arxiv.org\/abs\/2503.19900\">interestFM-UIR-CAFe-0.5B<\/a>","Model Size(B)":0.894,"Image-Overall":55.43,"I-CLS":56.44,"I-QA":45.28,"I-RET":57.55,"I-VG":71.95,"VOC2007":81.9,"N24News":69.6,"SUN397":68.8,"ObjectNet":51.0,"Country211":11.3,"Place365":37.4,"ImageNet-1K":64.6,"HatefulMemes":55.3,"ImageNet-A":38.1,"ImageNet-R":86.4,"OK-VQA":47.1,"A-OKVQA":43.0,"DocVQA":60.6,"InfographicsVQA":30.2,"ChartQA":32.7,"Visual7W":48.9,"ScienceQA":39.7,"GQA":52.1,"TextVQA":59.6,"VizWiz":38.9,"VisDial":68.4,"CIRR":43.4,"VisualNews_t2i":58.4,"VisualNews_i2t":61.0,"MSCOCO_t2i":66.4,"MSCOCO_i2t":63.2,"NIGHTS":63.8,"WebQA":80.0,"FashionIQ":23.3,"Wiki-SS-NQ":46.4,"OVEN":52.4,"EDIS":63.9,"MSCOCO":63.1,"RefCOCO":76.6,"RefCOCO-Matching":70.6,"Visual7W-Pointing":77.5}
|
| 45 |
+
{"Rank":45,"Models":"<a href=\"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-LLaVa-Next\">VLM2Vec (LLaVA-1.6-LoRA-LowRes)<\/a>","Model Size(B)":7.57,"Image-Overall":55.0,"I-CLS":54.7,"I-QA":50.3,"I-RET":56.2,"I-VG":64.0,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 46 |
+
{"Rank":46,"Models":"<a href=\"https:\/\/huggingface.co\/code-kunkun\/LamRA-Ret\">LamRA-Ret<\/a>","Model Size(B)":8.29,"Image-Overall":54.08,"I-CLS":59.2,"I-QA":26.47,"I-RET":69.95,"I-VG":62.65,"VOC2007":80.1,"N24News":51.3,"SUN397":68.5,"ObjectNet":66.4,"Country211":28.3,"Place365":40.6,"ImageNet-1K":72.3,"HatefulMemes":49.0,"ImageNet-A":47.0,"ImageNet-R":88.5,"OK-VQA":37.8,"A-OKVQA":27.0,"DocVQA":22.3,"InfographicsVQA":16.5,"ChartQA":11.7,"Visual7W":19.6,"ScienceQA":26.3,"GQA":38.5,"TextVQA":33.0,"VizWiz":32.0,"VisDial":61.3,"CIRR":51.7,"VisualNews_t2i":70.4,"VisualNews_i2t":83.9,"MSCOCO_t2i":72.2,"MSCOCO_i2t":73.7,"NIGHTS":65.6,"WebQA":81.0,"FashionIQ":42.0,"Wiki-SS-NQ":69.7,"OVEN":82.0,"EDIS":85.9,"MSCOCO":44.8,"RefCOCO":62.8,"RefCOCO-Matching":75.7,"Visual7W-Pointing":67.3}
|
| 47 |
+
{"Rank":47,"Models":"<a href=\"https:\/\/huggingface.co\/code-kunkun\/LamRA-Ret-Qwen2.5VL-7b\">LamRA-Ret-Qwen2.5VL-7b<\/a>","Model Size(B)":8.29,"Image-Overall":52.43,"I-CLS":51.7,"I-QA":34.12,"I-RET":66.86,"I-VG":56.73,"VOC2007":78.7,"N24News":29.8,"SUN397":66.5,"ObjectNet":59.4,"Country211":21.7,"Place365":37.4,"ImageNet-1K":58.9,"HatefulMemes":51.3,"ImageNet-A":36.3,"ImageNet-R":77.0,"OK-VQA":39.9,"A-OKVQA":34.1,"DocVQA":37.1,"InfographicsVQA":23.7,"ChartQA":15.0,"Visual7W":24.6,"ScienceQA":31.3,"GQA":57.4,"TextVQA":46.1,"VizWiz":32.0,"VisDial":62.5,"CIRR":44.7,"VisualNews_t2i":70.1,"VisualNews_i2t":74.2,"MSCOCO_t2i":65.7,"MSCOCO_i2t":71.1,"NIGHTS":64.4,"WebQA":85.7,"FashionIQ":33.4,"Wiki-SS-NQ":67.0,"OVEN":84.8,"EDIS":78.7,"MSCOCO":36.0,"RefCOCO":57.1,"RefCOCO-Matching":82.6,"Visual7W-Pointing":51.2}
|
| 48 |
+
{"Rank":48,"Models":"<a href=\"https:\/\/huggingface.co\/Alibaba-NLP\/gme-Qwen2-VL-2B-Instruct\">gme-Qwen2-VL-2B-Instruct<\/a>","Model Size(B)":2.21,"Image-Overall":51.89,"I-CLS":54.44,"I-QA":29.86,"I-RET":66.93,"I-VG":55.47,"VOC2007":75.9,"N24News":50.1,"SUN397":67.3,"ObjectNet":70.6,"Country211":26.5,"Place365":35.8,"ImageNet-1K":58.3,"HatefulMemes":52.5,"ImageNet-A":28.8,"ImageNet-R":78.6,"OK-VQA":29.9,"A-OKVQA":18.6,"DocVQA":29.8,"InfographicsVQA":11.6,"ChartQA":13.4,"Visual7W":16.2,"ScienceQA":27.3,"GQA":75.1,"TextVQA":39.7,"VizWiz":37.0,"VisDial":48.1,"CIRR":44.2,"VisualNews_t2i":74.7,"VisualNews_i2t":78.3,"MSCOCO_t2i":68.1,"MSCOCO_i2t":63.1,"NIGHTS":67.0,"WebQA":88.8,"FashionIQ":32.9,"Wiki-SS-NQ":73.9,"OVEN":72.3,"EDIS":91.8,"MSCOCO":28.6,"RefCOCO":55.9,"RefCOCO-Matching":73.3,"Visual7W-Pointing":64.1}
|
| 49 |
+
{"Rank":49,"Models":"<a href=\"https:\/\/huggingface.co\/nvidia\/MM-Embed\">MM-Embed<\/a>","Model Size(B)":8.18,"Image-Overall":50.0,"I-CLS":48.1,"I-QA":32.3,"I-RET":63.8,"I-VG":57.8,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 50 |
+
{"Rank":50,"Models":"<a href=\"https:\/\/doi.org\/10.48550\/arXiv.2212.07143\">OpenCLIP-FT<\/a>","Model Size(B)":0.428,"Image-Overall":47.2,"I-CLS":56.0,"I-QA":21.9,"I-RET":55.4,"I-VG":64.1,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 51 |
+
{"Rank":51,"Models":"<a href=\"https:\/\/doi.org\/10.48550\/arXiv.2103.00020\">CLIP-FT<\/a>","Model Size(B)":0.428,"Image-Overall":45.4,"I-CLS":55.2,"I-QA":19.7,"I-RET":53.2,"I-VG":62.2,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 52 |
+
{"Rank":52,"Models":"<a href=\"https:\/\/huggingface.co\/TIGER-Lab\/UniIR\">UniIR (CLIP_SF)<\/a>","Model Size(B)":0.428,"Image-Overall":44.7,"I-CLS":44.3,"I-QA":16.2,"I-RET":61.8,"I-VG":65.3,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 53 |
+
{"Rank":53,"Models":"<a href=\"https:\/\/huggingface.co\/JUNJIE99\/MMRet-large\">MMRet-MLLM (LLaVA-1.6)<\/a>","Model Size(B)":7.57,"Image-Overall":44.0,"I-CLS":47.2,"I-QA":18.4,"I-RET":56.5,"I-VG":62.2,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 54 |
+
{"Rank":54,"Models":"<a href=\"https:\/\/huggingface.co\/TIGER-Lab\/UniIR\">UniIR (BLIP_FF)<\/a>","Model Size(B)":0.247,"Image-Overall":42.8,"I-CLS":42.1,"I-QA":15.0,"I-RET":60.1,"I-VG":62.2,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 55 |
+
{"Rank":55,"Models":"<a href=\"https:\/\/github.com\/mlfoundations\/open_clip\">open_clip-ViT-L\/14<\/a>","Model Size(B)":0.428,"Image-Overall":39.7,"I-CLS":47.8,"I-QA":10.9,"I-RET":52.3,"I-VG":53.3,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 56 |
+
{"Rank":56,"Models":"<a href=\"https:\/\/huggingface.co\/openai\/clip-vit-large-patch14\">clip-vit-large-patch14<\/a>","Model Size(B)":0.428,"Image-Overall":37.8,"I-CLS":42.8,"I-QA":9.1,"I-RET":53.0,"I-VG":51.8,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 57 |
+
{"Rank":57,"Models":"<a href=\"https:\/\/huggingface.co\/vidore\/colpali-v1.3\">colpali-v1.3<\/a>","Model Size(B)":2.92,"Image-Overall":34.89,"I-CLS":40.3,"I-QA":11.51,"I-RET":48.05,"I-VG":40.3,"VOC2007":69.8,"N24News":25.5,"SUN397":56.1,"ObjectNet":45.6,"Country211":6.0,"Place365":27.5,"ImageNet-1K":42.4,"HatefulMemes":50.6,"ImageNet-A":14.9,"ImageNet-R":64.6,"OK-VQA":9.4,"A-OKVQA":6.6,"DocVQA":11.3,"InfographicsVQA":5.0,"ChartQA":5.7,"Visual7W":6.1,"ScienceQA":16.3,"GQA":8.3,"TextVQA":18.8,"VizWiz":27.6,"VisDial":41.2,"CIRR":8.2,"VisualNews_t2i":50.1,"VisualNews_i2t":47.6,"MSCOCO_t2i":59.2,"MSCOCO_i2t":49.9,"NIGHTS":65.5,"WebQA":53.8,"FashionIQ":5.9,"Wiki-SS-NQ":80.5,"OVEN":50.0,"EDIS":64.7,"MSCOCO":36.7,"RefCOCO":64.5,"RefCOCO-Matching":3.9,"Visual7W-Pointing":56.1}
|
| 58 |
+
{"Rank":58,"Models":"<a href=\"https:\/\/huggingface.co\/google\/siglip-base-patch16-224\">siglip-base-patch16-224<\/a>","Model Size(B)":0.203,"Image-Overall":34.8,"I-CLS":40.3,"I-QA":8.4,"I-RET":31.6,"I-VG":59.5,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 59 |
+
{"Rank":59,"Models":"<a href=\"https:\/\/github.com\/google-deepmind\/magiclens\">Magiclens<\/a>","Model Size(B)":0.428,"Image-Overall":27.8,"I-CLS":38.8,"I-QA":8.3,"I-RET":35.4,"I-VG":26.0,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 60 |
+
{"Rank":60,"Models":"<a href=\"https:\/\/huggingface.co\/Salesforce\/blip2-opt-2.7b\">blip2-opt-2.7b<\/a>","Model Size(B)":3.74,"Image-Overall":25.2,"I-CLS":27.0,"I-QA":4.2,"I-RET":33.9,"I-VG":47.0,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
| 61 |
+
{"Rank":61,"Models":"<a href=\"https:\/\/huggingface.co\/royokong\/e5-v\">e5-v<\/a>","Model Size(B)":8.36,"Image-Overall":13.3,"I-CLS":21.8,"I-QA":4.9,"I-RET":11.5,"I-VG":19.0,"VOC2007":"-","N24News":"-","SUN397":"-","ObjectNet":"-","Country211":"-","Place365":"-","ImageNet-1K":"-","HatefulMemes":"-","ImageNet-A":"-","ImageNet-R":"-","OK-VQA":"-","A-OKVQA":"-","DocVQA":"-","InfographicsVQA":"-","ChartQA":"-","Visual7W":"-","ScienceQA":"-","GQA":"-","TextVQA":"-","VizWiz":"-","VisDial":"-","CIRR":"-","VisualNews_t2i":"-","VisualNews_i2t":"-","MSCOCO_t2i":"-","MSCOCO_i2t":"-","NIGHTS":"-","WebQA":"-","FashionIQ":"-","Wiki-SS-NQ":"-","OVEN":"-","EDIS":"-","MSCOCO":"-","RefCOCO":"-","RefCOCO-Matching":"-","Visual7W-Pointing":"-"}
|
rankings/mmeb_ranking.csv
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Rank,Models,Model Size(B),Overall,Image-Overall,Video-Overall,Visdoc-Overall
|
| 2 |
+
1,"<a href=""https://interestfm-tte.github.io/"">IFM-TTE-7B</a>",8.29,74.07,77.9,59.19,79.48
|
| 3 |
+
2,"<a href=""https://github.com/360CVGroup/RzenEmbed"">RzenEmbed-v2-7B</a>",8.29,71.61,75.92,55.73,77.06
|
| 4 |
+
3,"<a href=""https://seed1-6-embedding.github.io"">seed-1.6-embedding</a>",unknown,71.27,77.78,55.34,73.44
|
| 5 |
+
4,RzenEmbed-v1-7B,8.29,68.88,73.6,48.87,76.8
|
| 6 |
+
5,"<a href=""https://huggingface.co/OpenSearch-AI/Ops-MM-embedding-v1-7B"">Ops-MM-embedding-v1-7B</a>",8.29,67.61,72.72,53.76,70.34
|
| 7 |
+
6,"<a href=""https://huggingface.co/zhibinlan/UME-R1-7B"">UME-R1-7B</a>",8.29,64.5,71.25,47.5,67.13
|
| 8 |
+
7,RzenEmbed-v1-2B,2.21,64.36,68.53,42.62,74.41
|
| 9 |
+
8,"<a href=""https://huggingface.co/OpenSearch-AI/Ops-MM-embedding-v1-2B"">Ops-MM-embedding-v1-2B</a>",2.21,63.44,69.03,47.56,66.96
|
| 10 |
+
9,"<a href=""https://arxiv.org/abs/2503.19900"">interestFM-UIR-CAFe-7B</a>",8.03,60.63,67.56,42.4,63.92
|
| 11 |
+
10,"<a href=""https://huggingface.co/zhibinlan/UME-R1-2B"">UME-R1-2B</a>",2.21,60.11,66.56,42.23,63.86
|
| 12 |
+
11,"<a href=""https://github.com/GaryGuTC/UniME-v2"">UniME-V2-LLaVA-OneVision-7B</a>",8.03,59.56,71.77,39.01,56.68
|
| 13 |
+
12,"<a href=""https://huggingface.co/VLM2Vec/VLM2Vec-V2.0"">VLM2Vec-V2.0-Qwen2VL-2B</a>",2.21,58.02,64.85,34.58,65.36
|
| 14 |
+
13,"<a href=""https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-7B-Instruct"">gme-Qwen2-VL-7B-Instruct</a>",8.29,57.83,55.95,38.43,75.18
|
| 15 |
+
14,"<a href=""https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-2B-Instruct"">gme-Qwen2-VL-2B-Instruct</a>",2.21,54.08,51.89,33.64,72.71
|
| 16 |
+
15,"<a href=""https://huggingface.co/TIGER-Lab/VLM2Vec-Qwen2VL-7B"">VLM2Vec-V1-Qwen2VL-7B</a>",8.29,52.29,65.49,33.72,46.43
|
| 17 |
+
16,"<a href=""https://arxiv.org/abs/2503.19900"">interestFM-UIR-CAFe-0.5B</a>",0.894,49.68,55.43,35.87,51.41
|
| 18 |
+
17,"<a href=""https://huggingface.co/code-kunkun/LamRA-Ret-Qwen2.5VL-7b"">LamRA-Ret-Qwen2.5VL-7b</a>",8.29,47.41,52.43,33.6,50.24
|
| 19 |
+
18,"<a href=""https://huggingface.co/TIGER-Lab/VLM2Vec-Qwen2VL-2B"">VLM2Vec-V1-Qwen2VL-2B</a>",2.21,46.96,59.74,28.61,41.55
|
| 20 |
+
19,"<a href=""https://huggingface.co/vidore/colpali-v1.3"">colpali-v1.3</a>",2.92,44.44,34.89,28.17,70.97
|
| 21 |
+
20,"<a href=""https://huggingface.co/code-kunkun/LamRA-Ret"">LamRA-Ret</a>",8.29,40.38,54.08,34.95,23.91
|
| 22 |
+
21,"<a href=""https://github.com/QQ-MM/QQMM-embed"">QQMM-embed-v2</a>",8.29,34.74,75.28,0.0,0.0
|
| 23 |
+
22,OEmbedding-v1-7B,8.29,34.18,74.05,0.0,0.0
|
| 24 |
+
23,ReCo-7B,8.29,34.09,73.87,0.0,0.0
|
| 25 |
+
24,TCE-v1,8.0,33.39,72.36,0.0,0.0
|
rankings/mmeb_ranking.jsonl
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"Rank":1,"Models":"<a href=\"https:\/\/interestfm-tte.github.io\/\">IFM-TTE-7B<\/a>","Model Size(B)":8.29,"Overall":74.07,"Image-Overall":77.9,"Video-Overall":59.19,"Visdoc-Overall":79.48}
|
| 2 |
+
{"Rank":2,"Models":"<a href=\"https:\/\/github.com\/360CVGroup\/RzenEmbed\">RzenEmbed-v2-7B<\/a>","Model Size(B)":8.29,"Overall":71.61,"Image-Overall":75.92,"Video-Overall":55.73,"Visdoc-Overall":77.06}
|
| 3 |
+
{"Rank":3,"Models":"<a href=\"https:\/\/seed1-6-embedding.github.io\">seed-1.6-embedding<\/a>","Model Size(B)":"unknown","Overall":71.27,"Image-Overall":77.78,"Video-Overall":55.34,"Visdoc-Overall":73.44}
|
| 4 |
+
{"Rank":4,"Models":"RzenEmbed-v1-7B","Model Size(B)":8.29,"Overall":68.88,"Image-Overall":73.6,"Video-Overall":48.87,"Visdoc-Overall":76.8}
|
| 5 |
+
{"Rank":5,"Models":"<a href=\"https:\/\/huggingface.co\/OpenSearch-AI\/Ops-MM-embedding-v1-7B\">Ops-MM-embedding-v1-7B<\/a>","Model Size(B)":8.29,"Overall":67.61,"Image-Overall":72.72,"Video-Overall":53.76,"Visdoc-Overall":70.34}
|
| 6 |
+
{"Rank":6,"Models":"<a href=\"https:\/\/huggingface.co\/zhibinlan\/UME-R1-7B\">UME-R1-7B<\/a>","Model Size(B)":8.29,"Overall":64.5,"Image-Overall":71.25,"Video-Overall":47.5,"Visdoc-Overall":67.13}
|
| 7 |
+
{"Rank":7,"Models":"RzenEmbed-v1-2B","Model Size(B)":2.21,"Overall":64.36,"Image-Overall":68.53,"Video-Overall":42.62,"Visdoc-Overall":74.41}
|
| 8 |
+
{"Rank":8,"Models":"<a href=\"https:\/\/huggingface.co\/OpenSearch-AI\/Ops-MM-embedding-v1-2B\">Ops-MM-embedding-v1-2B<\/a>","Model Size(B)":2.21,"Overall":63.44,"Image-Overall":69.03,"Video-Overall":47.56,"Visdoc-Overall":66.96}
|
| 9 |
+
{"Rank":9,"Models":"<a href=\"https:\/\/arxiv.org\/abs\/2503.19900\">interestFM-UIR-CAFe-7B<\/a>","Model Size(B)":8.03,"Overall":60.63,"Image-Overall":67.56,"Video-Overall":42.4,"Visdoc-Overall":63.92}
|
| 10 |
+
{"Rank":10,"Models":"<a href=\"https:\/\/huggingface.co\/zhibinlan\/UME-R1-2B\">UME-R1-2B<\/a>","Model Size(B)":2.21,"Overall":60.11,"Image-Overall":66.56,"Video-Overall":42.23,"Visdoc-Overall":63.86}
|
| 11 |
+
{"Rank":11,"Models":"<a href=\"https:\/\/github.com\/GaryGuTC\/UniME-v2\">UniME-V2-LLaVA-OneVision-7B<\/a>","Model Size(B)":8.03,"Overall":59.56,"Image-Overall":71.77,"Video-Overall":39.01,"Visdoc-Overall":56.68}
|
| 12 |
+
{"Rank":12,"Models":"<a href=\"https:\/\/huggingface.co\/VLM2Vec\/VLM2Vec-V2.0\">VLM2Vec-V2.0-Qwen2VL-2B<\/a>","Model Size(B)":2.21,"Overall":58.02,"Image-Overall":64.85,"Video-Overall":34.58,"Visdoc-Overall":65.36}
|
| 13 |
+
{"Rank":13,"Models":"<a href=\"https:\/\/huggingface.co\/Alibaba-NLP\/gme-Qwen2-VL-7B-Instruct\">gme-Qwen2-VL-7B-Instruct<\/a>","Model Size(B)":8.29,"Overall":57.83,"Image-Overall":55.95,"Video-Overall":38.43,"Visdoc-Overall":75.18}
|
| 14 |
+
{"Rank":14,"Models":"<a href=\"https:\/\/huggingface.co\/Alibaba-NLP\/gme-Qwen2-VL-2B-Instruct\">gme-Qwen2-VL-2B-Instruct<\/a>","Model Size(B)":2.21,"Overall":54.08,"Image-Overall":51.89,"Video-Overall":33.64,"Visdoc-Overall":72.71}
|
| 15 |
+
{"Rank":15,"Models":"<a href=\"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Qwen2VL-7B\">VLM2Vec-V1-Qwen2VL-7B<\/a>","Model Size(B)":8.29,"Overall":52.29,"Image-Overall":65.49,"Video-Overall":33.72,"Visdoc-Overall":46.43}
|
| 16 |
+
{"Rank":16,"Models":"<a href=\"https:\/\/arxiv.org\/abs\/2503.19900\">interestFM-UIR-CAFe-0.5B<\/a>","Model Size(B)":0.894,"Overall":49.68,"Image-Overall":55.43,"Video-Overall":35.87,"Visdoc-Overall":51.41}
|
| 17 |
+
{"Rank":17,"Models":"<a href=\"https:\/\/huggingface.co\/code-kunkun\/LamRA-Ret-Qwen2.5VL-7b\">LamRA-Ret-Qwen2.5VL-7b<\/a>","Model Size(B)":8.29,"Overall":47.41,"Image-Overall":52.43,"Video-Overall":33.6,"Visdoc-Overall":50.24}
|
| 18 |
+
{"Rank":18,"Models":"<a href=\"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Qwen2VL-2B\">VLM2Vec-V1-Qwen2VL-2B<\/a>","Model Size(B)":2.21,"Overall":46.96,"Image-Overall":59.74,"Video-Overall":28.61,"Visdoc-Overall":41.55}
|
| 19 |
+
{"Rank":19,"Models":"<a href=\"https:\/\/huggingface.co\/vidore\/colpali-v1.3\">colpali-v1.3<\/a>","Model Size(B)":2.92,"Overall":44.44,"Image-Overall":34.89,"Video-Overall":28.17,"Visdoc-Overall":70.97}
|
| 20 |
+
{"Rank":20,"Models":"<a href=\"https:\/\/huggingface.co\/code-kunkun\/LamRA-Ret\">LamRA-Ret<\/a>","Model Size(B)":8.29,"Overall":40.38,"Image-Overall":54.08,"Video-Overall":34.95,"Visdoc-Overall":23.91}
|
| 21 |
+
{"Rank":21,"Models":"<a href=\"https:\/\/github.com\/QQ-MM\/QQMM-embed\">QQMM-embed-v2<\/a>","Model Size(B)":8.29,"Overall":34.74,"Image-Overall":75.28,"Video-Overall":0.0,"Visdoc-Overall":0.0}
|
| 22 |
+
{"Rank":22,"Models":"OEmbedding-v1-7B","Model Size(B)":8.29,"Overall":34.18,"Image-Overall":74.05,"Video-Overall":0.0,"Visdoc-Overall":0.0}
|
| 23 |
+
{"Rank":23,"Models":"ReCo-7B","Model Size(B)":8.29,"Overall":34.09,"Image-Overall":73.87,"Video-Overall":0.0,"Visdoc-Overall":0.0}
|
| 24 |
+
{"Rank":24,"Models":"TCE-v1","Model Size(B)":8.0,"Overall":33.39,"Image-Overall":72.36,"Video-Overall":0.0,"Visdoc-Overall":0.0}
|
rankings/video_ranking.csv
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Rank,Models,Model Size(B),Video-Overall,V-CLS,V-QA,V-RET,V-MRET,K700,UCF101,HMDB51,SmthSmthV2,Breakfast,Video-MME,MVBench,NExTQA,EgoSchema,ActivityNetQA,MSR-VTT,MSVD,DiDeMo,VATEX,YouCook2,QVHighlight,Charades-STA,MomentSeeker
|
| 2 |
+
1,"<a href=""https://interestfm-tte.github.io/"">IFM-TTE-7B</a>",8.29,59.19,60.53,67.92,51.73,54.86,57.4,79.6,65.4,62.6,37.64,59.67,62.85,73.76,65.8,77.5,52.7,73.13,49.7,51.45,31.65,64.64,50.34,49.61
|
| 3 |
+
2,"<a href=""https://github.com/360CVGroup/RzenEmbed"">RzenEmbed-v2-7B</a>",8.29,55.73,58.82,63.5,50.97,45.54,57.1,75.7,63.4,66.5,31.41,51.48,60.3,75.91,51.2,78.6,50.6,72.09,57.27,47.57,27.34,64.27,24.62,47.72
|
| 4 |
+
3,"<a href=""https://seed1-6-embedding.github.io"">seed-1.6-embedding</a>",unknown,55.34,54.99,60.85,51.33,53.45,48.0,74.2,63.9,61.6,27.25,53.96,53.27,66.2,52.2,78.6,55.3,71.34,56.67,48.77,24.57,71.75,29.3,59.3
|
| 5 |
+
4,"<a href=""https://huggingface.co/OpenSearch-AI/Ops-MM-embedding-v1-7B"">Ops-MM-embedding-v1-7B</a>",8.29,53.76,59.68,62.22,45.72,43.21,59.0,78.8,59.3,64.6,36.72,50.89,56.97,67.06,59.6,76.6,49.4,67.16,46.12,43.95,21.96,58.17,24.9,46.56
|
| 6 |
+
5,RzenEmbed-v1-7B,8.29,48.87,52.76,56.18,41.89,41.82,55.4,73.1,56.7,56.9,21.71,48.67,48.58,66.77,42.8,74.1,45.1,64.93,41.24,38.57,19.6,60.2,22.15,43.11
|
| 7 |
+
6,"<a href=""https://huggingface.co/OpenSearch-AI/Ops-MM-embedding-v1-2B"">Ops-MM-embedding-v1-2B</a>",2.21,47.56,53.61,55.65,41.75,33.68,52.0,71.4,55.5,58.9,30.25,44.63,50.75,57.26,53.8,71.8,46.0,66.12,39.84,40.24,16.55,39.52,19.53,42.0
|
| 8 |
+
7,"<a href=""https://huggingface.co/zhibinlan/UME-R1-7B"">UME-R1-7B</a>",8.29,47.5,48.6,60.7,38.16,39.26,42.8,70.0,58.3,50.4,21.48,47.26,58.2,69.62,52.4,76.0,38.9,60.75,40.04,32.65,18.46,54.85,21.87,41.06
|
| 9 |
+
8,RzenEmbed-v1-2B,2.21,42.62,45.56,47.54,38.32,36.69,46.4,69.9,48.9,49.9,12.7,39.78,41.58,49.53,35.6,71.2,43.0,58.36,39.54,33.79,16.89,51.52,18.16,40.39
|
| 10 |
+
9,"<a href=""https://arxiv.org/abs/2503.19900"">interestFM-UIR-CAFe-7B</a>",8.03,42.4,35.81,58.66,34.44,39.53,40.1,39.6,46.9,35.8,16.63,46.0,48.9,62.42,60.0,76.0,36.5,56.42,37.85,31.96,9.47,58.45,18.71,41.44
|
| 11 |
+
10,"<a href=""https://huggingface.co/zhibinlan/UME-R1-2B"">UME-R1-2B</a>",2.21,42.23,44.32,50.95,32.93,39.7,35.8,67.2,54.4,44.1,20.09,41.67,49.88,59.98,45.4,57.8,34.3,55.37,32.37,29.88,12.71,57.53,20.36,41.22
|
| 12 |
+
11,"<a href=""https://github.com/GaryGuTC/UniME-v2"">UniME-V2-LLaVA-OneVision-7B</a>",8.03,39.01,37.16,50.59,28.93,39.59,38.0,61.8,42.8,25.2,18.01,35.78,42.2,58.76,51.6,64.6,27.6,57.46,31.47,22.51,5.6,56.79,29.99,32.0
|
| 13 |
+
12,"<a href=""https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-7B-Instruct"">gme-Qwen2-VL-7B-Instruct</a>",8.29,38.43,37.44,50.35,28.37,36.96,39.7,54.7,47.9,30.6,14.32,39.19,46.62,53.55,46.8,65.6,31.8,49.7,26.39,24.88,9.09,59.46,14.03,37.39
|
| 14 |
+
13,"<a href=""https://arxiv.org/abs/2503.19900"">interestFM-UIR-CAFe-0.5B</a>",0.894,35.87,33.9,41.72,29.69,39.69,33.6,47.2,40.5,25.8,22.4,36.11,39.35,38.36,26.2,68.6,33.0,51.19,30.48,25.48,8.3,56.14,23.11,39.83
|
| 15 |
+
14,"<a href=""https://huggingface.co/code-kunkun/LamRA-Ret"">LamRA-Ret</a>",8.29,34.95,39.27,42.6,24.26,32.84,42.3,60.4,40.5,36.3,16.86,34.07,37.2,43.72,44.8,53.2,22.1,46.12,24.8,19.14,9.15,53.83,10.87,33.83
|
| 16 |
+
15,"<a href=""https://huggingface.co/VLM2Vec/VLM2Vec-V2.0"">VLM2Vec-V2.0-Qwen2VL-2B</a>",2.21,34.58,39.3,34.32,28.77,36.82,38.0,60.0,40.9,42.8,14.78,30.7,33.7,20.92,34.0,52.3,28.3,48.06,30.38,26.46,10.63,49.4,20.22,40.83
|
| 17 |
+
16,"<a href=""https://huggingface.co/TIGER-Lab/VLM2Vec-Qwen2VL-7B"">VLM2Vec-V1-Qwen2VL-7B</a>",8.29,33.72,39.08,29.96,29.0,38.93,35.5,61.8,42.2,32.1,23.79,27.81,28.48,20.29,21.8,51.4,34.5,46.72,29.28,25.52,9.0,57.71,19.81,39.28
|
| 18 |
+
17,"<a href=""https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-2B-Instruct"">gme-Qwen2-VL-2B-Instruct</a>",2.21,33.64,34.91,42.0,25.56,31.07,35.2,52.4,43.4,29.9,13.63,34.26,37.45,39.49,40.8,58.0,27.3,47.61,22.01,22.96,7.9,43.58,14.86,34.78
|
| 19 |
+
18,"<a href=""https://huggingface.co/code-kunkun/LamRA-Ret-Qwen2.5VL-7b"">LamRA-Ret-Qwen2.5VL-7b</a>",8.29,33.6,32.86,42.63,23.18,37.16,32.1,53.0,33.8,25.3,20.09,35.11,37.62,44.93,47.0,48.5,25.0,41.94,22.81,18.65,7.52,60.85,18.84,31.78
|
| 20 |
+
19,"<a href=""https://huggingface.co/TIGER-Lab/VLM2Vec-Qwen2VL-2B"">VLM2Vec-V1-Qwen2VL-2B</a>",2.21,28.61,33.4,30.53,20.61,30.75,31.4,57.5,33.8,30.9,13.39,26.89,30.45,20.29,25.4,49.6,25.2,38.21,19.42,16.15,4.09,44.23,13.62,34.39
|
| 21 |
+
20,"<a href=""https://huggingface.co/vidore/colpali-v1.3"">colpali-v1.3</a>",2.92,28.17,26.71,37.84,21.56,25.52,23.4,49.4,24.8,25.1,10.85,30.59,33.7,35.21,38.4,51.3,17.6,45.37,22.81,16.68,5.32,19.94,29.02,27.61
|
| 22 |
+
21,"<a href=""https://github.com/QQ-MM/QQMM-embed"">QQMM-embed-v2</a>",8.29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
| 23 |
+
22,OEmbedding-v1-7B,8.29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
| 24 |
+
23,ReCo-7B,8.29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
| 25 |
+
24,TCE-v1,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
rankings/video_ranking.jsonl
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"Rank":1,"Models":"<a href=\"https:\/\/interestfm-tte.github.io\/\">IFM-TTE-7B<\/a>","Model Size(B)":8.29,"Video-Overall":59.19,"V-CLS":60.53,"V-QA":67.92,"V-RET":51.73,"V-MRET":54.86,"K700":57.4,"UCF101":79.6,"HMDB51":65.4,"SmthSmthV2":62.6,"Breakfast":37.64,"Video-MME":59.67,"MVBench":62.85,"NExTQA":73.76,"EgoSchema":65.8,"ActivityNetQA":77.5,"MSR-VTT":52.7,"MSVD":73.13,"DiDeMo":49.7,"VATEX":51.45,"YouCook2":31.65,"QVHighlight":64.64,"Charades-STA":50.34,"MomentSeeker":49.61}
|
| 2 |
+
{"Rank":2,"Models":"<a href=\"https:\/\/github.com\/360CVGroup\/RzenEmbed\">RzenEmbed-v2-7B<\/a>","Model Size(B)":8.29,"Video-Overall":55.73,"V-CLS":58.82,"V-QA":63.5,"V-RET":50.97,"V-MRET":45.54,"K700":57.1,"UCF101":75.7,"HMDB51":63.4,"SmthSmthV2":66.5,"Breakfast":31.41,"Video-MME":51.48,"MVBench":60.3,"NExTQA":75.91,"EgoSchema":51.2,"ActivityNetQA":78.6,"MSR-VTT":50.6,"MSVD":72.09,"DiDeMo":57.27,"VATEX":47.57,"YouCook2":27.34,"QVHighlight":64.27,"Charades-STA":24.62,"MomentSeeker":47.72}
|
| 3 |
+
{"Rank":3,"Models":"<a href=\"https:\/\/seed1-6-embedding.github.io\">seed-1.6-embedding<\/a>","Model Size(B)":"unknown","Video-Overall":55.34,"V-CLS":54.99,"V-QA":60.85,"V-RET":51.33,"V-MRET":53.45,"K700":48.0,"UCF101":74.2,"HMDB51":63.9,"SmthSmthV2":61.6,"Breakfast":27.25,"Video-MME":53.96,"MVBench":53.27,"NExTQA":66.2,"EgoSchema":52.2,"ActivityNetQA":78.6,"MSR-VTT":55.3,"MSVD":71.34,"DiDeMo":56.67,"VATEX":48.77,"YouCook2":24.57,"QVHighlight":71.75,"Charades-STA":29.3,"MomentSeeker":59.3}
|
| 4 |
+
{"Rank":4,"Models":"<a href=\"https:\/\/huggingface.co\/OpenSearch-AI\/Ops-MM-embedding-v1-7B\">Ops-MM-embedding-v1-7B<\/a>","Model Size(B)":8.29,"Video-Overall":53.76,"V-CLS":59.68,"V-QA":62.22,"V-RET":45.72,"V-MRET":43.21,"K700":59.0,"UCF101":78.8,"HMDB51":59.3,"SmthSmthV2":64.6,"Breakfast":36.72,"Video-MME":50.89,"MVBench":56.97,"NExTQA":67.06,"EgoSchema":59.6,"ActivityNetQA":76.6,"MSR-VTT":49.4,"MSVD":67.16,"DiDeMo":46.12,"VATEX":43.95,"YouCook2":21.96,"QVHighlight":58.17,"Charades-STA":24.9,"MomentSeeker":46.56}
|
| 5 |
+
{"Rank":5,"Models":"RzenEmbed-v1-7B","Model Size(B)":8.29,"Video-Overall":48.87,"V-CLS":52.76,"V-QA":56.18,"V-RET":41.89,"V-MRET":41.82,"K700":55.4,"UCF101":73.1,"HMDB51":56.7,"SmthSmthV2":56.9,"Breakfast":21.71,"Video-MME":48.67,"MVBench":48.58,"NExTQA":66.77,"EgoSchema":42.8,"ActivityNetQA":74.1,"MSR-VTT":45.1,"MSVD":64.93,"DiDeMo":41.24,"VATEX":38.57,"YouCook2":19.6,"QVHighlight":60.2,"Charades-STA":22.15,"MomentSeeker":43.11}
|
| 6 |
+
{"Rank":6,"Models":"<a href=\"https:\/\/huggingface.co\/OpenSearch-AI\/Ops-MM-embedding-v1-2B\">Ops-MM-embedding-v1-2B<\/a>","Model Size(B)":2.21,"Video-Overall":47.56,"V-CLS":53.61,"V-QA":55.65,"V-RET":41.75,"V-MRET":33.68,"K700":52.0,"UCF101":71.4,"HMDB51":55.5,"SmthSmthV2":58.9,"Breakfast":30.25,"Video-MME":44.63,"MVBench":50.75,"NExTQA":57.26,"EgoSchema":53.8,"ActivityNetQA":71.8,"MSR-VTT":46.0,"MSVD":66.12,"DiDeMo":39.84,"VATEX":40.24,"YouCook2":16.55,"QVHighlight":39.52,"Charades-STA":19.53,"MomentSeeker":42.0}
|
| 7 |
+
{"Rank":7,"Models":"<a href=\"https:\/\/huggingface.co\/zhibinlan\/UME-R1-7B\">UME-R1-7B<\/a>","Model Size(B)":8.29,"Video-Overall":47.5,"V-CLS":48.6,"V-QA":60.7,"V-RET":38.16,"V-MRET":39.26,"K700":42.8,"UCF101":70.0,"HMDB51":58.3,"SmthSmthV2":50.4,"Breakfast":21.48,"Video-MME":47.26,"MVBench":58.2,"NExTQA":69.62,"EgoSchema":52.4,"ActivityNetQA":76.0,"MSR-VTT":38.9,"MSVD":60.75,"DiDeMo":40.04,"VATEX":32.65,"YouCook2":18.46,"QVHighlight":54.85,"Charades-STA":21.87,"MomentSeeker":41.06}
|
| 8 |
+
{"Rank":8,"Models":"RzenEmbed-v1-2B","Model Size(B)":2.21,"Video-Overall":42.62,"V-CLS":45.56,"V-QA":47.54,"V-RET":38.32,"V-MRET":36.69,"K700":46.4,"UCF101":69.9,"HMDB51":48.9,"SmthSmthV2":49.9,"Breakfast":12.7,"Video-MME":39.78,"MVBench":41.58,"NExTQA":49.53,"EgoSchema":35.6,"ActivityNetQA":71.2,"MSR-VTT":43.0,"MSVD":58.36,"DiDeMo":39.54,"VATEX":33.79,"YouCook2":16.89,"QVHighlight":51.52,"Charades-STA":18.16,"MomentSeeker":40.39}
|
| 9 |
+
{"Rank":9,"Models":"<a href=\"https:\/\/arxiv.org\/abs\/2503.19900\">interestFM-UIR-CAFe-7B<\/a>","Model Size(B)":8.03,"Video-Overall":42.4,"V-CLS":35.81,"V-QA":58.66,"V-RET":34.44,"V-MRET":39.53,"K700":40.1,"UCF101":39.6,"HMDB51":46.9,"SmthSmthV2":35.8,"Breakfast":16.63,"Video-MME":46.0,"MVBench":48.9,"NExTQA":62.42,"EgoSchema":60.0,"ActivityNetQA":76.0,"MSR-VTT":36.5,"MSVD":56.42,"DiDeMo":37.85,"VATEX":31.96,"YouCook2":9.47,"QVHighlight":58.45,"Charades-STA":18.71,"MomentSeeker":41.44}
|
| 10 |
+
{"Rank":10,"Models":"<a href=\"https:\/\/huggingface.co\/zhibinlan\/UME-R1-2B\">UME-R1-2B<\/a>","Model Size(B)":2.21,"Video-Overall":42.23,"V-CLS":44.32,"V-QA":50.95,"V-RET":32.93,"V-MRET":39.7,"K700":35.8,"UCF101":67.2,"HMDB51":54.4,"SmthSmthV2":44.1,"Breakfast":20.09,"Video-MME":41.67,"MVBench":49.88,"NExTQA":59.98,"EgoSchema":45.4,"ActivityNetQA":57.8,"MSR-VTT":34.3,"MSVD":55.37,"DiDeMo":32.37,"VATEX":29.88,"YouCook2":12.71,"QVHighlight":57.53,"Charades-STA":20.36,"MomentSeeker":41.22}
|
| 11 |
+
{"Rank":11,"Models":"<a href=\"https:\/\/github.com\/GaryGuTC\/UniME-v2\">UniME-V2-LLaVA-OneVision-7B<\/a>","Model Size(B)":8.03,"Video-Overall":39.01,"V-CLS":37.16,"V-QA":50.59,"V-RET":28.93,"V-MRET":39.59,"K700":38.0,"UCF101":61.8,"HMDB51":42.8,"SmthSmthV2":25.2,"Breakfast":18.01,"Video-MME":35.78,"MVBench":42.2,"NExTQA":58.76,"EgoSchema":51.6,"ActivityNetQA":64.6,"MSR-VTT":27.6,"MSVD":57.46,"DiDeMo":31.47,"VATEX":22.51,"YouCook2":5.6,"QVHighlight":56.79,"Charades-STA":29.99,"MomentSeeker":32.0}
|
| 12 |
+
{"Rank":12,"Models":"<a href=\"https:\/\/huggingface.co\/Alibaba-NLP\/gme-Qwen2-VL-7B-Instruct\">gme-Qwen2-VL-7B-Instruct<\/a>","Model Size(B)":8.29,"Video-Overall":38.43,"V-CLS":37.44,"V-QA":50.35,"V-RET":28.37,"V-MRET":36.96,"K700":39.7,"UCF101":54.7,"HMDB51":47.9,"SmthSmthV2":30.6,"Breakfast":14.32,"Video-MME":39.19,"MVBench":46.62,"NExTQA":53.55,"EgoSchema":46.8,"ActivityNetQA":65.6,"MSR-VTT":31.8,"MSVD":49.7,"DiDeMo":26.39,"VATEX":24.88,"YouCook2":9.09,"QVHighlight":59.46,"Charades-STA":14.03,"MomentSeeker":37.39}
|
| 13 |
+
{"Rank":13,"Models":"<a href=\"https:\/\/arxiv.org\/abs\/2503.19900\">interestFM-UIR-CAFe-0.5B<\/a>","Model Size(B)":0.894,"Video-Overall":35.87,"V-CLS":33.9,"V-QA":41.72,"V-RET":29.69,"V-MRET":39.69,"K700":33.6,"UCF101":47.2,"HMDB51":40.5,"SmthSmthV2":25.8,"Breakfast":22.4,"Video-MME":36.11,"MVBench":39.35,"NExTQA":38.36,"EgoSchema":26.2,"ActivityNetQA":68.6,"MSR-VTT":33.0,"MSVD":51.19,"DiDeMo":30.48,"VATEX":25.48,"YouCook2":8.3,"QVHighlight":56.14,"Charades-STA":23.11,"MomentSeeker":39.83}
|
| 14 |
+
{"Rank":14,"Models":"<a href=\"https:\/\/huggingface.co\/code-kunkun\/LamRA-Ret\">LamRA-Ret<\/a>","Model Size(B)":8.29,"Video-Overall":34.95,"V-CLS":39.27,"V-QA":42.6,"V-RET":24.26,"V-MRET":32.84,"K700":42.3,"UCF101":60.4,"HMDB51":40.5,"SmthSmthV2":36.3,"Breakfast":16.86,"Video-MME":34.07,"MVBench":37.2,"NExTQA":43.72,"EgoSchema":44.8,"ActivityNetQA":53.2,"MSR-VTT":22.1,"MSVD":46.12,"DiDeMo":24.8,"VATEX":19.14,"YouCook2":9.15,"QVHighlight":53.83,"Charades-STA":10.87,"MomentSeeker":33.83}
|
| 15 |
+
{"Rank":15,"Models":"<a href=\"https:\/\/huggingface.co\/VLM2Vec\/VLM2Vec-V2.0\">VLM2Vec-V2.0-Qwen2VL-2B<\/a>","Model Size(B)":2.21,"Video-Overall":34.58,"V-CLS":39.3,"V-QA":34.32,"V-RET":28.77,"V-MRET":36.82,"K700":38.0,"UCF101":60.0,"HMDB51":40.9,"SmthSmthV2":42.8,"Breakfast":14.78,"Video-MME":30.7,"MVBench":33.7,"NExTQA":20.92,"EgoSchema":34.0,"ActivityNetQA":52.3,"MSR-VTT":28.3,"MSVD":48.06,"DiDeMo":30.38,"VATEX":26.46,"YouCook2":10.63,"QVHighlight":49.4,"Charades-STA":20.22,"MomentSeeker":40.83}
|
| 16 |
+
{"Rank":16,"Models":"<a href=\"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Qwen2VL-7B\">VLM2Vec-V1-Qwen2VL-7B<\/a>","Model Size(B)":8.29,"Video-Overall":33.72,"V-CLS":39.08,"V-QA":29.96,"V-RET":29.0,"V-MRET":38.93,"K700":35.5,"UCF101":61.8,"HMDB51":42.2,"SmthSmthV2":32.1,"Breakfast":23.79,"Video-MME":27.81,"MVBench":28.48,"NExTQA":20.29,"EgoSchema":21.8,"ActivityNetQA":51.4,"MSR-VTT":34.5,"MSVD":46.72,"DiDeMo":29.28,"VATEX":25.52,"YouCook2":9.0,"QVHighlight":57.71,"Charades-STA":19.81,"MomentSeeker":39.28}
|
| 17 |
+
{"Rank":17,"Models":"<a href=\"https:\/\/huggingface.co\/Alibaba-NLP\/gme-Qwen2-VL-2B-Instruct\">gme-Qwen2-VL-2B-Instruct<\/a>","Model Size(B)":2.21,"Video-Overall":33.64,"V-CLS":34.91,"V-QA":42.0,"V-RET":25.56,"V-MRET":31.07,"K700":35.2,"UCF101":52.4,"HMDB51":43.4,"SmthSmthV2":29.9,"Breakfast":13.63,"Video-MME":34.26,"MVBench":37.45,"NExTQA":39.49,"EgoSchema":40.8,"ActivityNetQA":58.0,"MSR-VTT":27.3,"MSVD":47.61,"DiDeMo":22.01,"VATEX":22.96,"YouCook2":7.9,"QVHighlight":43.58,"Charades-STA":14.86,"MomentSeeker":34.78}
|
| 18 |
+
{"Rank":18,"Models":"<a href=\"https:\/\/huggingface.co\/code-kunkun\/LamRA-Ret-Qwen2.5VL-7b\">LamRA-Ret-Qwen2.5VL-7b<\/a>","Model Size(B)":8.29,"Video-Overall":33.6,"V-CLS":32.86,"V-QA":42.63,"V-RET":23.18,"V-MRET":37.16,"K700":32.1,"UCF101":53.0,"HMDB51":33.8,"SmthSmthV2":25.3,"Breakfast":20.09,"Video-MME":35.11,"MVBench":37.62,"NExTQA":44.93,"EgoSchema":47.0,"ActivityNetQA":48.5,"MSR-VTT":25.0,"MSVD":41.94,"DiDeMo":22.81,"VATEX":18.65,"YouCook2":7.52,"QVHighlight":60.85,"Charades-STA":18.84,"MomentSeeker":31.78}
|
| 19 |
+
{"Rank":19,"Models":"<a href=\"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Qwen2VL-2B\">VLM2Vec-V1-Qwen2VL-2B<\/a>","Model Size(B)":2.21,"Video-Overall":28.61,"V-CLS":33.4,"V-QA":30.53,"V-RET":20.61,"V-MRET":30.75,"K700":31.4,"UCF101":57.5,"HMDB51":33.8,"SmthSmthV2":30.9,"Breakfast":13.39,"Video-MME":26.89,"MVBench":30.45,"NExTQA":20.29,"EgoSchema":25.4,"ActivityNetQA":49.6,"MSR-VTT":25.2,"MSVD":38.21,"DiDeMo":19.42,"VATEX":16.15,"YouCook2":4.09,"QVHighlight":44.23,"Charades-STA":13.62,"MomentSeeker":34.39}
|
| 20 |
+
{"Rank":20,"Models":"<a href=\"https:\/\/huggingface.co\/vidore\/colpali-v1.3\">colpali-v1.3<\/a>","Model Size(B)":2.92,"Video-Overall":28.17,"V-CLS":26.71,"V-QA":37.84,"V-RET":21.56,"V-MRET":25.52,"K700":23.4,"UCF101":49.4,"HMDB51":24.8,"SmthSmthV2":25.1,"Breakfast":10.85,"Video-MME":30.59,"MVBench":33.7,"NExTQA":35.21,"EgoSchema":38.4,"ActivityNetQA":51.3,"MSR-VTT":17.6,"MSVD":45.37,"DiDeMo":22.81,"VATEX":16.68,"YouCook2":5.32,"QVHighlight":19.94,"Charades-STA":29.02,"MomentSeeker":27.61}
|
| 21 |
+
{"Rank":21,"Models":"<a href=\"https:\/\/github.com\/QQ-MM\/QQMM-embed\">QQMM-embed-v2<\/a>","Model Size(B)":8.29,"Video-Overall":0.0,"V-CLS":0.0,"V-QA":0.0,"V-RET":0.0,"V-MRET":0.0,"K700":0.0,"UCF101":0.0,"HMDB51":0.0,"SmthSmthV2":0.0,"Breakfast":0.0,"Video-MME":0.0,"MVBench":0.0,"NExTQA":0.0,"EgoSchema":0.0,"ActivityNetQA":0.0,"MSR-VTT":0.0,"MSVD":0.0,"DiDeMo":0.0,"VATEX":0.0,"YouCook2":0.0,"QVHighlight":0.0,"Charades-STA":0.0,"MomentSeeker":0.0}
|
| 22 |
+
{"Rank":22,"Models":"OEmbedding-v1-7B","Model Size(B)":8.29,"Video-Overall":0.0,"V-CLS":0.0,"V-QA":0.0,"V-RET":0.0,"V-MRET":0.0,"K700":0.0,"UCF101":0.0,"HMDB51":0.0,"SmthSmthV2":0.0,"Breakfast":0.0,"Video-MME":0.0,"MVBench":0.0,"NExTQA":0.0,"EgoSchema":0.0,"ActivityNetQA":0.0,"MSR-VTT":0.0,"MSVD":0.0,"DiDeMo":0.0,"VATEX":0.0,"YouCook2":0.0,"QVHighlight":0.0,"Charades-STA":0.0,"MomentSeeker":0.0}
|
| 23 |
+
{"Rank":23,"Models":"ReCo-7B","Model Size(B)":8.29,"Video-Overall":0.0,"V-CLS":0.0,"V-QA":0.0,"V-RET":0.0,"V-MRET":0.0,"K700":0.0,"UCF101":0.0,"HMDB51":0.0,"SmthSmthV2":0.0,"Breakfast":0.0,"Video-MME":0.0,"MVBench":0.0,"NExTQA":0.0,"EgoSchema":0.0,"ActivityNetQA":0.0,"MSR-VTT":0.0,"MSVD":0.0,"DiDeMo":0.0,"VATEX":0.0,"YouCook2":0.0,"QVHighlight":0.0,"Charades-STA":0.0,"MomentSeeker":0.0}
|
| 24 |
+
{"Rank":24,"Models":"TCE-v1","Model Size(B)":8.0,"Video-Overall":0.0,"V-CLS":0.0,"V-QA":0.0,"V-RET":0.0,"V-MRET":0.0,"K700":0.0,"UCF101":0.0,"HMDB51":0.0,"SmthSmthV2":0.0,"Breakfast":0.0,"Video-MME":0.0,"MVBench":0.0,"NExTQA":0.0,"EgoSchema":0.0,"ActivityNetQA":0.0,"MSR-VTT":0.0,"MSVD":0.0,"DiDeMo":0.0,"VATEX":0.0,"YouCook2":0.0,"QVHighlight":0.0,"Charades-STA":0.0,"MomentSeeker":0.0}
|
rankings/visdoc_ranking.csv
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Rank,Models,Model Size(B),Visdoc-Overall,ViDoRe-V1,ViDoRe-V2,VisRAG,VisDoc-OOD,ViDoRe_arxivqa,ViDoRe_docvqa,ViDoRe_infovqa,ViDoRe_tabfquad,ViDoRe_tatdqa,ViDoRe_shiftproject,ViDoRe_syntheticDocQA_artificial_intelligence,ViDoRe_syntheticDocQA_energy,ViDoRe_syntheticDocQA_government_reports,ViDoRe_syntheticDocQA_healthcare_industry,ViDoRe_esg_reports_human_labeled_v2,ViDoRe_biomedical_lectures_v2_multilingual,ViDoRe_economics_reports_v2_multilingual,ViDoRe_esg_reports_v2_multilingual,VisRAG_ArxivQA,VisRAG_ChartQA,VisRAG_MP-DocVQA,VisRAG_SlideVQA,VisRAG_InfoVQA,VisRAG_PlotQA,ViDoSeek-page,ViDoSeek-doc,MMLongBench-page,MMLongBench-doc
|
| 2 |
+
1,"<a href=""https://interestfm-tte.github.io/"">IFM-TTE-7B</a>",8.29,79.48,85.19,71.5,92.75,53.27,84.53,45.76,88.33,94.28,55.31,89.76,99.26,97.26,98.28,99.13,87.5,64.85,53.94,79.73,92.73,95.07,87.31,95.9,93.0,92.49,50.29,82.6,28.18,52.03
|
| 3 |
+
2,"<a href=""https://github.com/360CVGroup/RzenEmbed"">RzenEmbed-v2-7B</a>",8.29,77.06,89.7,60.7,88.7,44.38,88.42,59.61,92.85,95.85,76.5,91.76,99.63,95.71,97.02,99.63,65.61,61.71,59.1,56.39,89.38,89.61,90.59,95.68,94.11,72.81,23.09,84.02,15.62,54.78
|
| 4 |
+
3,RzenEmbed-v1-7B,8.29,76.8,89.47,60.77,87.92,44.44,86.89,57.99,92.24,96.25,75.22,93.35,99.63,95.25,98.28,99.63,63.67,61.58,60.8,57.05,86.97,88.34,90.7,95.75,94.37,71.39,23.06,83.92,16.1,54.67
|
| 5 |
+
4,"<a href=""https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-7B-Instruct"">gme-Qwen2-VL-7B-Instruct</a>",8.29,75.18,89.44,55.61,84.99,44.4,86.86,57.46,91.6,94.64,74.1,96.76,99.63,95.32,98.76,99.26,63.37,49.49,54.21,55.38,87.35,81.9,89.22,94.54,93.52,63.42,23.24,83.88,16.19,54.29
|
| 6 |
+
5,RzenEmbed-v1-2B,2.21,74.41,86.98,57.63,85.35,43.33,83.4,56.09,89.89,94.39,69.32,89.88,98.76,92.78,97.19,98.15,58.93,59.87,57.98,53.73,83.68,86.39,85.54,94.07,91.92,70.52,22.89,82.27,16.21,51.95
|
| 7 |
+
6,"<a href=""https://seed1-6-embedding.github.io"">seed-1.6-embedding</a>",unknown,73.44,85.53,56.57,84.74,43.14,88.46,60.36,90.62,87.82,74.98,73.34,96.32,92.83,93.54,97.05,63.3,57.14,53.85,51.99,85.79,81.88,89.43,94.9,92.41,64.02,22.8,82.57,15.57,51.61
|
| 8 |
+
7,"<a href=""https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-2B-Instruct"">gme-Qwen2-VL-2B-Instruct</a>",2.21,72.71,86.15,53.96,82.52,43.12,82.76,53.11,90.16,93.33,69.94,89.47,97.52,91.91,94.59,98.69,60.95,53.98,50.19,50.73,82.0,79.88,84.42,93.38,91.36,64.09,21.62,83.62,15.82,51.43
|
| 9 |
+
8,"<a href=""https://huggingface.co/vidore/colpali-v1.3"">colpali-v1.3</a>",2.92,70.97,83.6,51.98,81.13,43.12,81.74,56.64,84.94,86.93,70.87,75.12,95.65,94.67,93.55,95.92,51.29,54.72,48.97,52.94,80.87,78.15,86.76,95.03,85.69,60.3,22.16,83.66,14.17,52.51
|
| 10 |
+
9,"<a href=""https://huggingface.co/OpenSearch-AI/Ops-MM-embedding-v1-7B"">Ops-MM-embedding-v1-7B</a>",8.29,70.34,80.05,59.59,79.31,43.34,78.17,49.11,86.59,91.4,55.59,76.17,90.99,87.53,91.56,93.39,66.27,54.34,60.92,56.82,78.18,79.53,78.2,91.11,87.16,61.71,22.47,83.47,15.89,51.51
|
| 11 |
+
10,"<a href=""https://huggingface.co/zhibinlan/UME-R1-7B"">UME-R1-7B</a>",8.29,67.13,75.66,50.53,83.7,37.55,73.56,41.1,80.83,90.24,46.67,64.96,89.47,85.7,89.82,94.29,50.41,50.73,57.8,43.18,80.45,84.95,83.43,91.47,89.2,72.7,21.28,75.33,12.32,41.28
|
| 12 |
+
11,"<a href=""https://huggingface.co/OpenSearch-AI/Ops-MM-embedding-v1-2B"">Ops-MM-embedding-v1-2B</a>",2.21,66.96,76.39,53.18,77.64,41.17,73.72,45.01,81.41,88.81,49.59,72.57,89.8,84.27,87.06,91.64,58.57,52.87,47.89,53.39,73.84,80.17,75.06,89.15,86.66,60.99,21.44,82.29,13.06,47.89
|
| 13 |
+
12,"<a href=""https://huggingface.co/VLM2Vec/VLM2Vec-V2.0"">VLM2Vec-V2.0-Qwen2VL-2B</a>",2.21,65.36,75.52,44.86,79.38,39.43,80.58,44.85,83.69,89.21,43.82,60.82,88.53,86.51,85.01,92.17,45.56,44.27,43.0,46.62,76.87,84.41,71.79,91.5,85.65,66.05,21.94,80.18,11.89,43.71
|
| 14 |
+
13,"<a href=""https://arxiv.org/abs/2503.19900"">interestFM-UIR-CAFe-7B</a>",8.03,63.92,70.68,49.57,79.45,38.07,73.29,38.27,80.59,80.7,37.79,52.02,85.99,84.78,84.96,88.37,50.67,50.89,54.38,42.33,73.96,82.71,75.15,87.58,87.91,69.42,22.52,73.82,13.32,42.61
|
| 15 |
+
14,"<a href=""https://huggingface.co/zhibinlan/UME-R1-2B"">UME-R1-2B</a>",2.21,63.86,72.39,46.16,79.22,37.17,73.86,37.85,76.19,86.11,40.63,66.79,85.87,83.25,82.56,90.83,50.16,46.15,45.69,42.65,74.26,85.98,75.59,87.11,84.38,67.98,21.2,75.89,11.9,39.68
|
| 16 |
+
15,"<a href=""https://github.com/GaryGuTC/UniME-v2"">UniME-V2-LLaVA-OneVision-7B</a>",8.03,56.68,61.76,42.0,70.53,37.86,51.9,38.17,73.16,57.75,35.48,45.42,76.85,77.27,79.87,81.71,54.67,33.75,35.95,43.63,53.13,83.67,66.4,86.44,82.58,50.96,22.89,75.79,11.96,40.8
|
| 17 |
+
16,"<a href=""https://arxiv.org/abs/2503.19900"">interestFM-UIR-CAFe-0.5B</a>",0.894,51.41,56.93,32.59,68.57,30.69,51.19,25.46,71.95,59.64,27.05,31.8,78.74,70.75,75.47,77.29,36.24,29.72,37.61,26.78,57.82,75.62,63.24,82.32,80.28,52.14,17.62,61.41,9.95,33.78
|
| 18 |
+
17,"<a href=""https://huggingface.co/code-kunkun/LamRA-Ret-Qwen2.5VL-7b"">LamRA-Ret-Qwen2.5VL-7b</a>",8.29,50.24,56.31,33.32,58.18,40.09,52.98,25.42,72.31,66.08,25.85,27.34,72.01,65.17,72.15,83.75,32.95,35.87,31.94,32.51,37.68,65.9,54.52,76.54,73.26,41.2,23.05,80.26,13.52,43.54
|
| 19 |
+
18,"<a href=""https://huggingface.co/TIGER-Lab/VLM2Vec-Qwen2VL-7B"">VLM2Vec-V1-Qwen2VL-7B</a>",8.29,46.43,56.95,9.41,59.12,38.09,60.2,34.71,70.38,78.18,27.62,38.62,67.65,60.42,61.82,69.85,6.82,5.06,13.88,11.89,52.61,70.23,52.81,72.77,71.96,34.35,22.28,77.81,11.82,40.46
|
| 20 |
+
19,"<a href=""https://huggingface.co/TIGER-Lab/VLM2Vec-Qwen2VL-2B"">VLM2Vec-V1-Qwen2VL-2B</a>",2.21,41.55,49.81,13.51,51.83,33.55,48.88,26.95,67.24,62.58,19.79,41.8,55.02,59.11,57.07,59.64,12.6,7.4,13.9,20.13,41.75,57.93,43.18,74.03,70.67,23.4,17.74,74.28,9.6,32.57
|
| 21 |
+
20,"<a href=""https://huggingface.co/code-kunkun/LamRA-Ret"">LamRA-Ret</a>",8.29,23.91,21.99,11.46,37.35,20.99,10.8,19.13,46.25,42.84,11.43,12.04,10.32,24.77,16.36,25.94,7.58,13.28,19.08,5.92,2.02,41.34,33.38,56.49,56.34,34.55,11.29,37.14,7.95,27.58
|
| 22 |
+
21,"<a href=""https://github.com/QQ-MM/QQMM-embed"">QQMM-embed-v2</a>",8.29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
| 23 |
+
22,OEmbedding-v1-7B,8.29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
| 24 |
+
23,ReCo-7B,8.29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
| 25 |
+
24,TCE-v1,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
|
rankings/visdoc_ranking.jsonl
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"Rank":1,"Models":"<a href=\"https:\/\/interestfm-tte.github.io\/\">IFM-TTE-7B<\/a>","Model Size(B)":8.29,"Visdoc-Overall":79.48,"ViDoRe-V1":85.19,"ViDoRe-V2":71.5,"VisRAG":92.75,"VisDoc-OOD":53.27,"ViDoRe_arxivqa":84.53,"ViDoRe_docvqa":45.76,"ViDoRe_infovqa":88.33,"ViDoRe_tabfquad":94.28,"ViDoRe_tatdqa":55.31,"ViDoRe_shiftproject":89.76,"ViDoRe_syntheticDocQA_artificial_intelligence":99.26,"ViDoRe_syntheticDocQA_energy":97.26,"ViDoRe_syntheticDocQA_government_reports":98.28,"ViDoRe_syntheticDocQA_healthcare_industry":99.13,"ViDoRe_esg_reports_human_labeled_v2":87.5,"ViDoRe_biomedical_lectures_v2_multilingual":64.85,"ViDoRe_economics_reports_v2_multilingual":53.94,"ViDoRe_esg_reports_v2_multilingual":79.73,"VisRAG_ArxivQA":92.73,"VisRAG_ChartQA":95.07,"VisRAG_MP-DocVQA":87.31,"VisRAG_SlideVQA":95.9,"VisRAG_InfoVQA":93.0,"VisRAG_PlotQA":92.49,"ViDoSeek-page":50.29,"ViDoSeek-doc":82.6,"MMLongBench-page":28.18,"MMLongBench-doc":52.03}
|
| 2 |
+
{"Rank":2,"Models":"<a href=\"https:\/\/github.com\/360CVGroup\/RzenEmbed\">RzenEmbed-v2-7B<\/a>","Model Size(B)":8.29,"Visdoc-Overall":77.06,"ViDoRe-V1":89.7,"ViDoRe-V2":60.7,"VisRAG":88.7,"VisDoc-OOD":44.38,"ViDoRe_arxivqa":88.42,"ViDoRe_docvqa":59.61,"ViDoRe_infovqa":92.85,"ViDoRe_tabfquad":95.85,"ViDoRe_tatdqa":76.5,"ViDoRe_shiftproject":91.76,"ViDoRe_syntheticDocQA_artificial_intelligence":99.63,"ViDoRe_syntheticDocQA_energy":95.71,"ViDoRe_syntheticDocQA_government_reports":97.02,"ViDoRe_syntheticDocQA_healthcare_industry":99.63,"ViDoRe_esg_reports_human_labeled_v2":65.61,"ViDoRe_biomedical_lectures_v2_multilingual":61.71,"ViDoRe_economics_reports_v2_multilingual":59.1,"ViDoRe_esg_reports_v2_multilingual":56.39,"VisRAG_ArxivQA":89.38,"VisRAG_ChartQA":89.61,"VisRAG_MP-DocVQA":90.59,"VisRAG_SlideVQA":95.68,"VisRAG_InfoVQA":94.11,"VisRAG_PlotQA":72.81,"ViDoSeek-page":23.09,"ViDoSeek-doc":84.02,"MMLongBench-page":15.62,"MMLongBench-doc":54.78}
|
| 3 |
+
{"Rank":3,"Models":"RzenEmbed-v1-7B","Model Size(B)":8.29,"Visdoc-Overall":76.8,"ViDoRe-V1":89.47,"ViDoRe-V2":60.77,"VisRAG":87.92,"VisDoc-OOD":44.44,"ViDoRe_arxivqa":86.89,"ViDoRe_docvqa":57.99,"ViDoRe_infovqa":92.24,"ViDoRe_tabfquad":96.25,"ViDoRe_tatdqa":75.22,"ViDoRe_shiftproject":93.35,"ViDoRe_syntheticDocQA_artificial_intelligence":99.63,"ViDoRe_syntheticDocQA_energy":95.25,"ViDoRe_syntheticDocQA_government_reports":98.28,"ViDoRe_syntheticDocQA_healthcare_industry":99.63,"ViDoRe_esg_reports_human_labeled_v2":63.67,"ViDoRe_biomedical_lectures_v2_multilingual":61.58,"ViDoRe_economics_reports_v2_multilingual":60.8,"ViDoRe_esg_reports_v2_multilingual":57.05,"VisRAG_ArxivQA":86.97,"VisRAG_ChartQA":88.34,"VisRAG_MP-DocVQA":90.7,"VisRAG_SlideVQA":95.75,"VisRAG_InfoVQA":94.37,"VisRAG_PlotQA":71.39,"ViDoSeek-page":23.06,"ViDoSeek-doc":83.92,"MMLongBench-page":16.1,"MMLongBench-doc":54.67}
|
| 4 |
+
{"Rank":4,"Models":"<a href=\"https:\/\/huggingface.co\/Alibaba-NLP\/gme-Qwen2-VL-7B-Instruct\">gme-Qwen2-VL-7B-Instruct<\/a>","Model Size(B)":8.29,"Visdoc-Overall":75.18,"ViDoRe-V1":89.44,"ViDoRe-V2":55.61,"VisRAG":84.99,"VisDoc-OOD":44.4,"ViDoRe_arxivqa":86.86,"ViDoRe_docvqa":57.46,"ViDoRe_infovqa":91.6,"ViDoRe_tabfquad":94.64,"ViDoRe_tatdqa":74.1,"ViDoRe_shiftproject":96.76,"ViDoRe_syntheticDocQA_artificial_intelligence":99.63,"ViDoRe_syntheticDocQA_energy":95.32,"ViDoRe_syntheticDocQA_government_reports":98.76,"ViDoRe_syntheticDocQA_healthcare_industry":99.26,"ViDoRe_esg_reports_human_labeled_v2":63.37,"ViDoRe_biomedical_lectures_v2_multilingual":49.49,"ViDoRe_economics_reports_v2_multilingual":54.21,"ViDoRe_esg_reports_v2_multilingual":55.38,"VisRAG_ArxivQA":87.35,"VisRAG_ChartQA":81.9,"VisRAG_MP-DocVQA":89.22,"VisRAG_SlideVQA":94.54,"VisRAG_InfoVQA":93.52,"VisRAG_PlotQA":63.42,"ViDoSeek-page":23.24,"ViDoSeek-doc":83.88,"MMLongBench-page":16.19,"MMLongBench-doc":54.29}
|
| 5 |
+
{"Rank":5,"Models":"RzenEmbed-v1-2B","Model Size(B)":2.21,"Visdoc-Overall":74.41,"ViDoRe-V1":86.98,"ViDoRe-V2":57.63,"VisRAG":85.35,"VisDoc-OOD":43.33,"ViDoRe_arxivqa":83.4,"ViDoRe_docvqa":56.09,"ViDoRe_infovqa":89.89,"ViDoRe_tabfquad":94.39,"ViDoRe_tatdqa":69.32,"ViDoRe_shiftproject":89.88,"ViDoRe_syntheticDocQA_artificial_intelligence":98.76,"ViDoRe_syntheticDocQA_energy":92.78,"ViDoRe_syntheticDocQA_government_reports":97.19,"ViDoRe_syntheticDocQA_healthcare_industry":98.15,"ViDoRe_esg_reports_human_labeled_v2":58.93,"ViDoRe_biomedical_lectures_v2_multilingual":59.87,"ViDoRe_economics_reports_v2_multilingual":57.98,"ViDoRe_esg_reports_v2_multilingual":53.73,"VisRAG_ArxivQA":83.68,"VisRAG_ChartQA":86.39,"VisRAG_MP-DocVQA":85.54,"VisRAG_SlideVQA":94.07,"VisRAG_InfoVQA":91.92,"VisRAG_PlotQA":70.52,"ViDoSeek-page":22.89,"ViDoSeek-doc":82.27,"MMLongBench-page":16.21,"MMLongBench-doc":51.95}
|
| 6 |
+
{"Rank":6,"Models":"<a href=\"https:\/\/seed1-6-embedding.github.io\">seed-1.6-embedding<\/a>","Model Size(B)":"unknown","Visdoc-Overall":73.44,"ViDoRe-V1":85.53,"ViDoRe-V2":56.57,"VisRAG":84.74,"VisDoc-OOD":43.14,"ViDoRe_arxivqa":88.46,"ViDoRe_docvqa":60.36,"ViDoRe_infovqa":90.62,"ViDoRe_tabfquad":87.82,"ViDoRe_tatdqa":74.98,"ViDoRe_shiftproject":73.34,"ViDoRe_syntheticDocQA_artificial_intelligence":96.32,"ViDoRe_syntheticDocQA_energy":92.83,"ViDoRe_syntheticDocQA_government_reports":93.54,"ViDoRe_syntheticDocQA_healthcare_industry":97.05,"ViDoRe_esg_reports_human_labeled_v2":63.3,"ViDoRe_biomedical_lectures_v2_multilingual":57.14,"ViDoRe_economics_reports_v2_multilingual":53.85,"ViDoRe_esg_reports_v2_multilingual":51.99,"VisRAG_ArxivQA":85.79,"VisRAG_ChartQA":81.88,"VisRAG_MP-DocVQA":89.43,"VisRAG_SlideVQA":94.9,"VisRAG_InfoVQA":92.41,"VisRAG_PlotQA":64.02,"ViDoSeek-page":22.8,"ViDoSeek-doc":82.57,"MMLongBench-page":15.57,"MMLongBench-doc":51.61}
|
| 7 |
+
{"Rank":7,"Models":"<a href=\"https:\/\/huggingface.co\/Alibaba-NLP\/gme-Qwen2-VL-2B-Instruct\">gme-Qwen2-VL-2B-Instruct<\/a>","Model Size(B)":2.21,"Visdoc-Overall":72.71,"ViDoRe-V1":86.15,"ViDoRe-V2":53.96,"VisRAG":82.52,"VisDoc-OOD":43.12,"ViDoRe_arxivqa":82.76,"ViDoRe_docvqa":53.11,"ViDoRe_infovqa":90.16,"ViDoRe_tabfquad":93.33,"ViDoRe_tatdqa":69.94,"ViDoRe_shiftproject":89.47,"ViDoRe_syntheticDocQA_artificial_intelligence":97.52,"ViDoRe_syntheticDocQA_energy":91.91,"ViDoRe_syntheticDocQA_government_reports":94.59,"ViDoRe_syntheticDocQA_healthcare_industry":98.69,"ViDoRe_esg_reports_human_labeled_v2":60.95,"ViDoRe_biomedical_lectures_v2_multilingual":53.98,"ViDoRe_economics_reports_v2_multilingual":50.19,"ViDoRe_esg_reports_v2_multilingual":50.73,"VisRAG_ArxivQA":82.0,"VisRAG_ChartQA":79.88,"VisRAG_MP-DocVQA":84.42,"VisRAG_SlideVQA":93.38,"VisRAG_InfoVQA":91.36,"VisRAG_PlotQA":64.09,"ViDoSeek-page":21.62,"ViDoSeek-doc":83.62,"MMLongBench-page":15.82,"MMLongBench-doc":51.43}
|
| 8 |
+
{"Rank":8,"Models":"<a href=\"https:\/\/huggingface.co\/vidore\/colpali-v1.3\">colpali-v1.3<\/a>","Model Size(B)":2.92,"Visdoc-Overall":70.97,"ViDoRe-V1":83.6,"ViDoRe-V2":51.98,"VisRAG":81.13,"VisDoc-OOD":43.12,"ViDoRe_arxivqa":81.74,"ViDoRe_docvqa":56.64,"ViDoRe_infovqa":84.94,"ViDoRe_tabfquad":86.93,"ViDoRe_tatdqa":70.87,"ViDoRe_shiftproject":75.12,"ViDoRe_syntheticDocQA_artificial_intelligence":95.65,"ViDoRe_syntheticDocQA_energy":94.67,"ViDoRe_syntheticDocQA_government_reports":93.55,"ViDoRe_syntheticDocQA_healthcare_industry":95.92,"ViDoRe_esg_reports_human_labeled_v2":51.29,"ViDoRe_biomedical_lectures_v2_multilingual":54.72,"ViDoRe_economics_reports_v2_multilingual":48.97,"ViDoRe_esg_reports_v2_multilingual":52.94,"VisRAG_ArxivQA":80.87,"VisRAG_ChartQA":78.15,"VisRAG_MP-DocVQA":86.76,"VisRAG_SlideVQA":95.03,"VisRAG_InfoVQA":85.69,"VisRAG_PlotQA":60.3,"ViDoSeek-page":22.16,"ViDoSeek-doc":83.66,"MMLongBench-page":14.17,"MMLongBench-doc":52.51}
|
| 9 |
+
{"Rank":9,"Models":"<a href=\"https:\/\/huggingface.co\/OpenSearch-AI\/Ops-MM-embedding-v1-7B\">Ops-MM-embedding-v1-7B<\/a>","Model Size(B)":8.29,"Visdoc-Overall":70.34,"ViDoRe-V1":80.05,"ViDoRe-V2":59.59,"VisRAG":79.31,"VisDoc-OOD":43.34,"ViDoRe_arxivqa":78.17,"ViDoRe_docvqa":49.11,"ViDoRe_infovqa":86.59,"ViDoRe_tabfquad":91.4,"ViDoRe_tatdqa":55.59,"ViDoRe_shiftproject":76.17,"ViDoRe_syntheticDocQA_artificial_intelligence":90.99,"ViDoRe_syntheticDocQA_energy":87.53,"ViDoRe_syntheticDocQA_government_reports":91.56,"ViDoRe_syntheticDocQA_healthcare_industry":93.39,"ViDoRe_esg_reports_human_labeled_v2":66.27,"ViDoRe_biomedical_lectures_v2_multilingual":54.34,"ViDoRe_economics_reports_v2_multilingual":60.92,"ViDoRe_esg_reports_v2_multilingual":56.82,"VisRAG_ArxivQA":78.18,"VisRAG_ChartQA":79.53,"VisRAG_MP-DocVQA":78.2,"VisRAG_SlideVQA":91.11,"VisRAG_InfoVQA":87.16,"VisRAG_PlotQA":61.71,"ViDoSeek-page":22.47,"ViDoSeek-doc":83.47,"MMLongBench-page":15.89,"MMLongBench-doc":51.51}
|
| 10 |
+
{"Rank":10,"Models":"<a href=\"https:\/\/huggingface.co\/zhibinlan\/UME-R1-7B\">UME-R1-7B<\/a>","Model Size(B)":8.29,"Visdoc-Overall":67.13,"ViDoRe-V1":75.66,"ViDoRe-V2":50.53,"VisRAG":83.7,"VisDoc-OOD":37.55,"ViDoRe_arxivqa":73.56,"ViDoRe_docvqa":41.1,"ViDoRe_infovqa":80.83,"ViDoRe_tabfquad":90.24,"ViDoRe_tatdqa":46.67,"ViDoRe_shiftproject":64.96,"ViDoRe_syntheticDocQA_artificial_intelligence":89.47,"ViDoRe_syntheticDocQA_energy":85.7,"ViDoRe_syntheticDocQA_government_reports":89.82,"ViDoRe_syntheticDocQA_healthcare_industry":94.29,"ViDoRe_esg_reports_human_labeled_v2":50.41,"ViDoRe_biomedical_lectures_v2_multilingual":50.73,"ViDoRe_economics_reports_v2_multilingual":57.8,"ViDoRe_esg_reports_v2_multilingual":43.18,"VisRAG_ArxivQA":80.45,"VisRAG_ChartQA":84.95,"VisRAG_MP-DocVQA":83.43,"VisRAG_SlideVQA":91.47,"VisRAG_InfoVQA":89.2,"VisRAG_PlotQA":72.7,"ViDoSeek-page":21.28,"ViDoSeek-doc":75.33,"MMLongBench-page":12.32,"MMLongBench-doc":41.28}
|
| 11 |
+
{"Rank":11,"Models":"<a href=\"https:\/\/huggingface.co\/OpenSearch-AI\/Ops-MM-embedding-v1-2B\">Ops-MM-embedding-v1-2B<\/a>","Model Size(B)":2.21,"Visdoc-Overall":66.96,"ViDoRe-V1":76.39,"ViDoRe-V2":53.18,"VisRAG":77.64,"VisDoc-OOD":41.17,"ViDoRe_arxivqa":73.72,"ViDoRe_docvqa":45.01,"ViDoRe_infovqa":81.41,"ViDoRe_tabfquad":88.81,"ViDoRe_tatdqa":49.59,"ViDoRe_shiftproject":72.57,"ViDoRe_syntheticDocQA_artificial_intelligence":89.8,"ViDoRe_syntheticDocQA_energy":84.27,"ViDoRe_syntheticDocQA_government_reports":87.06,"ViDoRe_syntheticDocQA_healthcare_industry":91.64,"ViDoRe_esg_reports_human_labeled_v2":58.57,"ViDoRe_biomedical_lectures_v2_multilingual":52.87,"ViDoRe_economics_reports_v2_multilingual":47.89,"ViDoRe_esg_reports_v2_multilingual":53.39,"VisRAG_ArxivQA":73.84,"VisRAG_ChartQA":80.17,"VisRAG_MP-DocVQA":75.06,"VisRAG_SlideVQA":89.15,"VisRAG_InfoVQA":86.66,"VisRAG_PlotQA":60.99,"ViDoSeek-page":21.44,"ViDoSeek-doc":82.29,"MMLongBench-page":13.06,"MMLongBench-doc":47.89}
|
| 12 |
+
{"Rank":12,"Models":"<a href=\"https:\/\/huggingface.co\/VLM2Vec\/VLM2Vec-V2.0\">VLM2Vec-V2.0-Qwen2VL-2B<\/a>","Model Size(B)":2.21,"Visdoc-Overall":65.36,"ViDoRe-V1":75.52,"ViDoRe-V2":44.86,"VisRAG":79.38,"VisDoc-OOD":39.43,"ViDoRe_arxivqa":80.58,"ViDoRe_docvqa":44.85,"ViDoRe_infovqa":83.69,"ViDoRe_tabfquad":89.21,"ViDoRe_tatdqa":43.82,"ViDoRe_shiftproject":60.82,"ViDoRe_syntheticDocQA_artificial_intelligence":88.53,"ViDoRe_syntheticDocQA_energy":86.51,"ViDoRe_syntheticDocQA_government_reports":85.01,"ViDoRe_syntheticDocQA_healthcare_industry":92.17,"ViDoRe_esg_reports_human_labeled_v2":45.56,"ViDoRe_biomedical_lectures_v2_multilingual":44.27,"ViDoRe_economics_reports_v2_multilingual":43.0,"ViDoRe_esg_reports_v2_multilingual":46.62,"VisRAG_ArxivQA":76.87,"VisRAG_ChartQA":84.41,"VisRAG_MP-DocVQA":71.79,"VisRAG_SlideVQA":91.5,"VisRAG_InfoVQA":85.65,"VisRAG_PlotQA":66.05,"ViDoSeek-page":21.94,"ViDoSeek-doc":80.18,"MMLongBench-page":11.89,"MMLongBench-doc":43.71}
|
| 13 |
+
{"Rank":13,"Models":"<a href=\"https:\/\/arxiv.org\/abs\/2503.19900\">interestFM-UIR-CAFe-7B<\/a>","Model Size(B)":8.03,"Visdoc-Overall":63.92,"ViDoRe-V1":70.68,"ViDoRe-V2":49.57,"VisRAG":79.45,"VisDoc-OOD":38.07,"ViDoRe_arxivqa":73.29,"ViDoRe_docvqa":38.27,"ViDoRe_infovqa":80.59,"ViDoRe_tabfquad":80.7,"ViDoRe_tatdqa":37.79,"ViDoRe_shiftproject":52.02,"ViDoRe_syntheticDocQA_artificial_intelligence":85.99,"ViDoRe_syntheticDocQA_energy":84.78,"ViDoRe_syntheticDocQA_government_reports":84.96,"ViDoRe_syntheticDocQA_healthcare_industry":88.37,"ViDoRe_esg_reports_human_labeled_v2":50.67,"ViDoRe_biomedical_lectures_v2_multilingual":50.89,"ViDoRe_economics_reports_v2_multilingual":54.38,"ViDoRe_esg_reports_v2_multilingual":42.33,"VisRAG_ArxivQA":73.96,"VisRAG_ChartQA":82.71,"VisRAG_MP-DocVQA":75.15,"VisRAG_SlideVQA":87.58,"VisRAG_InfoVQA":87.91,"VisRAG_PlotQA":69.42,"ViDoSeek-page":22.52,"ViDoSeek-doc":73.82,"MMLongBench-page":13.32,"MMLongBench-doc":42.61}
|
| 14 |
+
{"Rank":14,"Models":"<a href=\"https:\/\/huggingface.co\/zhibinlan\/UME-R1-2B\">UME-R1-2B<\/a>","Model Size(B)":2.21,"Visdoc-Overall":63.86,"ViDoRe-V1":72.39,"ViDoRe-V2":46.16,"VisRAG":79.22,"VisDoc-OOD":37.17,"ViDoRe_arxivqa":73.86,"ViDoRe_docvqa":37.85,"ViDoRe_infovqa":76.19,"ViDoRe_tabfquad":86.11,"ViDoRe_tatdqa":40.63,"ViDoRe_shiftproject":66.79,"ViDoRe_syntheticDocQA_artificial_intelligence":85.87,"ViDoRe_syntheticDocQA_energy":83.25,"ViDoRe_syntheticDocQA_government_reports":82.56,"ViDoRe_syntheticDocQA_healthcare_industry":90.83,"ViDoRe_esg_reports_human_labeled_v2":50.16,"ViDoRe_biomedical_lectures_v2_multilingual":46.15,"ViDoRe_economics_reports_v2_multilingual":45.69,"ViDoRe_esg_reports_v2_multilingual":42.65,"VisRAG_ArxivQA":74.26,"VisRAG_ChartQA":85.98,"VisRAG_MP-DocVQA":75.59,"VisRAG_SlideVQA":87.11,"VisRAG_InfoVQA":84.38,"VisRAG_PlotQA":67.98,"ViDoSeek-page":21.2,"ViDoSeek-doc":75.89,"MMLongBench-page":11.9,"MMLongBench-doc":39.68}
|
| 15 |
+
{"Rank":15,"Models":"<a href=\"https:\/\/github.com\/GaryGuTC\/UniME-v2\">UniME-V2-LLaVA-OneVision-7B<\/a>","Model Size(B)":8.03,"Visdoc-Overall":56.68,"ViDoRe-V1":61.76,"ViDoRe-V2":42.0,"VisRAG":70.53,"VisDoc-OOD":37.86,"ViDoRe_arxivqa":51.9,"ViDoRe_docvqa":38.17,"ViDoRe_infovqa":73.16,"ViDoRe_tabfquad":57.75,"ViDoRe_tatdqa":35.48,"ViDoRe_shiftproject":45.42,"ViDoRe_syntheticDocQA_artificial_intelligence":76.85,"ViDoRe_syntheticDocQA_energy":77.27,"ViDoRe_syntheticDocQA_government_reports":79.87,"ViDoRe_syntheticDocQA_healthcare_industry":81.71,"ViDoRe_esg_reports_human_labeled_v2":54.67,"ViDoRe_biomedical_lectures_v2_multilingual":33.75,"ViDoRe_economics_reports_v2_multilingual":35.95,"ViDoRe_esg_reports_v2_multilingual":43.63,"VisRAG_ArxivQA":53.13,"VisRAG_ChartQA":83.67,"VisRAG_MP-DocVQA":66.4,"VisRAG_SlideVQA":86.44,"VisRAG_InfoVQA":82.58,"VisRAG_PlotQA":50.96,"ViDoSeek-page":22.89,"ViDoSeek-doc":75.79,"MMLongBench-page":11.96,"MMLongBench-doc":40.8}
|
| 16 |
+
{"Rank":16,"Models":"<a href=\"https:\/\/arxiv.org\/abs\/2503.19900\">interestFM-UIR-CAFe-0.5B<\/a>","Model Size(B)":0.894,"Visdoc-Overall":51.41,"ViDoRe-V1":56.93,"ViDoRe-V2":32.59,"VisRAG":68.57,"VisDoc-OOD":30.69,"ViDoRe_arxivqa":51.19,"ViDoRe_docvqa":25.46,"ViDoRe_infovqa":71.95,"ViDoRe_tabfquad":59.64,"ViDoRe_tatdqa":27.05,"ViDoRe_shiftproject":31.8,"ViDoRe_syntheticDocQA_artificial_intelligence":78.74,"ViDoRe_syntheticDocQA_energy":70.75,"ViDoRe_syntheticDocQA_government_reports":75.47,"ViDoRe_syntheticDocQA_healthcare_industry":77.29,"ViDoRe_esg_reports_human_labeled_v2":36.24,"ViDoRe_biomedical_lectures_v2_multilingual":29.72,"ViDoRe_economics_reports_v2_multilingual":37.61,"ViDoRe_esg_reports_v2_multilingual":26.78,"VisRAG_ArxivQA":57.82,"VisRAG_ChartQA":75.62,"VisRAG_MP-DocVQA":63.24,"VisRAG_SlideVQA":82.32,"VisRAG_InfoVQA":80.28,"VisRAG_PlotQA":52.14,"ViDoSeek-page":17.62,"ViDoSeek-doc":61.41,"MMLongBench-page":9.95,"MMLongBench-doc":33.78}
|
| 17 |
+
{"Rank":17,"Models":"<a href=\"https:\/\/huggingface.co\/code-kunkun\/LamRA-Ret-Qwen2.5VL-7b\">LamRA-Ret-Qwen2.5VL-7b<\/a>","Model Size(B)":8.29,"Visdoc-Overall":50.24,"ViDoRe-V1":56.31,"ViDoRe-V2":33.32,"VisRAG":58.18,"VisDoc-OOD":40.09,"ViDoRe_arxivqa":52.98,"ViDoRe_docvqa":25.42,"ViDoRe_infovqa":72.31,"ViDoRe_tabfquad":66.08,"ViDoRe_tatdqa":25.85,"ViDoRe_shiftproject":27.34,"ViDoRe_syntheticDocQA_artificial_intelligence":72.01,"ViDoRe_syntheticDocQA_energy":65.17,"ViDoRe_syntheticDocQA_government_reports":72.15,"ViDoRe_syntheticDocQA_healthcare_industry":83.75,"ViDoRe_esg_reports_human_labeled_v2":32.95,"ViDoRe_biomedical_lectures_v2_multilingual":35.87,"ViDoRe_economics_reports_v2_multilingual":31.94,"ViDoRe_esg_reports_v2_multilingual":32.51,"VisRAG_ArxivQA":37.68,"VisRAG_ChartQA":65.9,"VisRAG_MP-DocVQA":54.52,"VisRAG_SlideVQA":76.54,"VisRAG_InfoVQA":73.26,"VisRAG_PlotQA":41.2,"ViDoSeek-page":23.05,"ViDoSeek-doc":80.26,"MMLongBench-page":13.52,"MMLongBench-doc":43.54}
|
| 18 |
+
{"Rank":18,"Models":"<a href=\"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Qwen2VL-7B\">VLM2Vec-V1-Qwen2VL-7B<\/a>","Model Size(B)":8.29,"Visdoc-Overall":46.43,"ViDoRe-V1":56.95,"ViDoRe-V2":9.41,"VisRAG":59.12,"VisDoc-OOD":38.09,"ViDoRe_arxivqa":60.2,"ViDoRe_docvqa":34.71,"ViDoRe_infovqa":70.38,"ViDoRe_tabfquad":78.18,"ViDoRe_tatdqa":27.62,"ViDoRe_shiftproject":38.62,"ViDoRe_syntheticDocQA_artificial_intelligence":67.65,"ViDoRe_syntheticDocQA_energy":60.42,"ViDoRe_syntheticDocQA_government_reports":61.82,"ViDoRe_syntheticDocQA_healthcare_industry":69.85,"ViDoRe_esg_reports_human_labeled_v2":6.82,"ViDoRe_biomedical_lectures_v2_multilingual":5.06,"ViDoRe_economics_reports_v2_multilingual":13.88,"ViDoRe_esg_reports_v2_multilingual":11.89,"VisRAG_ArxivQA":52.61,"VisRAG_ChartQA":70.23,"VisRAG_MP-DocVQA":52.81,"VisRAG_SlideVQA":72.77,"VisRAG_InfoVQA":71.96,"VisRAG_PlotQA":34.35,"ViDoSeek-page":22.28,"ViDoSeek-doc":77.81,"MMLongBench-page":11.82,"MMLongBench-doc":40.46}
|
| 19 |
+
{"Rank":19,"Models":"<a href=\"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Qwen2VL-2B\">VLM2Vec-V1-Qwen2VL-2B<\/a>","Model Size(B)":2.21,"Visdoc-Overall":41.55,"ViDoRe-V1":49.81,"ViDoRe-V2":13.51,"VisRAG":51.83,"VisDoc-OOD":33.55,"ViDoRe_arxivqa":48.88,"ViDoRe_docvqa":26.95,"ViDoRe_infovqa":67.24,"ViDoRe_tabfquad":62.58,"ViDoRe_tatdqa":19.79,"ViDoRe_shiftproject":41.8,"ViDoRe_syntheticDocQA_artificial_intelligence":55.02,"ViDoRe_syntheticDocQA_energy":59.11,"ViDoRe_syntheticDocQA_government_reports":57.07,"ViDoRe_syntheticDocQA_healthcare_industry":59.64,"ViDoRe_esg_reports_human_labeled_v2":12.6,"ViDoRe_biomedical_lectures_v2_multilingual":7.4,"ViDoRe_economics_reports_v2_multilingual":13.9,"ViDoRe_esg_reports_v2_multilingual":20.13,"VisRAG_ArxivQA":41.75,"VisRAG_ChartQA":57.93,"VisRAG_MP-DocVQA":43.18,"VisRAG_SlideVQA":74.03,"VisRAG_InfoVQA":70.67,"VisRAG_PlotQA":23.4,"ViDoSeek-page":17.74,"ViDoSeek-doc":74.28,"MMLongBench-page":9.6,"MMLongBench-doc":32.57}
|
| 20 |
+
{"Rank":20,"Models":"<a href=\"https:\/\/huggingface.co\/code-kunkun\/LamRA-Ret\">LamRA-Ret<\/a>","Model Size(B)":8.29,"Visdoc-Overall":23.91,"ViDoRe-V1":21.99,"ViDoRe-V2":11.46,"VisRAG":37.35,"VisDoc-OOD":20.99,"ViDoRe_arxivqa":10.8,"ViDoRe_docvqa":19.13,"ViDoRe_infovqa":46.25,"ViDoRe_tabfquad":42.84,"ViDoRe_tatdqa":11.43,"ViDoRe_shiftproject":12.04,"ViDoRe_syntheticDocQA_artificial_intelligence":10.32,"ViDoRe_syntheticDocQA_energy":24.77,"ViDoRe_syntheticDocQA_government_reports":16.36,"ViDoRe_syntheticDocQA_healthcare_industry":25.94,"ViDoRe_esg_reports_human_labeled_v2":7.58,"ViDoRe_biomedical_lectures_v2_multilingual":13.28,"ViDoRe_economics_reports_v2_multilingual":19.08,"ViDoRe_esg_reports_v2_multilingual":5.92,"VisRAG_ArxivQA":2.02,"VisRAG_ChartQA":41.34,"VisRAG_MP-DocVQA":33.38,"VisRAG_SlideVQA":56.49,"VisRAG_InfoVQA":56.34,"VisRAG_PlotQA":34.55,"ViDoSeek-page":11.29,"ViDoSeek-doc":37.14,"MMLongBench-page":7.95,"MMLongBench-doc":27.58}
|
| 21 |
+
{"Rank":21,"Models":"<a href=\"https:\/\/github.com\/QQ-MM\/QQMM-embed\">QQMM-embed-v2<\/a>","Model Size(B)":8.29,"Visdoc-Overall":0.0,"ViDoRe-V1":0.0,"ViDoRe-V2":0.0,"VisRAG":0.0,"VisDoc-OOD":0.0,"ViDoRe_arxivqa":0.0,"ViDoRe_docvqa":0.0,"ViDoRe_infovqa":0.0,"ViDoRe_tabfquad":0.0,"ViDoRe_tatdqa":0.0,"ViDoRe_shiftproject":0.0,"ViDoRe_syntheticDocQA_artificial_intelligence":0.0,"ViDoRe_syntheticDocQA_energy":0.0,"ViDoRe_syntheticDocQA_government_reports":0.0,"ViDoRe_syntheticDocQA_healthcare_industry":0.0,"ViDoRe_esg_reports_human_labeled_v2":0.0,"ViDoRe_biomedical_lectures_v2_multilingual":0.0,"ViDoRe_economics_reports_v2_multilingual":0.0,"ViDoRe_esg_reports_v2_multilingual":0.0,"VisRAG_ArxivQA":0.0,"VisRAG_ChartQA":0.0,"VisRAG_MP-DocVQA":0.0,"VisRAG_SlideVQA":0.0,"VisRAG_InfoVQA":0.0,"VisRAG_PlotQA":0.0,"ViDoSeek-page":0.0,"ViDoSeek-doc":0.0,"MMLongBench-page":0.0,"MMLongBench-doc":0.0}
|
| 22 |
+
{"Rank":22,"Models":"OEmbedding-v1-7B","Model Size(B)":8.29,"Visdoc-Overall":0.0,"ViDoRe-V1":0.0,"ViDoRe-V2":0.0,"VisRAG":0.0,"VisDoc-OOD":0.0,"ViDoRe_arxivqa":0.0,"ViDoRe_docvqa":0.0,"ViDoRe_infovqa":0.0,"ViDoRe_tabfquad":0.0,"ViDoRe_tatdqa":0.0,"ViDoRe_shiftproject":0.0,"ViDoRe_syntheticDocQA_artificial_intelligence":0.0,"ViDoRe_syntheticDocQA_energy":0.0,"ViDoRe_syntheticDocQA_government_reports":0.0,"ViDoRe_syntheticDocQA_healthcare_industry":0.0,"ViDoRe_esg_reports_human_labeled_v2":0.0,"ViDoRe_biomedical_lectures_v2_multilingual":0.0,"ViDoRe_economics_reports_v2_multilingual":0.0,"ViDoRe_esg_reports_v2_multilingual":0.0,"VisRAG_ArxivQA":0.0,"VisRAG_ChartQA":0.0,"VisRAG_MP-DocVQA":0.0,"VisRAG_SlideVQA":0.0,"VisRAG_InfoVQA":0.0,"VisRAG_PlotQA":0.0,"ViDoSeek-page":0.0,"ViDoSeek-doc":0.0,"MMLongBench-page":0.0,"MMLongBench-doc":0.0}
|
| 23 |
+
{"Rank":23,"Models":"ReCo-7B","Model Size(B)":8.29,"Visdoc-Overall":0.0,"ViDoRe-V1":0.0,"ViDoRe-V2":0.0,"VisRAG":0.0,"VisDoc-OOD":0.0,"ViDoRe_arxivqa":0.0,"ViDoRe_docvqa":0.0,"ViDoRe_infovqa":0.0,"ViDoRe_tabfquad":0.0,"ViDoRe_tatdqa":0.0,"ViDoRe_shiftproject":0.0,"ViDoRe_syntheticDocQA_artificial_intelligence":0.0,"ViDoRe_syntheticDocQA_energy":0.0,"ViDoRe_syntheticDocQA_government_reports":0.0,"ViDoRe_syntheticDocQA_healthcare_industry":0.0,"ViDoRe_esg_reports_human_labeled_v2":0.0,"ViDoRe_biomedical_lectures_v2_multilingual":0.0,"ViDoRe_economics_reports_v2_multilingual":0.0,"ViDoRe_esg_reports_v2_multilingual":0.0,"VisRAG_ArxivQA":0.0,"VisRAG_ChartQA":0.0,"VisRAG_MP-DocVQA":0.0,"VisRAG_SlideVQA":0.0,"VisRAG_InfoVQA":0.0,"VisRAG_PlotQA":0.0,"ViDoSeek-page":0.0,"ViDoSeek-doc":0.0,"MMLongBench-page":0.0,"MMLongBench-doc":0.0}
|
| 24 |
+
{"Rank":24,"Models":"TCE-v1","Model Size(B)":8.0,"Visdoc-Overall":0.0,"ViDoRe-V1":0.0,"ViDoRe-V2":0.0,"VisRAG":0.0,"VisDoc-OOD":0.0,"ViDoRe_arxivqa":0.0,"ViDoRe_docvqa":0.0,"ViDoRe_infovqa":0.0,"ViDoRe_tabfquad":0.0,"ViDoRe_tatdqa":0.0,"ViDoRe_shiftproject":0.0,"ViDoRe_syntheticDocQA_artificial_intelligence":0.0,"ViDoRe_syntheticDocQA_energy":0.0,"ViDoRe_syntheticDocQA_government_reports":0.0,"ViDoRe_syntheticDocQA_healthcare_industry":0.0,"ViDoRe_esg_reports_human_labeled_v2":0.0,"ViDoRe_biomedical_lectures_v2_multilingual":0.0,"ViDoRe_economics_reports_v2_multilingual":0.0,"ViDoRe_esg_reports_v2_multilingual":0.0,"VisRAG_ArxivQA":0.0,"VisRAG_ChartQA":0.0,"VisRAG_MP-DocVQA":0.0,"VisRAG_SlideVQA":0.0,"VisRAG_InfoVQA":0.0,"VisRAG_PlotQA":0.0,"ViDoSeek-page":0.0,"ViDoSeek-doc":0.0,"MMLongBench-page":0.0,"MMLongBench-doc":0.0}
|
utils.py
CHANGED
|
@@ -1,13 +1,7 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
-
import gradio as gr
|
| 3 |
-
import csv
|
| 4 |
-
import json
|
| 5 |
import os
|
| 6 |
-
import requests
|
| 7 |
-
import io
|
| 8 |
-
import shutil
|
| 9 |
import pprint as pp
|
| 10 |
-
|
| 11 |
|
| 12 |
from datasets import DATASETS
|
| 13 |
|
|
@@ -35,9 +29,13 @@ Building upon on **MMEB-V1**, **MMEB-V2** expands the evaluation scope to includ
|
|
| 35 |
— Video Retrieval, Moment Retrieval, Video Classification, and Video Question Answering — and one task focused on visual documents, Visual Document Retrieval.
|
| 36 |
This comprehensive suite enables robust evaluation of multimodal embedding models across static, temporal, and structured visual data settings.
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
| [**📈Overview**](https://tiger-ai-lab.github.io/VLM2Vec/) | [**Github**](https://github.com/TIGER-AI-Lab/VLM2Vec)
|
| 43 |
| [**📖MMEB-V2/VLM2Vec-V2 Paper**](https://arxiv.org/abs/2507.04590)
|
|
@@ -46,11 +44,12 @@ For researchers relying on MMEB-V1, we recommend transitioning to MMEB-V2 for mo
|
|
| 46 |
| [**Discord**](https://discord.gg/njyKubdtry) |
|
| 47 |
"""
|
| 48 |
|
| 49 |
-
TABLE_INTRODUCTION = """***Important Notes:***
|
| 50 |
-
This is the old MMEB-V1 leaderboard, which is now deprecated and going to be removed from this web page soon. MMEB-V1 is now the Image section of MMEB-V2, and the results on this leaderboard have been integrated into MMEB-V2 Image section. For researchers relying on MMEB-V1, we recommend transitioning to MMEB-V2 for more comprehensive evaluation metrics and support. Thank you for your collaborations and understanding! \n"""
|
| 51 |
-
|
| 52 |
LEADERBOARD_INFO = f"""
|
| 53 |
## Dataset Overview
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
This is the dictionary of all datasets used in our code. Please make sure all datasets' scores are included in your submission. \n
|
| 55 |
```python
|
| 56 |
{pp.pformat(DATASETS)}
|
|
@@ -66,13 +65,19 @@ CITATION_BUTTON_TEXT = r"""@article{jiang2024vlm2vec,
|
|
| 66 |
}"""
|
| 67 |
|
| 68 |
SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction \n
|
| 69 |
-
|
| 70 |
1. **Step 1️⃣:** Please refer to the [**GitHub page**](https://github.com/TIGER-AI-Lab/VLM2Vec) for detailed instructions about evaluating your model. \n
|
| 71 |
-
2. **Step 2️⃣:** After running the evaluation pipelines, please use the script we provided
|
| 72 |
-
- Reminder:
|
| 73 |
-
- Note
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
```json
|
| 77 |
{
|
| 78 |
"metadata": {
|
|
@@ -80,6 +85,7 @@ SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction \n
|
|
| 80 |
"url": "<Model URL>" or null,
|
| 81 |
"model_size": <Model Size> or null,
|
| 82 |
"data_source": "Self-Reported",
|
|
|
|
| 83 |
... ...
|
| 84 |
},
|
| 85 |
"metrics": {
|
|
@@ -101,12 +107,6 @@ SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction \n
|
|
| 101 |
}
|
| 102 |
}
|
| 103 |
```
|
| 104 |
-
3. **Step 3️⃣:** Finally, create a pull request and upload the generated JSON file to the ***scores*** folder.
|
| 105 |
-
- You can directly use the Hugging Face Space's web UI to upload your files:
|
| 106 |
-
- Go to the [scores folder](https://huggingface.co/spaces/TIGER-Lab/MMEB-Leaderboard/upload/main/scores)
|
| 107 |
-
- Select "Upload file" and upload your JSON files.
|
| 108 |
-
- Please inform us on [our discord server](https://discord.gg/njyKubdtry), or send us an email at [email protected], or leave a simple message (simply @ us) in the PR to indicate that you are ready to merge your PR.
|
| 109 |
-
- We will then review your submission and update the leaderboard accordingly. \n\n
|
| 110 |
## ⚠️ Special Instructions for submitting to MMEB Image (Previously MMEB-V1) Leaderboard
|
| 111 |
We understand that some researchers want to exclusively submit to the Image leaderboard, but unfortunately our current leaderboard system cannot exclude your model from other modalities' leaderboards.
|
| 112 |
To do so, please run the 36 image datasets only and simply ignore the other datasets.
|
|
|
|
| 1 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
| 2 |
import os
|
|
|
|
|
|
|
|
|
|
| 3 |
import pprint as pp
|
| 4 |
+
# import requests
|
| 5 |
|
| 6 |
from datasets import DATASETS
|
| 7 |
|
|
|
|
| 29 |
— Video Retrieval, Moment Retrieval, Video Classification, and Video Question Answering — and one task focused on visual documents, Visual Document Retrieval.
|
| 30 |
This comprehensive suite enables robust evaluation of multimodal embedding models across static, temporal, and structured visual data settings.
|
| 31 |
|
| 32 |
+
<details>
|
| 33 |
+
<summary><span style='font-weight:bold'>🔥 What's NEW: The leaderboards' rankings can be directly downloaded now. Go to Files: rankings/ folder and select the leaderboard you want to download</span></summary>
|
| 34 |
+
<ul>
|
| 35 |
+
<li>[2025-11] The leaderboards' rankings can be directly downloaded in csv/json format. Go to Files: rankings/ folder to download. A download button will be added to this page soon.</li>
|
| 36 |
+
<li>[2025-06] MMEB-V2 released!</li>
|
| 37 |
+
</ul>
|
| 38 |
+
</details>
|
| 39 |
|
| 40 |
| [**📈Overview**](https://tiger-ai-lab.github.io/VLM2Vec/) | [**Github**](https://github.com/TIGER-AI-Lab/VLM2Vec)
|
| 41 |
| [**📖MMEB-V2/VLM2Vec-V2 Paper**](https://arxiv.org/abs/2507.04590)
|
|
|
|
| 44 |
| [**Discord**](https://discord.gg/njyKubdtry) |
|
| 45 |
"""
|
| 46 |
|
|
|
|
|
|
|
|
|
|
| 47 |
LEADERBOARD_INFO = f"""
|
| 48 |
## Dataset Overview
|
| 49 |
+
<details>
|
| 50 |
+
<summary>Visual Overview</summary>
|
| 51 |
+
<img src='overview.png' alt='overview'/>
|
| 52 |
+
</details>
|
| 53 |
This is the dictionary of all datasets used in our code. Please make sure all datasets' scores are included in your submission. \n
|
| 54 |
```python
|
| 55 |
{pp.pformat(DATASETS)}
|
|
|
|
| 65 |
}"""
|
| 66 |
|
| 67 |
SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction \n
|
| 68 |
+
## We have provided detailed step-by-step guides on how to submit your model. Please read carefully in order to submit successfully. \n
|
| 69 |
1. **Step 1️⃣:** Please refer to the [**GitHub page**](https://github.com/TIGER-AI-Lab/VLM2Vec) for detailed instructions about evaluating your model. \n
|
| 70 |
+
2. **Step 2️⃣:** After running the evaluation pipelines, please use the script we provided **(e.g., [report_score_v2.py](https://github.com/TIGER-AI-Lab/VLM2Vec/blob/main/experiments/report_score_v2.py))** to generate the unified score sheet.
|
| 71 |
+
- Reminder: Adjust your model's configurations in the script, including model name, URL, model size, and data source.
|
| 72 |
+
- Note: the "model size" field is numeric-only and is by default in Billions (B), so please convert it accordingly if your model size is in different units/formats (e.g., "8" for 8 billion, "0.5" for 500 million; don't include units or other non-numeric characters in it).
|
| 73 |
+
- If possible, please also add a contact method in case we want to reach you in the future
|
| 74 |
+
3. **Step 3️⃣:** Finally, create a pull request and upload the generated JSON file to the ***scores*** folder.
|
| 75 |
+
- You can directly use the Hugging Face Space's web UI to upload your files:
|
| 76 |
+
- Go to the [scores folder](https://huggingface.co/spaces/TIGER-Lab/MMEB-Leaderboard/upload/main/scores)
|
| 77 |
+
- Select "Upload file" and upload your JSON files.
|
| 78 |
+
- Please inform us on [our discord server](https://discord.gg/njyKubdtry), or send us an email at [email protected], or leave a simple message (@ us) in the PR to indicate that you are ready to merge your PR.
|
| 79 |
+
- We will then review your submission and update the leaderboard accordingly. \n\n
|
| 80 |
+
## Please double check that your score sheet has the following JSON format ⬇️: \n
|
| 81 |
```json
|
| 82 |
{
|
| 83 |
"metadata": {
|
|
|
|
| 85 |
"url": "<Model URL>" or null,
|
| 86 |
"model_size": <Model Size> or null,
|
| 87 |
"data_source": "Self-Reported",
|
| 88 |
+
"contact": "<Contact Email>" or null,
|
| 89 |
... ...
|
| 90 |
},
|
| 91 |
"metrics": {
|
|
|
|
| 107 |
}
|
| 108 |
}
|
| 109 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
## ⚠️ Special Instructions for submitting to MMEB Image (Previously MMEB-V1) Leaderboard
|
| 111 |
We understand that some researchers want to exclusively submit to the Image leaderboard, but unfortunately our current leaderboard system cannot exclude your model from other modalities' leaderboards.
|
| 112 |
To do so, please run the 36 image datasets only and simply ignore the other datasets.
|
utils_v2.py
CHANGED
|
@@ -160,4 +160,14 @@ def search_and_filter_models(df, query, min_size, max_size):
|
|
| 160 |
|
| 161 |
filtered_df = filtered_df[size_mask]
|
| 162 |
|
| 163 |
-
return filtered_df[COLUMN_NAMES]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
filtered_df = filtered_df[size_mask]
|
| 162 |
|
| 163 |
+
return filtered_df[COLUMN_NAMES]
|
| 164 |
+
|
| 165 |
+
def save_ranking_summary(df, name, dir='rankings'):
|
| 166 |
+
csv_path, json_path = os.path.join(dir, f'{name}.csv'), os.path.join(dir, f'{name}.jsonl')
|
| 167 |
+
df.to_csv(csv_path, index=False)
|
| 168 |
+
df.to_json(json_path, orient='records', lines=True)
|
| 169 |
+
return csv_path, json_path
|
| 170 |
+
|
| 171 |
+
def download_ranking(df, name, format='csv', dir='rankings'):
|
| 172 |
+
csv_path, json_path = save_ranking_summary(df, name, dir)
|
| 173 |
+
return csv_path if format == 'csv' else json_path
|