Spaces:
Running
Running
add gemini pro results
Browse files
data/leaderboard_json/afrobench_lite.json
CHANGED
|
@@ -21,7 +21,8 @@
|
|
| 21 |
"Gemini-2.5 Flash": 69.9,
|
| 22 |
"Claude 4.0 Sonnet": 68.1,
|
| 23 |
"Claude 3.7 Sonnet": 59.8,
|
| 24 |
-
"Claude 4.5 Sonnet": 71.7
|
|
|
|
| 25 |
}
|
| 26 |
},
|
| 27 |
"Intent": {
|
|
@@ -46,7 +47,8 @@
|
|
| 46 |
"Gemini-2.5 Flash": 87.9,
|
| 47 |
"Claude 4.0 Sonnet": 80.4,
|
| 48 |
"Claude 3.7 Sonnet": 73.4,
|
| 49 |
-
"Claude 4.5 Sonnet": 81.6
|
|
|
|
| 50 |
}
|
| 51 |
},
|
| 52 |
"MT(en/fr-xx)": {
|
|
@@ -70,7 +72,8 @@
|
|
| 70 |
"Lugha-Llama 8B": 22.1,
|
| 71 |
"Gemini-2.5 Flash": 46.5,
|
| 72 |
"Claude 4.0 Sonnet": 46.0,
|
| 73 |
-
"Claude 3.7 Sonnet": 44.0
|
|
|
|
| 74 |
}
|
| 75 |
},
|
| 76 |
"MMLU": {
|
|
@@ -95,7 +98,8 @@
|
|
| 95 |
"Gemini-2.5 Flash": 67.7,
|
| 96 |
"Claude 4.0 Sonnet": 75.5,
|
| 97 |
"Claude 3.7 Sonnet": 66.7,
|
| 98 |
-
"Claude 4.5 Sonnet": 58.6
|
|
|
|
| 99 |
}
|
| 100 |
},
|
| 101 |
"Math": {
|
|
@@ -120,7 +124,8 @@
|
|
| 120 |
"Gemini-2.5 Flash": 70.6,
|
| 121 |
"Claude 4.0 Sonnet": 66.9,
|
| 122 |
"Claude 3.7 Sonnet": 35.2,
|
| 123 |
-
"Claude 4.5 Sonnet": 73.1
|
|
|
|
| 124 |
}
|
| 125 |
},
|
| 126 |
"Topic": {
|
|
@@ -145,7 +150,8 @@
|
|
| 145 |
"Gemini-2.5 Flash": 87.2,
|
| 146 |
"Claude 4.0 Sonnet": 83.2,
|
| 147 |
"Claude 3.7 Sonnet": 84.9,
|
| 148 |
-
"Claude 4.5 Sonnet": 84.2
|
|
|
|
| 149 |
}
|
| 150 |
},
|
| 151 |
"RC": {
|
|
@@ -170,7 +176,8 @@
|
|
| 170 |
"Gemini-2.5 Flash": 42.2,
|
| 171 |
"Claude 4.0 Sonnet": 76.2,
|
| 172 |
"Claude 3.7 Sonnet": 65.1,
|
| 173 |
-
"Claude 4.5 Sonnet": 74.8
|
|
|
|
| 174 |
}
|
| 175 |
}
|
| 176 |
}
|
|
|
|
| 21 |
"Gemini-2.5 Flash": 69.9,
|
| 22 |
"Claude 4.0 Sonnet": 68.1,
|
| 23 |
"Claude 3.7 Sonnet": 59.8,
|
| 24 |
+
"Claude 4.5 Sonnet": 71.7,
|
| 25 |
+
"Gemini 2.5 Pro": 72.7
|
| 26 |
}
|
| 27 |
},
|
| 28 |
"Intent": {
|
|
|
|
| 47 |
"Gemini-2.5 Flash": 87.9,
|
| 48 |
"Claude 4.0 Sonnet": 80.4,
|
| 49 |
"Claude 3.7 Sonnet": 73.4,
|
| 50 |
+
"Claude 4.5 Sonnet": 81.6,
|
| 51 |
+
"Gemini 2.5 Pro": 88.3
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"MT(en/fr-xx)": {
|
|
|
|
| 72 |
"Lugha-Llama 8B": 22.1,
|
| 73 |
"Gemini-2.5 Flash": 46.5,
|
| 74 |
"Claude 4.0 Sonnet": 46.0,
|
| 75 |
+
"Claude 3.7 Sonnet": 44.0,
|
| 76 |
+
"Gemini 2.5 Pro": 47.4
|
| 77 |
}
|
| 78 |
},
|
| 79 |
"MMLU": {
|
|
|
|
| 98 |
"Gemini-2.5 Flash": 67.7,
|
| 99 |
"Claude 4.0 Sonnet": 75.5,
|
| 100 |
"Claude 3.7 Sonnet": 66.7,
|
| 101 |
+
"Claude 4.5 Sonnet": 58.6,
|
| 102 |
+
"Gemini 2.5 Pro": 78.2
|
| 103 |
}
|
| 104 |
},
|
| 105 |
"Math": {
|
|
|
|
| 124 |
"Gemini-2.5 Flash": 70.6,
|
| 125 |
"Claude 4.0 Sonnet": 66.9,
|
| 126 |
"Claude 3.7 Sonnet": 35.2,
|
| 127 |
+
"Claude 4.5 Sonnet": 73.1,
|
| 128 |
+
"Gemini 2.5 Pro": 74.4
|
| 129 |
}
|
| 130 |
},
|
| 131 |
"Topic": {
|
|
|
|
| 150 |
"Gemini-2.5 Flash": 87.2,
|
| 151 |
"Claude 4.0 Sonnet": 83.2,
|
| 152 |
"Claude 3.7 Sonnet": 84.9,
|
| 153 |
+
"Claude 4.5 Sonnet": 84.2,
|
| 154 |
+
"Gemini 2.5 Pro": 88.1
|
| 155 |
}
|
| 156 |
},
|
| 157 |
"RC": {
|
|
|
|
| 176 |
"Gemini-2.5 Flash": 42.2,
|
| 177 |
"Claude 4.0 Sonnet": 76.2,
|
| 178 |
"Claude 3.7 Sonnet": 65.1,
|
| 179 |
+
"Claude 4.5 Sonnet": 74.8,
|
| 180 |
+
"Gemini 2.5 Pro": 76.7
|
| 181 |
}
|
| 182 |
}
|
| 183 |
}
|