Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Add Llama 3 and Gemini 2.0 Flash models from Google
Browse files- external_models_results.json +91 -1
external_models_results.json
CHANGED
|
@@ -245,7 +245,7 @@
|
|
| 245 |
{
|
| 246 |
"model": "llama_405b_instruct",
|
| 247 |
"name": "meta-llama/Meta-Llama-3.1-405B-Instruct (Vertex AI)",
|
| 248 |
-
"link": "https://
|
| 249 |
"date": "2024-08-20",
|
| 250 |
"status": "full",
|
| 251 |
"main_language": "English",
|
|
@@ -286,5 +286,95 @@
|
|
| 286 |
},
|
| 287 |
"result_metrics_average": 0.8231799251828895,
|
| 288 |
"result_metrics_npm": 0.7241097388486535
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
}
|
| 290 |
]
|
|
|
|
| 245 |
{
|
| 246 |
"model": "llama_405b_instruct",
|
| 247 |
"name": "meta-llama/Meta-Llama-3.1-405B-Instruct (Vertex AI)",
|
| 248 |
+
"link": "https://huggingface.co/meta-llama/Meta-Llama-3.1-405B-Instruct",
|
| 249 |
"date": "2024-08-20",
|
| 250 |
"status": "full",
|
| 251 |
"main_language": "English",
|
|
|
|
| 286 |
},
|
| 287 |
"result_metrics_average": 0.8231799251828895,
|
| 288 |
"result_metrics_npm": 0.7241097388486535
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"model": "llama3_3_70b",
|
| 292 |
+
"name": "meta-llama/Llama-3.3-70B-Instruct (Vertex AI)",
|
| 293 |
+
"link": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
|
| 294 |
+
"date": "2025-04-03",
|
| 295 |
+
"status": "full",
|
| 296 |
+
"main_language": "English",
|
| 297 |
+
"model_type": "chat",
|
| 298 |
+
"params": 70.6,
|
| 299 |
+
"result_metrics": {
|
| 300 |
+
"enem_challenge": 0.8320503848845346,
|
| 301 |
+
"bluex": 0.7593880389429764,
|
| 302 |
+
"oab_exams": 0.6733485193621868,
|
| 303 |
+
"assin2_sts": 0.7275578599896508,
|
| 304 |
+
"assin2_rte": 0.9407071010860484,
|
| 305 |
+
"faquad_nli": 0.8787563033858187,
|
| 306 |
+
"hatebr_offensive": 0.9024358249091997,
|
| 307 |
+
"portuguese_hate_speech": 0.7042216543825339,
|
| 308 |
+
"tweetsentbr": 0.7076749453899551
|
| 309 |
+
},
|
| 310 |
+
"result_metrics_average": 0.791793403592545,
|
| 311 |
+
"result_metrics_npm": 0.6924788466103498
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"model": "llama3_2_90b",
|
| 315 |
+
"name": "meta-llama/Llama-3.2-90B-Vision-Instruct (Vertex AI)",
|
| 316 |
+
"link": "https://huggingface.co/meta-llama/Llama-3.2-90B-Vision-Instruct",
|
| 317 |
+
"date": "2025-04-03",
|
| 318 |
+
"status": "full",
|
| 319 |
+
"main_language": "English",
|
| 320 |
+
"model_type": "chat",
|
| 321 |
+
"params": 88.6,
|
| 322 |
+
"result_metrics": {
|
| 323 |
+
"enem_challenge": 0.821553533939818,
|
| 324 |
+
"bluex": 0.7482614742698191,
|
| 325 |
+
"oab_exams": 0.7061503416856492,
|
| 326 |
+
"assin2_sts": 0.7368518566379951,
|
| 327 |
+
"assin2_rte": 0.9216548775103446,
|
| 328 |
+
"faquad_nli": 0.8632015306122449,
|
| 329 |
+
"hatebr_offensive": 0.8965270877302478,
|
| 330 |
+
"portuguese_hate_speech": 0.7059127552081422,
|
| 331 |
+
"tweetsentbr": 0.7352076218951984
|
| 332 |
+
},
|
| 333 |
+
"result_metrics_average": 0.7928134532766066,
|
| 334 |
+
"result_metrics_npm": 0.6915070359785283
|
| 335 |
+
},
|
| 336 |
+
{
|
| 337 |
+
"model": "gemini-2.0-flash-001",
|
| 338 |
+
"name": "Gemini 2.0 Flash (001)",
|
| 339 |
+
"link": "https://cloud.google.com/vertex-ai",
|
| 340 |
+
"date": "2025-04-03",
|
| 341 |
+
"status": "full",
|
| 342 |
+
"main_language": "English",
|
| 343 |
+
"model_type": "proprietary",
|
| 344 |
+
"result_metrics": {
|
| 345 |
+
"enem_challenge": 0.8789363191042687,
|
| 346 |
+
"bluex": 0.803894297635605,
|
| 347 |
+
"oab_exams": 0.7767653758542141,
|
| 348 |
+
"assin2_sts": 0.8440142633742483,
|
| 349 |
+
"assin2_rte": 0.9305165510724053,
|
| 350 |
+
"faquad_nli": 0.7533651260745065,
|
| 351 |
+
"hatebr_offensive": 0.8890432813545366,
|
| 352 |
+
"portuguese_hate_speech": 0.7655392938544128,
|
| 353 |
+
"tweetsentbr": 0.7652542619451799
|
| 354 |
+
},
|
| 355 |
+
"result_metrics_average": 0.8230365300299308,
|
| 356 |
+
"result_metrics_npm": 0.7253778946033657
|
| 357 |
+
},
|
| 358 |
+
{
|
| 359 |
+
"model": "gemini-2.0-flash-lite-001",
|
| 360 |
+
"name": "Gemini 2.0 Flash Lite (001)",
|
| 361 |
+
"link": "https://cloud.google.com/vertex-ai",
|
| 362 |
+
"date": "2025-04-03",
|
| 363 |
+
"status": "full",
|
| 364 |
+
"main_language": "English",
|
| 365 |
+
"model_type": "proprietary",
|
| 366 |
+
"result_metrics": {
|
| 367 |
+
"enem_challenge": 0.8509447165850245,
|
| 368 |
+
"bluex": 0.7872044506258693,
|
| 369 |
+
"oab_exams": 0.7061503416856492,
|
| 370 |
+
"assin2_sts": 0.8492479991621328,
|
| 371 |
+
"assin2_rte": 0.9216548775103446,
|
| 372 |
+
"faquad_nli": 0.7652777777777777,
|
| 373 |
+
"hatebr_offensive": 0.8522499647780968,
|
| 374 |
+
"portuguese_hate_speech": 0.7501387383201693,
|
| 375 |
+
"tweetsentbr": 0.7675746509081982
|
| 376 |
+
},
|
| 377 |
+
"result_metrics_average": 0.8056048352614735,
|
| 378 |
+
"result_metrics_npm": 0.6986042497176748
|
| 379 |
}
|
| 380 |
]
|