Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
456a3cb
1
Parent(s):
78e9bc6
Add Claude 3.7 Sonnet to the leaderboard
Browse files- external_models_results.json +22 -0
external_models_results.json
CHANGED
|
@@ -531,5 +531,27 @@
|
|
| 531 |
},
|
| 532 |
"result_metrics_average": 0.7648947194678011,
|
| 533 |
"result_metrics_npm": 0.6490441260447987
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 534 |
}
|
| 535 |
]
|
|
|
|
| 531 |
},
|
| 532 |
"result_metrics_average": 0.7648947194678011,
|
| 533 |
"result_metrics_npm": 0.6490441260447987
|
| 534 |
+
},
|
| 535 |
+
{
|
| 536 |
+
"model": "claude-3-7-sonnet-20250219",
|
| 537 |
+
"name": "Claude 3.7 Sonnet (2025-02-19)",
|
| 538 |
+
"link": "https://www.anthropic.com/",
|
| 539 |
+
"date": "2025-04-03",
|
| 540 |
+
"status": "full",
|
| 541 |
+
"main_language": "English",
|
| 542 |
+
"model_type": "proprietary",
|
| 543 |
+
"result_metrics": {
|
| 544 |
+
"enem_challenge": 0.8901329601119664,
|
| 545 |
+
"bluex": 0.8456189151599444,
|
| 546 |
+
"oab_exams": 0.8355353075170843,
|
| 547 |
+
"assin2_sts": 0.8087979933117393,
|
| 548 |
+
"assin2_rte": 0.9472965253044003,
|
| 549 |
+
"faquad_nli": 0.8097848807348216,
|
| 550 |
+
"hatebr_offensive": 0.9125114739050616,
|
| 551 |
+
"portuguese_hate_speech": 0.7698524509742262,
|
| 552 |
+
"tweetsentbr": 0.7842080985659372
|
| 553 |
+
},
|
| 554 |
+
"result_metrics_average": 0.8448598450650201,
|
| 555 |
+
"result_metrics_npm": 0.7622301724524201
|
| 556 |
}
|
| 557 |
]
|