| { | |
| "qwen2.5-3b-instruct": { | |
| "success_rate": 0.0624, | |
| "relative_action_count": 2.4255102697302697 | |
| }, | |
| "llama-3.2-3b-instruct": { | |
| "success_rate": 0.064, | |
| "relative_action_count": 2.4438042524142523 | |
| }, | |
| "llama-3.1-8b-instruct": { | |
| "success_rate": 0.11599999999999999, | |
| "relative_action_count": 2.3321907026307027 | |
| }, | |
| "gpt-3.5-turbo": { | |
| "success_rate": 0.12079999999999999, | |
| "relative_action_count": 2.34957508047508 | |
| }, | |
| "gemma-2-9b-it": { | |
| "success_rate": 0.132, | |
| "relative_action_count": 2.3394684981684977 | |
| }, | |
| "qwen2.5-7b-instruct": { | |
| "success_rate": 0.1664, | |
| "relative_action_count": 2.3259762459762454 | |
| }, | |
| "gemma-2-27b-it": { | |
| "success_rate": 0.1696, | |
| "relative_action_count": 2.28467764013764 | |
| }, | |
| "llama-3.1-70b-instruct": { | |
| "success_rate": 0.256, | |
| "relative_action_count": 1.9653564912864916 | |
| }, | |
| "yi-lightning": { | |
| "success_rate": 0.30720000000000003, | |
| "relative_action_count": 2.031278719058719 | |
| }, | |
| "gpt-4o-mini": { | |
| "success_rate": 0.31040000000000006, | |
| "relative_action_count": 1.9804984304584305 | |
| }, | |
| "llama-3.3-70b-instruct": { | |
| "success_rate": 0.33840000000000003, | |
| "relative_action_count": 1.90917626040626 | |
| }, | |
| "claude-3.5-haiku": { | |
| "success_rate": 0.3592000000000001, | |
| "relative_action_count": 2.0113219180819177 | |
| }, | |
| "gemini-1.5-pro": { | |
| "success_rate": 0.36879999999999996, | |
| "relative_action_count": 1.9371788544788544 | |
| }, | |
| "qwen2.5-14b-instruct": { | |
| "success_rate": 0.3816, | |
| "relative_action_count": 1.9383408547008547 | |
| }, | |
| "qwen2.5-72b-instruct": { | |
| "success_rate": 0.4008, | |
| "relative_action_count": 1.8648658674658674 | |
| }, | |
| "mistral-large-instruct-2411": { | |
| "success_rate": 0.4144, | |
| "relative_action_count": 1.795764299034299 | |
| }, | |
| "qwen2.5-32b-instruct": { | |
| "success_rate": 0.43920000000000003, | |
| "relative_action_count": 1.8831460717060717 | |
| }, | |
| "claude-3.5-sonnet": { | |
| "success_rate": 0.44000000000000006, | |
| "relative_action_count": 1.6636790032190032 | |
| }, | |
| "gpt-4o": { | |
| "success_rate": 0.44960000000000006, | |
| "relative_action_count": 1.7164597657897656 | |
| }, | |
| "deepseek-v3": { | |
| "success_rate": 0.5496000000000001, | |
| "relative_action_count": 1.705338828948829 | |
| }, | |
| "deepseek-r1": { | |
| "success_rate": 0.6112, | |
| "relative_action_count": 1.4205231568431569 | |
| }, | |
| "qwq-32b": { | |
| "success_rate": 0.6112, | |
| "relative_action_count": 1.5151790675990677 | |
| }, | |
| "o1-mini": { | |
| "success_rate": 0.6296, | |
| "relative_action_count": 1.4230264535464534 | |
| } | |
| } | |