Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	
		Yotam-Perlitz
		
	committed on
		
		
					Commit 
							
							·
						
						32f9aec
	
1
								Parent(s):
							
							a3b611d
								
replace mybench
Browse files
Signed-off-by: Yotam-Perlitz <[email protected]>
- assets/mybench_240901.csv +28 -53
 
    	
        assets/mybench_240901.csv
    CHANGED
    
    | 
         @@ -1,53 +1,28 @@ 
     | 
|
| 1 | 
         
            -
            model, 
     | 
| 2 | 
         
            -
             
     | 
| 3 | 
         
            -
             
     | 
| 4 | 
         
            -
             
     | 
| 5 | 
         
            -
             
     | 
| 6 | 
         
            -
             
     | 
| 7 | 
         
            -
             
     | 
| 8 | 
         
            -
             
     | 
| 9 | 
         
            -
             
     | 
| 10 | 
         
            -
             
     | 
| 11 | 
         
            -
             
     | 
| 12 | 
         
            -
             
     | 
| 13 | 
         
            -
             
     | 
| 14 | 
         
            -
             
     | 
| 15 | 
         
            -
             
     | 
| 16 | 
         
            -
             
     | 
| 17 | 
         
            -
             
     | 
| 18 | 
         
            -
             
     | 
| 19 | 
         
            -
             
     | 
| 20 | 
         
            -
             
     | 
| 21 | 
         
            -
             
     | 
| 22 | 
         
            -
             
     | 
| 23 | 
         
            -
             
     | 
| 24 | 
         
            -
             
     | 
| 25 | 
         
            -
             
     | 
| 26 | 
         
            -
             
     | 
| 27 | 
         
            -
             
     | 
| 28 | 
         
            -
             
     | 
| 29 | 
         
            -
            qwen1.5_72b_chat,28.89,mybench_average
         
     | 
| 30 | 
         
            -
            command_r,27.23,mybench_average
         
     | 
| 31 | 
         
            -
            phi_3_small_128k_instruct,27.19,mybench_average
         
     | 
| 32 | 
         
            -
            meta_llama_3_8b_instruct,26.67,mybench_average
         
     | 
| 33 | 
         
            -
            qwen2_7b_instruct,26.45,mybench_average
         
     | 
| 34 | 
         
            -
            phi_3_small_8k_instruct,26.24,mybench_average
         
     | 
| 35 | 
         
            -
            openhermes_2.5_mistral_7b,23.3,mybench_average
         
     | 
| 36 | 
         
            -
            mixtral_8x7b_instruct_v0.1,22.5,mybench_average
         
     | 
| 37 | 
         
            -
            mistral_7b_instruct_v0.2,19.33,mybench_average
         
     | 
| 38 | 
         
            -
            phi_3_mini_4k_instruct,19.27,mybench_average
         
     | 
| 39 | 
         
            -
            zephyr_7b_alpha,19.22,mybench_average
         
     | 
| 40 | 
         
            -
            phi_3_mini_128k_instruct,18.04,mybench_average
         
     | 
| 41 | 
         
            -
            zephyr_7b_beta,17.32,mybench_average
         
     | 
| 42 | 
         
            -
            deepseek_v2_lite_chat,17.14,mybench_average
         
     | 
| 43 | 
         
            -
            qwen1.5_7b_chat,16.5,mybench_average
         
     | 
| 44 | 
         
            -
            starling_lm_7b_beta,16.44,mybench_average
         
     | 
| 45 | 
         
            -
            vicuna_7b_v1.5_16k,13.71,mybench_average
         
     | 
| 46 | 
         
            -
            vicuna_7b_v1.5,11.73,mybench_average
         
     | 
| 47 | 
         
            -
            qwen1.5_4b_chat,11.13,mybench_average
         
     | 
| 48 | 
         
            -
            llama_2_7b_chat,10.25,mybench_average
         
     | 
| 49 | 
         
            -
            qwen2_1.5b_instruct,9.96,mybench_average
         
     | 
| 50 | 
         
            -
            yi_6b_chat,8.79,mybench_average
         
     | 
| 51 | 
         
            -
            qwen2_0.5b_instruct,6.78,mybench_average
         
     | 
| 52 | 
         
            -
            qwen1.5_1.8b_chat,6.09,mybench_average
         
     | 
| 53 | 
         
            -
            qwen1.5_0.5b_chat,5.26,mybench_average
         
     | 
| 
         | 
|
| 1 | 
         
            +
            model,agentbench
         
     | 
| 2 | 
         
            +
            gpt-4-0613,4.01
         
     | 
| 3 | 
         
            +
            claude-2,2.49
         
     | 
| 4 | 
         
            +
            claude-v1.3,2.44
         
     | 
| 5 | 
         
            +
            gpt-3.5-turbo-0613,2.32
         
     | 
| 6 | 
         
            +
            text-davinci-003,1.71
         
     | 
| 7 | 
         
            +
            claude-instant-v1.1,1.60
         
     | 
| 8 | 
         
            +
            chat-bison-001,1.39
         
     | 
| 9 | 
         
            +
            text-davinci-002,1.25
         
     | 
| 10 | 
         
            +
            llama-2-70b-chat,0.78
         
     | 
| 11 | 
         
            +
            guanaco-65b,0.54
         
     | 
| 12 | 
         
            +
            codellama-34b-instruct,0.96
         
     | 
| 13 | 
         
            +
            vicuna-33b-v1.3,0.73
         
     | 
| 14 | 
         
            +
            wizardlm-30b-v1.0,0.46
         
     | 
| 15 | 
         
            +
            guanaco-33b,0.39
         
     | 
| 16 | 
         
            +
            vicuna-13b-v1.5,0.93
         
     | 
| 17 | 
         
            +
            llama-2-13b-chat,0.77
         
     | 
| 18 | 
         
            +
            openchat-13b-v3.2,0.70
         
     | 
| 19 | 
         
            +
            wizardlm-13b-v1.2,0.66
         
     | 
| 20 | 
         
            +
            vicuna-7b-v1.5,0.56
         
     | 
| 21 | 
         
            +
            codellama-13b-instruct,0.56
         
     | 
| 22 | 
         
            +
            codellama-7b-instruct,0.50
         
     | 
| 23 | 
         
            +
            koala-13b,0.34
         
     | 
| 24 | 
         
            +
            llama-2-7b-chat,0.34
         
     | 
| 25 | 
         
            +
            codegeex2-6b,0.27
         
     | 
| 26 | 
         
            +
            dolly-12b-v2,0.14
         
     | 
| 27 | 
         
            +
            chatglm-6b-v1.1,0.11
         
     | 
| 28 | 
         
            +
            oasst-12b-sft-4,0.03
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         |