diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-F16.arc b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-F16.arc new file mode 100644 index 0000000000000000000000000000000000000000..01c7d1badccc784c411661bca2d3f230e2ceb121 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-F16.arc @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 42 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-F16.gguf (version GGUF V3 (latest)) + +Final result: 61.7333 +/- 1.7759 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 40261.05 ms +llama_perf_context_print: prompt eval time = 149462.93 ms / 36666 tokens ( 4.08 ms per token, 245.32 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 150294.55 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-F16.hsw b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-F16.hsw new file mode 100644 index 0000000000000000000000000000000000000000..6f1f3dccf5657d22734e94295c7149ac8a68b37a --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-F16.hsw @@ -0,0 +1,20 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 42 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-F16.gguf (version GGUF V3 (latest)) + +750 79.73333333% [76.7082%, 82.4554%] + + +llama_perf_context_print: load time = 2970.89 ms +llama_perf_context_print: prompt eval time = 528861.23 ms / 129319 tokens ( 4.09 ms per token, 244.52 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 532325.38 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-F16.mmlu b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-F16.mmlu new file mode 100644 index 0000000000000000000000000000000000000000..971b350c0a328bedc8361c9b323253462c33b903 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-F16.mmlu @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 42 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-F16.gguf (version GGUF V3 (latest)) + +Final result: 40.8000 +/- 1.7958 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 2768.82 ms +llama_perf_context_print: prompt eval time = 276273.33 ms / 68956 tokens ( 4.01 ms per token, 249.59 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 277471.13 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-F16.tqa b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-F16.tqa new file mode 100644 index 0000000000000000000000000000000000000000..c776f36a2d2ffc60d7decae34d0ad7178ad9f8b8 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-F16.tqa @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 42 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-F16.gguf (version GGUF V3 (latest)) + +Final result: 37.4667 +/- 1.7686 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 2730.94 ms +llama_perf_context_print: prompt eval time = 213745.70 ms / 51053 tokens ( 4.19 ms per token, 238.85 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 215254.19 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-F16.wng b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-F16.wng new file mode 100644 index 0000000000000000000000000000000000000000..9cddb22122b64e4dab1fcb01d699f363ae0269a5 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-F16.wng @@ -0,0 +1,19 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 42 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-F16.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 72.4000 +/- 1.6334 + +llama_perf_context_print: load time = 2841.75 ms +llama_perf_context_print: prompt eval time = 91265.69 ms / 22541 tokens ( 4.05 ms per token, 246.98 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 91746.92 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_m.arc b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_m.arc new file mode 100644 index 0000000000000000000000000000000000000000..5b103909aae93cc0c95fb9b98f179e58c65fdeb4 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_m.arc @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-IQ3_M.gguf (version GGUF V3 (latest)) + +Final result: 65.3333 +/- 1.7389 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 4590.43 ms +llama_perf_context_print: prompt eval time = 152258.69 ms / 36666 tokens ( 4.15 ms per token, 240.81 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 153118.42 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_m.hsw b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_m.hsw new file mode 100644 index 0000000000000000000000000000000000000000..12c199fd13987a2973e322a724474155c807d14c --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_m.hsw @@ -0,0 +1,20 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-IQ3_M.gguf (version GGUF V3 (latest)) + +750 80.00000000% [76.9876%, 82.7066%] + + +llama_perf_context_print: load time = 769.34 ms +llama_perf_context_print: prompt eval time = 538472.19 ms / 129319 tokens ( 4.16 ms per token, 240.16 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 542167.13 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_m.mmlu b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_m.mmlu new file mode 100644 index 0000000000000000000000000000000000000000..3b378a7b2755359c2d01583e22c64e78183eff2f --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_m.mmlu @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-IQ3_M.gguf (version GGUF V3 (latest)) + +Final result: 39.6000 +/- 1.7870 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 806.33 ms +llama_perf_context_print: prompt eval time = 283405.69 ms / 68956 tokens ( 4.11 ms per token, 243.31 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 284720.03 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_m.ppx b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_m.ppx new file mode 100644 index 0000000000000000000000000000000000000000..0e5cf9b874c0da68158a0dd1f45e77abdf55657d --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_m.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 25.645736 ± 0.214748 +Mean PPL(base) : 20.457257 ± 0.161275 +Cor(ln(PPL(Q)), ln(PPL(base))): 95.06% +Mean ln(PPL(Q)/PPL(base)) : 0.226040 ± 0.002601 +Mean PPL(Q)/PPL(base) : 1.253625 ± 0.003260 +Mean PPL(Q)-PPL(base) : 5.188479 ± 0.079257 + +====== KL divergence statistics ====== +Mean KLD: 0.286497 ± 0.002130 +Maximum KLD: 27.942949 +99.9% KLD: 11.911683 +99.0% KLD: 3.204565 +99.0% KLD: 3.204565 +Median KLD: 0.139660 +10.0% KLD: 0.006744 + 5.0% KLD: 0.001423 + 1.0% KLD: 0.000043 +Minimum KLD: -0.000894 + +====== Token probability statistics ====== +Mean Δp: -0.836 ± 0.032 % +Maximum Δp: 99.942% +99.9% Δp: 76.283% +99.0% Δp: 35.932% +95.0% Δp: 16.691% +90.0% Δp: 9.040% +75.0% Δp: 1.129% +Median Δp: -0.019% +25.0% Δp: -2.256% +10.0% Δp: -11.765% + 5.0% Δp: -20.807% + 1.0% Δp: -43.493% + 0.1% Δp: -79.148% +Minimum Δp: -99.887% +RMS Δp : 12.354 ± 0.058 % +Same top p: 77.725 ± 0.107 % diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_m.tqa b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_m.tqa new file mode 100644 index 0000000000000000000000000000000000000000..d526db099af441b4e2a6073fea3725517f566b63 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_m.tqa @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-IQ3_M.gguf (version GGUF V3 (latest)) + +Final result: 35.0667 +/- 1.7436 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 759.94 ms +llama_perf_context_print: prompt eval time = 216548.55 ms / 51053 tokens ( 4.24 ms per token, 235.76 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 218211.64 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_m.wng b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_m.wng new file mode 100644 index 0000000000000000000000000000000000000000..ea7376ec2b4694720a648aa73b6ddf24336de733 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_m.wng @@ -0,0 +1,19 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-IQ3_M.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 70.9333 +/- 1.6591 + +llama_perf_context_print: load time = 753.88 ms +llama_perf_context_print: prompt eval time = 94337.41 ms / 22541 tokens ( 4.19 ms per token, 238.94 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 94843.38 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_s.arc b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_s.arc new file mode 100644 index 0000000000000000000000000000000000000000..a1ccf0cf8536da2bfc3dfbc56f06662ea1d928c6 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_s.arc @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-IQ3_S.gguf (version GGUF V3 (latest)) + +Final result: 62.2667 +/- 1.7711 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 4272.29 ms +llama_perf_context_print: prompt eval time = 152740.59 ms / 36666 tokens ( 4.17 ms per token, 240.05 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 153595.41 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_s.hsw b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_s.hsw new file mode 100644 index 0000000000000000000000000000000000000000..5bc4988d34f2bbde28d177a48e334b43c16897ba --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_s.hsw @@ -0,0 +1,20 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-IQ3_S.gguf (version GGUF V3 (latest)) + +750 78.00000000% [74.8968%, 80.8179%] + + +llama_perf_context_print: load time = 728.94 ms +llama_perf_context_print: prompt eval time = 537974.27 ms / 129319 tokens ( 4.16 ms per token, 240.38 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 541689.42 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_s.mmlu b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_s.mmlu new file mode 100644 index 0000000000000000000000000000000000000000..50bb160e405b6373dedc8dce8a5311f5d233174b --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_s.mmlu @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-IQ3_S.gguf (version GGUF V3 (latest)) + +Final result: 39.4667 +/- 1.7860 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 757.43 ms +llama_perf_context_print: prompt eval time = 283333.69 ms / 68956 tokens ( 4.11 ms per token, 243.37 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 284662.93 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_s.ppx b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_s.ppx new file mode 100644 index 0000000000000000000000000000000000000000..b13b1e378cfaff9dafc4f3b7b92d2926eb3eca5c --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_s.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 32.512225 ± 0.267969 +Mean PPL(base) : 20.457257 ± 0.161275 +Cor(ln(PPL(Q)), ln(PPL(base))): 91.81% +Mean ln(PPL(Q)/PPL(base)) : 0.463278 ± 0.003282 +Mean PPL(Q)/PPL(base) : 1.589276 ± 0.005215 +Mean PPL(Q)-PPL(base) : 12.054968 ± 0.135871 + +====== KL divergence statistics ====== +Mean KLD: 0.484943 ± 0.002308 +Maximum KLD: 23.137739 +99.9% KLD: 11.445929 +99.0% KLD: 3.996119 +99.0% KLD: 3.996119 +Median KLD: 0.279736 +10.0% KLD: 0.019254 + 5.0% KLD: 0.004941 + 1.0% KLD: 0.000332 +Minimum KLD: -0.000675 + +====== Token probability statistics ====== +Mean Δp: -4.988 ± 0.042 % +Maximum Δp: 99.950% +99.9% Δp: 75.785% +99.0% Δp: 36.459% +95.0% Δp: 13.799% +90.0% Δp: 5.484% +75.0% Δp: 0.086% +Median Δp: -0.427% +25.0% Δp: -7.430% +10.0% Δp: -24.702% + 5.0% Δp: -37.934% + 1.0% Δp: -63.532% + 0.1% Δp: -87.413% +Minimum Δp: -99.733% +RMS Δp : 17.076 ± 0.062 % +Same top p: 70.771 ± 0.117 % diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_s.tqa b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_s.tqa new file mode 100644 index 0000000000000000000000000000000000000000..920768ab8299a8e2a7ff68b6eeddecd5803f6021 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_s.tqa @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-IQ3_S.gguf (version GGUF V3 (latest)) + +Final result: 36.1333 +/- 1.7553 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 755.36 ms +llama_perf_context_print: prompt eval time = 216405.85 ms / 51053 tokens ( 4.24 ms per token, 235.91 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 217987.98 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_s.wng b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_s.wng new file mode 100644 index 0000000000000000000000000000000000000000..2aaf21c875799c5ddb34fcd0100eff7643f70a0f --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq3_s.wng @@ -0,0 +1,19 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-IQ3_S.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 72.9333 +/- 1.6235 + +llama_perf_context_print: load time = 752.81 ms +llama_perf_context_print: prompt eval time = 94369.26 ms / 22541 tokens ( 4.19 ms per token, 238.86 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 94874.43 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq4_nl.arc b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq4_nl.arc new file mode 100644 index 0000000000000000000000000000000000000000..7dfca9c640a0e5e2e093a6da2ecafc1865f67c90 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq4_nl.arc @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-IQ4_NL.gguf (version GGUF V3 (latest)) + +Final result: 62.4000 +/- 1.7699 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 5546.40 ms +llama_perf_context_print: prompt eval time = 157820.59 ms / 36666 tokens ( 4.30 ms per token, 232.33 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 158658.47 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq4_nl.hsw b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq4_nl.hsw new file mode 100644 index 0000000000000000000000000000000000000000..eee649722ab91ca17f01ba996f7ff3b65192215b --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq4_nl.hsw @@ -0,0 +1,20 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-IQ4_NL.gguf (version GGUF V3 (latest)) + +750 79.46666667% [76.4290%, 82.2040%] + + +llama_perf_context_print: load time = 856.94 ms +llama_perf_context_print: prompt eval time = 558886.79 ms / 129319 tokens ( 4.32 ms per token, 231.39 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 562503.08 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq4_nl.mmlu b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq4_nl.mmlu new file mode 100644 index 0000000000000000000000000000000000000000..e32f4f6689bebd68178b9bef309ae1cf91781512 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq4_nl.mmlu @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-IQ4_NL.gguf (version GGUF V3 (latest)) + +Final result: 41.4667 +/- 1.8002 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 914.88 ms +llama_perf_context_print: prompt eval time = 294610.76 ms / 68956 tokens ( 4.27 ms per token, 234.06 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 295846.99 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq4_nl.ppx b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq4_nl.ppx new file mode 100644 index 0000000000000000000000000000000000000000..7eb585dad282fb554ba666cbd38d2fad2415b059 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq4_nl.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 25.450410 ± 0.199570 +Mean PPL(base) : 20.457257 ± 0.161275 +Cor(ln(PPL(Q)), ln(PPL(base))): 96.78% +Mean ln(PPL(Q)/PPL(base)) : 0.218394 ± 0.001994 +Mean PPL(Q)/PPL(base) : 1.244077 ± 0.002481 +Mean PPL(Q)-PPL(base) : 4.993153 ± 0.059469 + +====== KL divergence statistics ====== +Mean KLD: 0.173444 ± 0.001426 +Maximum KLD: 22.223246 +99.9% KLD: 9.213535 +99.0% KLD: 1.556305 +99.0% KLD: 1.556305 +Median KLD: 0.081647 +10.0% KLD: 0.006194 + 5.0% KLD: 0.001743 + 1.0% KLD: 0.000131 +Minimum KLD: -0.000788 + +====== Token probability statistics ====== +Mean Δp: -3.757 ± 0.027 % +Maximum Δp: 99.782% +99.9% Δp: 44.427% +99.0% Δp: 18.257% +95.0% Δp: 5.991% +90.0% Δp: 2.080% +75.0% Δp: 0.027% +Median Δp: -0.328% +25.0% Δp: -5.093% +10.0% Δp: -15.863% + 5.0% Δp: -24.394% + 1.0% Δp: -43.233% + 0.1% Δp: -70.697% +Minimum Δp: -99.732% +RMS Δp : 11.165 ± 0.051 % +Same top p: 82.789 ± 0.097 % diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq4_nl.tqa b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq4_nl.tqa new file mode 100644 index 0000000000000000000000000000000000000000..b614b891170927ab73c89683e7076e7640fc0b77 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq4_nl.tqa @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-IQ4_NL.gguf (version GGUF V3 (latest)) + +Final result: 37.4667 +/- 1.7686 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 912.73 ms +llama_perf_context_print: prompt eval time = 225621.49 ms / 51053 tokens ( 4.42 ms per token, 226.28 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 227248.28 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq4_nl.wng b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq4_nl.wng new file mode 100644 index 0000000000000000000000000000000000000000..66ae69d3d95740432d48ce2d950acebc19ac6185 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-iq4_nl.wng @@ -0,0 +1,19 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-IQ4_NL.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 71.3333 +/- 1.6523 + +llama_perf_context_print: load time = 937.74 ms +llama_perf_context_print: prompt eval time = 98116.26 ms / 22541 tokens ( 4.35 ms per token, 229.74 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 98670.87 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_l.arc b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_l.arc new file mode 100644 index 0000000000000000000000000000000000000000..7dc51efd0b8ef8e644c6857ffe94f5b2ea1d4c26 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_l.arc @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q3_K_L.gguf (version GGUF V3 (latest)) + +Final result: 64.0000 +/- 1.7539 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 5136.60 ms +llama_perf_context_print: prompt eval time = 164152.27 ms / 36666 tokens ( 4.48 ms per token, 223.37 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 164972.95 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_l.hsw b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_l.hsw new file mode 100644 index 0000000000000000000000000000000000000000..c3dc8372298e462809ce50b53047181127767462 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_l.hsw @@ -0,0 +1,20 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q3_K_L.gguf (version GGUF V3 (latest)) + +750 77.20000000% [74.0633%, 80.0595%] + + +llama_perf_context_print: load time = 833.00 ms +llama_perf_context_print: prompt eval time = 580747.74 ms / 129319 tokens ( 4.49 ms per token, 222.68 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 584554.96 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_l.mmlu b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_l.mmlu new file mode 100644 index 0000000000000000000000000000000000000000..2177fe6d62f2bc077710f96fd38962b5bef6f95f --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_l.mmlu @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q3_K_L.gguf (version GGUF V3 (latest)) + +Final result: 41.4667 +/- 1.8002 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 788.56 ms +llama_perf_context_print: prompt eval time = 305965.89 ms / 68956 tokens ( 4.44 ms per token, 225.37 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 307240.49 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_l.ppx b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_l.ppx new file mode 100644 index 0000000000000000000000000000000000000000..73f3a4e4e030f5cc8882ac962b734bfeee7e1bcd --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_l.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 24.405174 ± 0.195423 +Mean PPL(base) : 20.457257 ± 0.161275 +Cor(ln(PPL(Q)), ln(PPL(base))): 95.33% +Mean ln(PPL(Q)/PPL(base)) : 0.176457 ± 0.002431 +Mean PPL(Q)/PPL(base) : 1.192984 ± 0.002901 +Mean PPL(Q)-PPL(base) : 3.947917 ± 0.064108 + +====== KL divergence statistics ====== +Mean KLD: 0.248285 ± 0.001640 +Maximum KLD: 22.227032 +99.9% KLD: 9.707264 +99.0% KLD: 2.304967 +99.0% KLD: 2.304967 +Median KLD: 0.123584 +10.0% KLD: 0.007649 + 5.0% KLD: 0.001796 + 1.0% KLD: 0.000084 +Minimum KLD: -0.000635 + +====== Token probability statistics ====== +Mean Δp: -2.090 ± 0.032 % +Maximum Δp: 99.189% +99.9% Δp: 64.212% +99.0% Δp: 31.519% +95.0% Δp: 13.797% +90.0% Δp: 6.831% +75.0% Δp: 0.560% +Median Δp: -0.057% +25.0% Δp: -3.331% +10.0% Δp: -14.319% + 5.0% Δp: -24.226% + 1.0% Δp: -48.172% + 0.1% Δp: -79.381% +Minimum Δp: -99.228% +RMS Δp : 12.606 ± 0.056 % +Same top p: 78.472 ± 0.106 % diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_l.tqa b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_l.tqa new file mode 100644 index 0000000000000000000000000000000000000000..9cbada9eccf0d9dbf2b8b06ee8bf7073535fbbc7 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_l.tqa @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q3_K_L.gguf (version GGUF V3 (latest)) + +Final result: 36.8000 +/- 1.7621 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 841.25 ms +llama_perf_context_print: prompt eval time = 233752.65 ms / 51053 tokens ( 4.58 ms per token, 218.41 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 235346.12 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_l.wng b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_l.wng new file mode 100644 index 0000000000000000000000000000000000000000..4325a1923d115ee89c820a390efaa2fb78016327 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_l.wng @@ -0,0 +1,19 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q3_K_L.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 72.8000 +/- 1.6260 + +llama_perf_context_print: load time = 832.18 ms +llama_perf_context_print: prompt eval time = 101421.35 ms / 22541 tokens ( 4.50 ms per token, 222.25 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 101943.45 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_m.arc b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_m.arc new file mode 100644 index 0000000000000000000000000000000000000000..8bcc42d3dc3547fe2c6a9dbb97b0c6d6302a36a8 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_m.arc @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q3_K_M.gguf (version GGUF V3 (latest)) + +Final result: 64.4000 +/- 1.7496 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 4355.26 ms +llama_perf_context_print: prompt eval time = 158252.70 ms / 36666 tokens ( 4.32 ms per token, 231.69 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 159087.45 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_m.hsw b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_m.hsw new file mode 100644 index 0000000000000000000000000000000000000000..a81e50a1728e581a8d1ed33f3324849993d59571 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_m.hsw @@ -0,0 +1,20 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q3_K_M.gguf (version GGUF V3 (latest)) + +750 77.60000000% [74.4798%, 80.4389%] + + +llama_perf_context_print: load time = 744.01 ms +llama_perf_context_print: prompt eval time = 561047.56 ms / 129319 tokens ( 4.34 ms per token, 230.50 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 564754.58 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_m.mmlu b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_m.mmlu new file mode 100644 index 0000000000000000000000000000000000000000..5edfe806131e877ba2dd98d738441e136dfadf33 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_m.mmlu @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q3_K_M.gguf (version GGUF V3 (latest)) + +Final result: 40.8000 +/- 1.7958 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 759.93 ms +llama_perf_context_print: prompt eval time = 295554.48 ms / 68956 tokens ( 4.29 ms per token, 233.31 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 296870.54 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_m.ppx b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_m.ppx new file mode 100644 index 0000000000000000000000000000000000000000..bb237df1b3ffb1e35c93a84ab699353a5a0cf331 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_m.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 24.750046 ± 0.198446 +Mean PPL(base) : 20.457257 ± 0.161275 +Cor(ln(PPL(Q)), ln(PPL(base))): 94.92% +Mean ln(PPL(Q)/PPL(base)) : 0.190490 ± 0.002538 +Mean PPL(Q)/PPL(base) : 1.209842 ± 0.003071 +Mean PPL(Q)-PPL(base) : 4.292789 ± 0.068077 + +====== KL divergence statistics ====== +Mean KLD: 0.273478 ± 0.001716 +Maximum KLD: 23.756779 +99.9% KLD: 9.973213 +99.0% KLD: 2.569659 +99.0% KLD: 2.569659 +Median KLD: 0.139089 +10.0% KLD: 0.008610 + 5.0% KLD: 0.002078 + 1.0% KLD: 0.000103 +Minimum KLD: -0.000674 + +====== Token probability statistics ====== +Mean Δp: -2.189 ± 0.033 % +Maximum Δp: 99.919% +99.9% Δp: 63.247% +99.0% Δp: 33.168% +95.0% Δp: 14.581% +90.0% Δp: 7.284% +75.0% Δp: 0.603% +Median Δp: -0.062% +25.0% Δp: -3.479% +10.0% Δp: -15.050% + 5.0% Δp: -25.583% + 1.0% Δp: -50.625% + 0.1% Δp: -81.793% +Minimum Δp: -99.620% +RMS Δp : 13.182 ± 0.057 % +Same top p: 77.344 ± 0.107 % diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_m.tqa b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_m.tqa new file mode 100644 index 0000000000000000000000000000000000000000..b5e8c93ada0803b3d17d00ee948196a59adb8948 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_m.tqa @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q3_K_M.gguf (version GGUF V3 (latest)) + +Final result: 36.9333 +/- 1.7635 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 738.68 ms +llama_perf_context_print: prompt eval time = 225672.28 ms / 51053 tokens ( 4.42 ms per token, 226.23 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 227200.49 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_m.wng b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_m.wng new file mode 100644 index 0000000000000000000000000000000000000000..b050379345f4e2a1f268d3f18c5a138837845458 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_m.wng @@ -0,0 +1,19 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q3_K_M.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 72.9333 +/- 1.6235 + +llama_perf_context_print: load time = 750.12 ms +llama_perf_context_print: prompt eval time = 97927.96 ms / 22541 tokens ( 4.34 ms per token, 230.18 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 98426.78 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_s.arc b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_s.arc new file mode 100644 index 0000000000000000000000000000000000000000..d2a76d54cbc769f07509d8b92ce57ca80898c295 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_s.arc @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q3_K_S.gguf (version GGUF V3 (latest)) + +Final result: 61.0667 +/- 1.7816 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 4042.36 ms +llama_perf_context_print: prompt eval time = 158459.88 ms / 36666 tokens ( 4.32 ms per token, 231.39 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 159304.81 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_s.hsw b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_s.hsw new file mode 100644 index 0000000000000000000000000000000000000000..f391a8ee06b8daf5a1359ce05df8866d9b9da5d8 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_s.hsw @@ -0,0 +1,20 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q3_K_S.gguf (version GGUF V3 (latest)) + +750 77.86666667% [74.7577%, 80.6916%] + + +llama_perf_context_print: load time = 704.36 ms +llama_perf_context_print: prompt eval time = 558138.31 ms / 129319 tokens ( 4.32 ms per token, 231.70 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 561898.16 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_s.mmlu b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_s.mmlu new file mode 100644 index 0000000000000000000000000000000000000000..17f05bdbf0ed7a0c64fa5a08288c0ecad740f80d --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_s.mmlu @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q3_K_S.gguf (version GGUF V3 (latest)) + +Final result: 40.9333 +/- 1.7967 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 707.14 ms +llama_perf_context_print: prompt eval time = 294106.51 ms / 68956 tokens ( 4.27 ms per token, 234.46 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 295434.68 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_s.ppx b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_s.ppx new file mode 100644 index 0000000000000000000000000000000000000000..4bbae2b397a2da055ae255caaf91f902df63d62e --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_s.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 25.982356 ± 0.208609 +Mean PPL(base) : 20.457257 ± 0.161275 +Cor(ln(PPL(Q)), ln(PPL(base))): 92.33% +Mean ln(PPL(Q)/PPL(base)) : 0.239080 ± 0.003120 +Mean PPL(Q)/PPL(base) : 1.270080 ± 0.003963 +Mean PPL(Q)-PPL(base) : 5.525099 ± 0.086044 + +====== KL divergence statistics ====== +Mean KLD: 0.410016 ± 0.002211 +Maximum KLD: 26.456850 +99.9% KLD: 11.191044 +99.0% KLD: 3.826428 +99.0% KLD: 3.826428 +Median KLD: 0.221591 +10.0% KLD: 0.014583 + 5.0% KLD: 0.003518 + 1.0% KLD: 0.000202 +Minimum KLD: -0.000602 + +====== Token probability statistics ====== +Mean Δp: -2.759 ± 0.040 % +Maximum Δp: 99.843% +99.9% Δp: 77.836% +99.0% Δp: 42.001% +95.0% Δp: 18.458% +90.0% Δp: 8.931% +75.0% Δp: 0.665% +Median Δp: -0.102% +25.0% Δp: -4.673% +10.0% Δp: -19.427% + 5.0% Δp: -31.918% + 1.0% Δp: -59.329% + 0.1% Δp: -87.266% +Minimum Δp: -99.664% +RMS Δp : 15.977 ± 0.062 % +Same top p: 72.822 ± 0.114 % diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_s.tqa b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_s.tqa new file mode 100644 index 0000000000000000000000000000000000000000..a706715f566faea8a81653f42b8e1f0ffb553005 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_s.tqa @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q3_K_S.gguf (version GGUF V3 (latest)) + +Final result: 38.2667 +/- 1.7759 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 689.04 ms +llama_perf_context_print: prompt eval time = 225077.84 ms / 51053 tokens ( 4.41 ms per token, 226.82 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 226715.57 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_s.wng b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_s.wng new file mode 100644 index 0000000000000000000000000000000000000000..034d2636ac7ce2e18256b105d88baf1afbc81107 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q3_k_s.wng @@ -0,0 +1,19 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q3_K_S.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 72.0000 +/- 1.6406 + +llama_perf_context_print: load time = 737.04 ms +llama_perf_context_print: prompt eval time = 98097.25 ms / 22541 tokens ( 4.35 ms per token, 229.78 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 98614.54 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_m.arc b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_m.arc new file mode 100644 index 0000000000000000000000000000000000000000..7d26fe53b6b03a96ad882e2ea474d6241643b7bf --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_m.arc @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q4_K_M.gguf (version GGUF V3 (latest)) + +Final result: 62.0000 +/- 1.7736 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 5492.30 ms +llama_perf_context_print: prompt eval time = 164320.56 ms / 36666 tokens ( 4.48 ms per token, 223.14 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 165122.13 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_m.hsw b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_m.hsw new file mode 100644 index 0000000000000000000000000000000000000000..4c5d9f01276fee06e83a859ed6720f528523901f --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_m.hsw @@ -0,0 +1,20 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q4_K_M.gguf (version GGUF V3 (latest)) + +750 79.33333333% [76.2895%, 82.0782%] + + +llama_perf_context_print: load time = 898.60 ms +llama_perf_context_print: prompt eval time = 582056.25 ms / 129319 tokens ( 4.50 ms per token, 222.18 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 585789.58 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_m.mmlu b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_m.mmlu new file mode 100644 index 0000000000000000000000000000000000000000..d5a23cf4de881dec5edf65b517a72933e1e743c9 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_m.mmlu @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q4_K_M.gguf (version GGUF V3 (latest)) + +Final result: 41.7333 +/- 1.8018 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 923.69 ms +llama_perf_context_print: prompt eval time = 306593.84 ms / 68956 tokens ( 4.45 ms per token, 224.91 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 307849.04 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_m.ppx b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_m.ppx new file mode 100644 index 0000000000000000000000000000000000000000..48ccd02f0e68098cb05f3ffd2a472cf5c6c38918 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_m.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 21.411111 ± 0.170166 +Mean PPL(base) : 20.457257 ± 0.161275 +Cor(ln(PPL(Q)), ln(PPL(base))): 98.21% +Mean ln(PPL(Q)/PPL(base)) : 0.045572 ± 0.001499 +Mean PPL(Q)/PPL(base) : 1.046627 ± 0.001569 +Mean PPL(Q)-PPL(base) : 0.953854 ± 0.032590 + +====== KL divergence statistics ====== +Mean KLD: 0.092796 ± 0.001164 +Maximum KLD: 25.177353 +99.9% KLD: 8.122986 +99.0% KLD: 0.871791 +99.0% KLD: 0.871791 +Median KLD: 0.036392 +10.0% KLD: 0.001979 + 5.0% KLD: 0.000420 + 1.0% KLD: 0.000007 +Minimum KLD: -0.000817 + +====== Token probability statistics ====== +Mean Δp: -0.463 ± 0.019 % +Maximum Δp: 99.023% +99.9% Δp: 44.662% +99.0% Δp: 19.835% +95.0% Δp: 9.041% +90.0% Δp: 4.857% +75.0% Δp: 0.673% +Median Δp: -0.004% +25.0% Δp: -1.191% +10.0% Δp: -6.370% + 5.0% Δp: -11.362% + 1.0% Δp: -24.949% + 0.1% Δp: -50.867% +Minimum Δp: -99.932% +RMS Δp : 7.244 ± 0.048 % +Same top p: 87.132 ± 0.086 % diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_m.tqa b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_m.tqa new file mode 100644 index 0000000000000000000000000000000000000000..30d2e6524a1d449ca9e2a5d6b563ca5625f987d2 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_m.tqa @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q4_K_M.gguf (version GGUF V3 (latest)) + +Final result: 39.2000 +/- 1.7838 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 924.45 ms +llama_perf_context_print: prompt eval time = 234356.12 ms / 51053 tokens ( 4.59 ms per token, 217.84 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 235973.86 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_m.wng b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_m.wng new file mode 100644 index 0000000000000000000000000000000000000000..46b7885cd0ec303be53f247efa6812cc884de954 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_m.wng @@ -0,0 +1,19 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q4_K_M.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 71.4667 +/- 1.6500 + +llama_perf_context_print: load time = 897.50 ms +llama_perf_context_print: prompt eval time = 101897.65 ms / 22541 tokens ( 4.52 ms per token, 221.21 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 102441.25 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_s.arc b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_s.arc new file mode 100644 index 0000000000000000000000000000000000000000..f7945d874b2aa4dc8c725cd743e9152de2d96bc9 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_s.arc @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q4_K_S.gguf (version GGUF V3 (latest)) + +Final result: 62.9333 +/- 1.7648 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 5073.64 ms +llama_perf_context_print: prompt eval time = 163620.09 ms / 36666 tokens ( 4.46 ms per token, 224.09 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 164444.66 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_s.hsw b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_s.hsw new file mode 100644 index 0000000000000000000000000000000000000000..cae2a271dade801392cfe3899b63ff91bda17756 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_s.hsw @@ -0,0 +1,20 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q4_K_S.gguf (version GGUF V3 (latest)) + +750 78.40000000% [75.3141%, 81.1964%] + + +llama_perf_context_print: load time = 844.26 ms +llama_perf_context_print: prompt eval time = 578922.97 ms / 129319 tokens ( 4.48 ms per token, 223.38 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 582667.59 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_s.mmlu b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_s.mmlu new file mode 100644 index 0000000000000000000000000000000000000000..8363a955e050df01627515888a7a4477feaebc28 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_s.mmlu @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q4_K_S.gguf (version GGUF V3 (latest)) + +Final result: 40.4000 +/- 1.7930 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 898.56 ms +llama_perf_context_print: prompt eval time = 305282.54 ms / 68956 tokens ( 4.43 ms per token, 225.88 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 306510.96 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_s.ppx b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_s.ppx new file mode 100644 index 0000000000000000000000000000000000000000..34227559a95b24bccca8c33b64be7003b234b556 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_s.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 21.835641 ± 0.173204 +Mean PPL(base) : 20.457257 ± 0.161275 +Cor(ln(PPL(Q)), ln(PPL(base))): 97.63% +Mean ln(PPL(Q)/PPL(base)) : 0.065206 ± 0.001722 +Mean PPL(Q)/PPL(base) : 1.067379 ± 0.001839 +Mean PPL(Q)-PPL(base) : 1.378384 ± 0.038296 + +====== KL divergence statistics ====== +Mean KLD: 0.126128 ± 0.001376 +Maximum KLD: 20.797575 +99.9% KLD: 9.100409 +99.0% KLD: 1.331886 +99.0% KLD: 1.331886 +Median KLD: 0.049493 +10.0% KLD: 0.002711 + 5.0% KLD: 0.000614 + 1.0% KLD: 0.000019 +Minimum KLD: -0.000932 + +====== Token probability statistics ====== +Mean Δp: -0.835 ± 0.022 % +Maximum Δp: 99.548% +99.9% Δp: 58.381% +99.0% Δp: 22.128% +95.0% Δp: 9.729% +90.0% Δp: 5.041% +75.0% Δp: 0.594% +Median Δp: -0.011% +25.0% Δp: -1.630% +10.0% Δp: -8.012% + 5.0% Δp: -14.086% + 1.0% Δp: -29.714% + 0.1% Δp: -59.183% +Minimum Δp: -99.243% +RMS Δp : 8.429 ± 0.051 % +Same top p: 85.153 ± 0.091 % diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_s.tqa b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_s.tqa new file mode 100644 index 0000000000000000000000000000000000000000..b221bd5860086842f4dd3d518b1cf4b85ec4c9fa --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_s.tqa @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q4_K_S.gguf (version GGUF V3 (latest)) + +Final result: 38.5333 +/- 1.7783 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 889.94 ms +llama_perf_context_print: prompt eval time = 233318.87 ms / 51053 tokens ( 4.57 ms per token, 218.81 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 234983.06 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_s.wng b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_s.wng new file mode 100644 index 0000000000000000000000000000000000000000..4d6bfd53f6cca8f0572cfb85396c9988ed06ed8b --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q4_k_s.wng @@ -0,0 +1,19 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q4_K_S.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 71.0667 +/- 1.6569 + +llama_perf_context_print: load time = 879.84 ms +llama_perf_context_print: prompt eval time = 101478.37 ms / 22541 tokens ( 4.50 ms per token, 222.13 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 101987.43 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_m.arc b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_m.arc new file mode 100644 index 0000000000000000000000000000000000000000..d97217fac43158571664234cbff29d655710a03b --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_m.arc @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q5_K_M.gguf (version GGUF V3 (latest)) + +Final result: 62.5333 +/- 1.7686 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 6682.72 ms +llama_perf_context_print: prompt eval time = 163619.72 ms / 36666 tokens ( 4.46 ms per token, 224.09 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 164423.67 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_m.hsw b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_m.hsw new file mode 100644 index 0000000000000000000000000000000000000000..b96c5c8fd303779f47994c2cbc429c71afd6afd0 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_m.hsw @@ -0,0 +1,20 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q5_K_M.gguf (version GGUF V3 (latest)) + +750 79.46666667% [76.4290%, 82.2040%] + + +llama_perf_context_print: load time = 1058.28 ms +llama_perf_context_print: prompt eval time = 584266.16 ms / 129319 tokens ( 4.52 ms per token, 221.34 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 587808.42 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_m.mmlu b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_m.mmlu new file mode 100644 index 0000000000000000000000000000000000000000..07384fdb567c298b97c24f554d28a506a4aec3c8 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_m.mmlu @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q5_K_M.gguf (version GGUF V3 (latest)) + +Final result: 41.3333 +/- 1.7993 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 1080.04 ms +llama_perf_context_print: prompt eval time = 308584.68 ms / 68956 tokens ( 4.48 ms per token, 223.46 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 309765.36 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_m.ppx b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_m.ppx new file mode 100644 index 0000000000000000000000000000000000000000..bde847e0e42eb4dc582f64b2b5e55685af4c0969 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_m.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 20.724451 ± 0.164266 +Mean PPL(base) : 20.457257 ± 0.161275 +Cor(ln(PPL(Q)), ln(PPL(base))): 99.25% +Mean ln(PPL(Q)/PPL(base)) : 0.012977 ± 0.000968 +Mean PPL(Q)/PPL(base) : 1.013061 ± 0.000981 +Mean PPL(Q)-PPL(base) : 0.267194 ± 0.020144 + +====== KL divergence statistics ====== +Mean KLD: 0.035047 ± 0.000830 +Maximum KLD: 18.568987 +99.9% KLD: 5.169354 +99.0% KLD: 0.274628 +99.0% KLD: 0.274628 +Median KLD: 0.009469 +10.0% KLD: 0.000465 + 5.0% KLD: 0.000074 + 1.0% KLD: -0.000046 +Minimum KLD: -0.000819 + +====== Token probability statistics ====== +Mean Δp: -0.082 ± 0.011 % +Maximum Δp: 99.607% +99.9% Δp: 27.536% +99.0% Δp: 11.092% +95.0% Δp: 5.002% +90.0% Δp: 2.717% +75.0% Δp: 0.418% +Median Δp: -0.000% +25.0% Δp: -0.510% +10.0% Δp: -2.952% + 5.0% Δp: -5.379% + 1.0% Δp: -12.067% + 0.1% Δp: -28.989% +Minimum Δp: -98.714% +RMS Δp : 4.125 ± 0.051 % +Same top p: 92.842 ± 0.066 % diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_m.tqa b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_m.tqa new file mode 100644 index 0000000000000000000000000000000000000000..248c109296ef19afa1f4c2a8ae3a1d6e2634a781 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_m.tqa @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q5_K_M.gguf (version GGUF V3 (latest)) + +Final result: 38.1333 +/- 1.7748 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 1104.49 ms +llama_perf_context_print: prompt eval time = 235923.52 ms / 51053 tokens ( 4.62 ms per token, 216.40 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 237395.78 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_m.wng b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_m.wng new file mode 100644 index 0000000000000000000000000000000000000000..0075c7f84eeb9616c38d2cd1e392771b3b12d0be --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_m.wng @@ -0,0 +1,19 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q5_K_M.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 72.5333 +/- 1.6309 + +llama_perf_context_print: load time = 1106.81 ms +llama_perf_context_print: prompt eval time = 102084.50 ms / 22541 tokens ( 4.53 ms per token, 220.81 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 102572.32 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_s.arc b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_s.arc new file mode 100644 index 0000000000000000000000000000000000000000..5996c61ac1c0c208bea0bdc067c16bee3559ee0c --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_s.arc @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q5_K_S.gguf (version GGUF V3 (latest)) + +Final result: 63.0667 +/- 1.7635 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 6255.82 ms +llama_perf_context_print: prompt eval time = 163647.80 ms / 36666 tokens ( 4.46 ms per token, 224.05 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 164454.86 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_s.hsw b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_s.hsw new file mode 100644 index 0000000000000000000000000000000000000000..43c56ca7bc343d7a9b1669d92b7520fcd14b136f --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_s.hsw @@ -0,0 +1,20 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q5_K_S.gguf (version GGUF V3 (latest)) + +750 79.46666667% [76.4290%, 82.2040%] + + +llama_perf_context_print: load time = 1035.10 ms +llama_perf_context_print: prompt eval time = 578754.53 ms / 129319 tokens ( 4.48 ms per token, 223.44 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 582177.92 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_s.mmlu b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_s.mmlu new file mode 100644 index 0000000000000000000000000000000000000000..f3fd0047313957b2a2c305ca65f96c135371aa6d --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_s.mmlu @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q5_K_S.gguf (version GGUF V3 (latest)) + +Final result: 40.9333 +/- 1.7967 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 990.85 ms +llama_perf_context_print: prompt eval time = 303968.92 ms / 68956 tokens ( 4.41 ms per token, 226.85 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 305288.08 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_s.ppx b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_s.ppx new file mode 100644 index 0000000000000000000000000000000000000000..20d57cd234b43119424a94a2bf0530edbcfe2ab4 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_s.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 20.733047 ± 0.164062 +Mean PPL(base) : 20.457257 ± 0.161275 +Cor(ln(PPL(Q)), ln(PPL(base))): 99.11% +Mean ln(PPL(Q)/PPL(base)) : 0.013391 ± 0.001052 +Mean PPL(Q)/PPL(base) : 1.013481 ± 0.001067 +Mean PPL(Q)-PPL(base) : 0.275790 ± 0.021845 + +====== KL divergence statistics ====== +Mean KLD: 0.042653 ± 0.000923 +Maximum KLD: 20.481625 +99.9% KLD: 6.033762 +99.0% KLD: 0.356820 +99.0% KLD: 0.356820 +Median KLD: 0.012133 +10.0% KLD: 0.000630 + 5.0% KLD: 0.000108 + 1.0% KLD: -0.000026 +Minimum KLD: -0.000844 + +====== Token probability statistics ====== +Mean Δp: -0.154 ± 0.012 % +Maximum Δp: 99.910% +99.9% Δp: 31.943% +99.0% Δp: 12.335% +95.0% Δp: 5.465% +90.0% Δp: 2.946% +75.0% Δp: 0.441% +Median Δp: -0.001% +25.0% Δp: -0.624% +10.0% Δp: -3.482% + 5.0% Δp: -6.338% + 1.0% Δp: -13.948% + 0.1% Δp: -30.986% +Minimum Δp: -98.998% +RMS Δp : 4.585 ± 0.050 % +Same top p: 91.960 ± 0.070 % diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_s.tqa b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_s.tqa new file mode 100644 index 0000000000000000000000000000000000000000..8687316e37b773d4af7df16506689359c33f56a8 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_s.tqa @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q5_K_S.gguf (version GGUF V3 (latest)) + +Final result: 38.5333 +/- 1.7783 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 1036.92 ms +llama_perf_context_print: prompt eval time = 233178.56 ms / 51053 tokens ( 4.57 ms per token, 218.94 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 234803.92 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_s.wng b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_s.wng new file mode 100644 index 0000000000000000000000000000000000000000..f33ee7c7ea25c1d9e3ce72faf5fb8ef9a88a359e --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q5_k_s.wng @@ -0,0 +1,19 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q5_K_S.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 72.2667 +/- 1.6358 + +llama_perf_context_print: load time = 1057.21 ms +llama_perf_context_print: prompt eval time = 101234.51 ms / 22541 tokens ( 4.49 ms per token, 222.66 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 101787.96 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q6_k.arc b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q6_k.arc new file mode 100644 index 0000000000000000000000000000000000000000..f18e0d9dce611cc4e2ba83895b4bc530e0bf348b --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q6_k.arc @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q6_K.gguf (version GGUF V3 (latest)) + +Final result: 62.6667 +/- 1.7674 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 8437.03 ms +llama_perf_context_print: prompt eval time = 163908.33 ms / 36666 tokens ( 4.47 ms per token, 223.70 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 164717.73 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q6_k.hsw b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q6_k.hsw new file mode 100644 index 0000000000000000000000000000000000000000..77f5b677a38a7bcacb632bab0309bc11e024fc7a --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q6_k.hsw @@ -0,0 +1,20 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q6_K.gguf (version GGUF V3 (latest)) + +750 78.80000000% [75.7319%, 81.5746%] + + +llama_perf_context_print: load time = 1298.72 ms +llama_perf_context_print: prompt eval time = 577249.33 ms / 129319 tokens ( 4.46 ms per token, 224.03 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 580674.53 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q6_k.mmlu b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q6_k.mmlu new file mode 100644 index 0000000000000000000000000000000000000000..8bfc335c1b54b44262c7cf094c46637bed20cce7 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q6_k.mmlu @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q6_K.gguf (version GGUF V3 (latest)) + +Final result: 41.0667 +/- 1.7976 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 1355.58 ms +llama_perf_context_print: prompt eval time = 306661.70 ms / 68956 tokens ( 4.45 ms per token, 224.86 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 307820.94 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q6_k.ppx b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q6_k.ppx new file mode 100644 index 0000000000000000000000000000000000000000..c94e49c798ec5be2385ac2dd5210d430ac3d320c --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q6_k.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 20.450270 ± 0.161987 +Mean PPL(base) : 20.457257 ± 0.161275 +Cor(ln(PPL(Q)), ln(PPL(base))): 99.58% +Mean ln(PPL(Q)/PPL(base)) : -0.000342 ± 0.000726 +Mean PPL(Q)/PPL(base) : 0.999658 ± 0.000726 +Mean PPL(Q)-PPL(base) : -0.006987 ± 0.014846 + +====== KL divergence statistics ====== +Mean KLD: 0.016946 ± 0.000664 +Maximum KLD: 19.118521 +99.9% KLD: 3.575256 +99.0% KLD: 0.102086 +99.0% KLD: 0.102086 +Median KLD: 0.002504 +10.0% KLD: 0.000080 + 5.0% KLD: 0.000001 + 1.0% KLD: -0.000155 +Minimum KLD: -0.000800 + +====== Token probability statistics ====== +Mean Δp: 0.091 ± 0.007 % +Maximum Δp: 99.135% +99.9% Δp: 16.060% +99.0% Δp: 6.331% +95.0% Δp: 2.966% +90.0% Δp: 1.656% +75.0% Δp: 0.308% +Median Δp: 0.000% +25.0% Δp: -0.184% +10.0% Δp: -1.291% + 5.0% Δp: -2.466% + 1.0% Δp: -5.823% + 0.1% Δp: -16.768% +Minimum Δp: -98.122% +RMS Δp : 2.585 ± 0.062 % +Same top p: 95.983 ± 0.050 % diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q6_k.tqa b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q6_k.tqa new file mode 100644 index 0000000000000000000000000000000000000000..db390c1f9a0784b5cc008f33511e46162219fcca --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q6_k.tqa @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q6_K.gguf (version GGUF V3 (latest)) + +Final result: 37.4667 +/- 1.7686 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 1393.63 ms +llama_perf_context_print: prompt eval time = 233202.87 ms / 51053 tokens ( 4.57 ms per token, 218.92 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 234661.38 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q6_k.wng b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q6_k.wng new file mode 100644 index 0000000000000000000000000000000000000000..2b8d66f7739bc36d3832f66e8160d91c50811bcb --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q6_k.wng @@ -0,0 +1,19 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q6_K.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 72.1333 +/- 1.6382 + +llama_perf_context_print: load time = 1293.29 ms +llama_perf_context_print: prompt eval time = 100972.92 ms / 22541 tokens ( 4.48 ms per token, 223.24 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 101444.48 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q8_0.arc b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q8_0.arc new file mode 100644 index 0000000000000000000000000000000000000000..2713aec48e746189142535e207f60d7b76c755b7 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q8_0.arc @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q8_0.gguf (version GGUF V3 (latest)) + +Final result: 62.2667 +/- 1.7711 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 10023.59 ms +llama_perf_context_print: prompt eval time = 159351.92 ms / 36666 tokens ( 4.35 ms per token, 230.09 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 160153.25 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q8_0.hsw b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q8_0.hsw new file mode 100644 index 0000000000000000000000000000000000000000..52e9241e238649fba9625ba542e1571d43e43cce --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q8_0.hsw @@ -0,0 +1,20 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q8_0.gguf (version GGUF V3 (latest)) + +750 79.60000000% [76.5686%, 82.3297%] + + +llama_perf_context_print: load time = 1442.63 ms +llama_perf_context_print: prompt eval time = 565911.08 ms / 129319 tokens ( 4.38 ms per token, 228.51 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 569325.50 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q8_0.mmlu b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q8_0.mmlu new file mode 100644 index 0000000000000000000000000000000000000000..64907c721c0f7e795630023e066769fd2a05292d --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q8_0.mmlu @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q8_0.gguf (version GGUF V3 (latest)) + +Final result: 41.0667 +/- 1.7976 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 1399.49 ms +llama_perf_context_print: prompt eval time = 297408.08 ms / 68956 tokens ( 4.31 ms per token, 231.86 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 298607.06 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q8_0.ppx b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q8_0.ppx new file mode 100644 index 0000000000000000000000000000000000000000..2326a23002b83f7f72d9a60fc1d9ecc26e3546f6 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q8_0.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 20.469495 ± 0.162108 +Mean PPL(base) : 20.457257 ± 0.161275 +Cor(ln(PPL(Q)), ln(PPL(base))): 99.63% +Mean ln(PPL(Q)/PPL(base)) : 0.000598 ± 0.000684 +Mean PPL(Q)/PPL(base) : 1.000598 ± 0.000684 +Mean PPL(Q)-PPL(base) : 0.012238 ± 0.014001 + +====== KL divergence statistics ====== +Mean KLD: 0.014037 ± 0.000644 +Maximum KLD: 18.973799 +99.9% KLD: 3.339524 +99.0% KLD: 0.076466 +99.0% KLD: 0.076466 +Median KLD: 0.001239 +10.0% KLD: 0.000030 + 5.0% KLD: -0.000004 + 1.0% KLD: -0.000158 +Minimum KLD: -0.000871 + +====== Token probability statistics ====== +Mean Δp: 0.064 ± 0.006 % +Maximum Δp: 99.152% +99.9% Δp: 14.226% +99.0% Δp: 4.818% +95.0% Δp: 2.134% +90.0% Δp: 1.180% +75.0% Δp: 0.211% +Median Δp: 0.000% +25.0% Δp: -0.135% +10.0% Δp: -0.948% + 5.0% Δp: -1.811% + 1.0% Δp: -4.389% + 0.1% Δp: -12.829% +Minimum Δp: -98.121% +RMS Δp : 2.279 ± 0.073 % +Same top p: 96.918 ± 0.044 % diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q8_0.tqa b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q8_0.tqa new file mode 100644 index 0000000000000000000000000000000000000000..799ff3b7e843263ecfdd74e0a96944f9a5dc8edb --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q8_0.tqa @@ -0,0 +1,21 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q8_0.gguf (version GGUF V3 (latest)) + +Final result: 37.4667 +/- 1.7686 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 1387.05 ms +llama_perf_context_print: prompt eval time = 227243.78 ms / 51053 tokens ( 4.45 ms per token, 224.66 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 228732.32 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q8_0.wng b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q8_0.wng new file mode 100644 index 0000000000000000000000000000000000000000..7037d5e1165a83dee0157da752417a48ff94f337 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-pruned-q8_0.wng @@ -0,0 +1,19 @@ +build: 5890 (982e3472) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 345 tensors from ./Mistral-Small-3.2-24B-Instruct-pruned-Q8_0.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 72.4000 +/- 1.6334 + +llama_perf_context_print: load time = 1379.67 ms +llama_perf_context_print: prompt eval time = 97861.52 ms / 22541 tokens ( 4.34 ms per token, 230.34 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 98365.30 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-bartowski.arc b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-bartowski.arc new file mode 100644 index 0000000000000000000000000000000000000000..a6e3433544ecadd82ac36722d214fcced8b74b90 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-bartowski.arc @@ -0,0 +1,21 @@ +build: 5920 (d9b69108) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 363 tensors from ./Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf (version GGUF V3 (latest)) + +Final result: 67.0667 +/- 1.7172 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 1031.43 ms +llama_perf_context_print: prompt eval time = 167688.21 ms / 36666 tokens ( 4.57 ms per token, 218.66 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 168485.14 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-bartowski.hsw b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-bartowski.hsw new file mode 100644 index 0000000000000000000000000000000000000000..b5fc95ffaef84c4132043371e5bb55730d88a075 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-bartowski.hsw @@ -0,0 +1,20 @@ +build: 5920 (d9b69108) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 363 tensors from ./Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf (version GGUF V3 (latest)) + +750 83.60000000% [80.7800%, 86.0775%] + + +llama_perf_context_print: load time = 1044.18 ms +llama_perf_context_print: prompt eval time = 587180.06 ms / 129319 tokens ( 4.54 ms per token, 220.24 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 590570.49 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-bartowski.mmlu b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-bartowski.mmlu new file mode 100644 index 0000000000000000000000000000000000000000..5459c74d7e6d7be43ba950e441cdf444adbb83c4 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-bartowski.mmlu @@ -0,0 +1,21 @@ +build: 5920 (d9b69108) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 363 tensors from ./Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf (version GGUF V3 (latest)) + +Final result: 45.7333 +/- 1.8203 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 1038.08 ms +llama_perf_context_print: prompt eval time = 309373.14 ms / 68956 tokens ( 4.49 ms per token, 222.89 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 310535.76 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-bartowski.ppx b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-bartowski.ppx new file mode 100644 index 0000000000000000000000000000000000000000..c16bd5d1e457ee4011f7a53bf2a922436f219256 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-bartowski.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 5.162781 ± 0.029306 +Mean PPL(base) : 5.056724 ± 0.028289 +Cor(ln(PPL(Q)), ln(PPL(base))): 99.42% +Mean ln(PPL(Q)/PPL(base)) : 0.020757 ± 0.000612 +Mean PPL(Q)/PPL(base) : 1.020974 ± 0.000624 +Mean PPL(Q)-PPL(base) : 0.106058 ± 0.003259 + +====== KL divergence statistics ====== +Mean KLD: 0.022338 ± 0.000204 +Maximum KLD: 4.780019 +99.9% KLD: 1.095052 +99.0% KLD: 0.274427 +99.0% KLD: 0.274427 +Median KLD: 0.007652 +10.0% KLD: 0.000202 + 5.0% KLD: 0.000054 + 1.0% KLD: 0.000007 +Minimum KLD: -0.000021 + +====== Token probability statistics ====== +Mean Δp: -0.027 ± 0.012 % +Maximum Δp: 89.124% +99.9% Δp: 29.131% +99.0% Δp: 11.715% +95.0% Δp: 5.153% +90.0% Δp: 3.090% +75.0% Δp: 0.845% +Median Δp: 0.011% +25.0% Δp: -0.537% +10.0% Δp: -2.721% + 5.0% Δp: -5.164% + 1.0% Δp: -15.562% + 0.1% Δp: -48.222% +Minimum Δp: -95.132% +RMS Δp : 4.694 ± 0.046 % +Same top p: 94.185 ± 0.060 % diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-bartowski.tqa b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-bartowski.tqa new file mode 100644 index 0000000000000000000000000000000000000000..245d3b60ace7d5480b15dc68cf7e88f0c0506906 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-bartowski.tqa @@ -0,0 +1,21 @@ +build: 5920 (d9b69108) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 363 tensors from ./Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf (version GGUF V3 (latest)) + +Final result: 36.2667 +/- 1.7567 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 1042.65 ms +llama_perf_context_print: prompt eval time = 236854.31 ms / 51053 tokens ( 4.64 ms per token, 215.55 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 238315.88 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-bartowski.wng b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-bartowski.wng new file mode 100644 index 0000000000000000000000000000000000000000..3c9bd8a7ed6d12e007aa1334a6745a5d9245b7ee --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-bartowski.wng @@ -0,0 +1,19 @@ +build: 5920 (d9b69108) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 46 key-value pairs and 363 tensors from ./Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 79.4667 +/- 1.4760 + +llama_perf_context_print: load time = 1057.91 ms +llama_perf_context_print: prompt eval time = 103326.29 ms / 22541 tokens ( 4.58 ms per token, 218.15 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 103809.24 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-unsloth.arc b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-unsloth.arc new file mode 100644 index 0000000000000000000000000000000000000000..984e89411dd19e0a0dd790bea56a8ab387c14a5d --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-unsloth.arc @@ -0,0 +1,21 @@ +build: 5920 (d9b69108) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 41 key-value pairs and 363 tensors from ./Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf (version GGUF V3 (latest)) + +Final result: 66.6667 +/- 1.7225 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 1080.77 ms +llama_perf_context_print: prompt eval time = 174189.48 ms / 36666 tokens ( 4.75 ms per token, 210.49 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 175023.75 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-unsloth.hsw b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-unsloth.hsw new file mode 100644 index 0000000000000000000000000000000000000000..a98d7442d29af089593fede73c878ab6740719df --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-unsloth.hsw @@ -0,0 +1,20 @@ +build: 5920 (d9b69108) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 41 key-value pairs and 363 tensors from ./Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf (version GGUF V3 (latest)) + +750 84.00000000% [81.2040%, 86.4495%] + + +llama_perf_context_print: load time = 1103.94 ms +llama_perf_context_print: prompt eval time = 603762.78 ms / 129319 tokens ( 4.67 ms per token, 214.19 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 607383.67 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-unsloth.mmlu b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-unsloth.mmlu new file mode 100644 index 0000000000000000000000000000000000000000..0bb1224fd34ca6614c4f25fae53cbc33bc7016aa --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-unsloth.mmlu @@ -0,0 +1,21 @@ +build: 5920 (d9b69108) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 41 key-value pairs and 363 tensors from ./Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf (version GGUF V3 (latest)) + +Final result: 45.8667 +/- 1.8207 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 1073.35 ms +llama_perf_context_print: prompt eval time = 319143.72 ms / 68956 tokens ( 4.63 ms per token, 216.07 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 320440.15 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-unsloth.ppx b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-unsloth.ppx new file mode 100644 index 0000000000000000000000000000000000000000..5c5ff31da1ac47c3611c37ce6904ed2a2bc65823 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-unsloth.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 5.163345 ± 0.029336 +Mean PPL(base) : 5.056724 ± 0.028289 +Cor(ln(PPL(Q)), ln(PPL(base))): 99.43% +Mean ln(PPL(Q)/PPL(base)) : 0.020866 ± 0.000611 +Mean PPL(Q)/PPL(base) : 1.021085 ± 0.000624 +Mean PPL(Q)-PPL(base) : 0.106622 ± 0.003261 + +====== KL divergence statistics ====== +Mean KLD: 0.022240 ± 0.000199 +Maximum KLD: 5.010417 +99.9% KLD: 1.048761 +99.0% KLD: 0.266856 +99.0% KLD: 0.266856 +Median KLD: 0.007753 +10.0% KLD: 0.000200 + 5.0% KLD: 0.000053 + 1.0% KLD: 0.000006 +Minimum KLD: -0.000051 + +====== Token probability statistics ====== +Mean Δp: 0.009 ± 0.012 % +Maximum Δp: 80.790% +99.9% Δp: 28.955% +99.0% Δp: 12.066% +95.0% Δp: 5.255% +90.0% Δp: 3.184% +75.0% Δp: 0.877% +Median Δp: 0.011% +25.0% Δp: -0.528% +10.0% Δp: -2.689% + 5.0% Δp: -5.073% + 1.0% Δp: -15.432% + 0.1% Δp: -44.225% +Minimum Δp: -95.052% +RMS Δp : 4.648 ± 0.045 % +Same top p: 94.237 ± 0.060 % diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-unsloth.tqa b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-unsloth.tqa new file mode 100644 index 0000000000000000000000000000000000000000..47887a1843ca08e20bd4381d2f3df3d147815bf2 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-unsloth.tqa @@ -0,0 +1,21 @@ +build: 5920 (d9b69108) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 41 key-value pairs and 363 tensors from ./Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf (version GGUF V3 (latest)) + +Final result: 35.7333 +/- 1.7510 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 1117.06 ms +llama_perf_context_print: prompt eval time = 244747.70 ms / 51053 tokens ( 4.79 ms per token, 208.59 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 246299.75 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-unsloth.wng b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-unsloth.wng new file mode 100644 index 0000000000000000000000000000000000000000..e215c3b0532aa91e1c1f7307f92425798f40fe55 --- /dev/null +++ b/scores/Mistral-Small-3.2-24B-Instruct-2506-q4_k_m-unsloth.wng @@ -0,0 +1,19 @@ +build: 5920 (d9b69108) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 41 key-value pairs and 363 tensors from ./Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 79.3333 +/- 1.4795 + +llama_perf_context_print: load time = 1147.40 ms +llama_perf_context_print: prompt eval time = 106307.66 ms / 22541 tokens ( 4.72 ms per token, 212.04 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 106820.62 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)