# /// script
# dependencies = [
#     "matplotlib",
#     "numpy",
# ]
# ///
import json
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
# Get result directories from environment variables
gptoss_dir = os.environ.get('UVNOTE_INPUT_GPTOSS_RUN', '.')
megablocks_dir = os.environ.get('UVNOTE_INPUT_MEGABLOCKS_RUN', '.')

print("Loading benchmark results from:")
print(f"  GPT-OSS dir: {gptoss_dir}")
print(f"  MegaBlocks dir: {megablocks_dir}")

# Load benchmark results
gptoss_file = Path(gptoss_dir) / 'gptoss_results.json'
megablocks_file = Path(megablocks_dir) / 'megablocks_results.json'

print("Loading results from:")
print(f"  GPT-OSS: {gptoss_file}")
print(f"  MegaBlocks: {megablocks_file}")

if not gptoss_file.exists():
    print(f"Warning: {gptoss_file} not found")
if not megablocks_file.exists():
    print(f"Warning: {megablocks_file} not found")

with open(gptoss_file, 'r') as f:
    gptoss_results = json.load(f)
with open(megablocks_file, 'r') as f:
    megablocks_results = json.load(f)

print(f"GPT-OSS results keys: {list(gptoss_results.keys())}")
print(f"MegaBlocks results keys: {list(megablocks_results.keys())}")
# Helper function to extract metrics from either old or new JSON format
def get_metric(results, metric_name, default=0):
    """Extract a metric from results, handling both old and new JSON formats."""
    # New format (metrics nested under a "stats" dict)
    if 'stats' in results:
        return results['stats'].get(metric_name, default)
    # Old format (metrics as top-level keys)
    elif metric_name in results:
        return results[metric_name]
    else:
        return default
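# For reference, the two result layouts that get_metric accepts are assumed to
# look roughly like the dicts below. The values are illustrative only, not real
# benchmark numbers.
_example_new_format = {
    "stats": {
        "avg_ms": 12.3,
        "tokens_per_s": 41000.0,
        "memory_allocated_gb": 1.8,
        "memory_increase_gb": 0.2,
    }
}
_example_old_format = {
    "avg_time_ms": 12.3,
    "throughput_tokens_per_sec": 41000.0,
    "memory_allocated_gb": 1.8,
    "memory_increase_gb": 0.2,
}
assert get_metric(_example_new_format, "avg_ms") == _example_old_format["avg_time_ms"]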
# Create comparison plots
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

# Performance comparison
implementations = ['GPT-OSS', 'MegaBlocks']

# Extract timing metrics (handle both avg_ms and avg_time_ms)
gpt_time = get_metric(gptoss_results, 'avg_ms', get_metric(gptoss_results, 'avg_time_ms', 0))
mega_time = get_metric(megablocks_results, 'avg_ms', get_metric(megablocks_results, 'avg_time_ms', 0))
times = [gpt_time, mega_time]

# Extract throughput metrics (handle both tokens_per_s and throughput_tokens_per_sec)
gpt_throughput = get_metric(gptoss_results, 'tokens_per_s', get_metric(gptoss_results, 'throughput_tokens_per_sec', 0))
mega_throughput = get_metric(megablocks_results, 'tokens_per_s', get_metric(megablocks_results, 'throughput_tokens_per_sec', 0))
throughputs = [gpt_throughput, mega_throughput]

# Extract memory metrics
gpt_memory = get_metric(gptoss_results, 'memory_allocated_gb', 0)
mega_memory = get_metric(megablocks_results, 'memory_allocated_gb', 0)
memory_usage = [gpt_memory, mega_memory]

gpt_mem_inc = get_metric(gptoss_results, 'memory_increase_gb', 0)
mega_mem_inc = get_metric(megablocks_results, 'memory_increase_gb', 0)
memory_increase = [gpt_mem_inc, mega_mem_inc]

print("Extracted metrics:")
print(f"  Times (ms): {times}")
print(f"  Throughputs: {throughputs}")
print(f"  Memory usage (GB): {memory_usage}")
print(f"  Memory increase (GB): {memory_increase}")
colors = ['#2E8B57', '#4169E1']

# Latency comparison
bars1 = ax1.bar(implementations, times, color=colors)
ax1.set_ylabel('Average Time (ms)')
ax1.set_title('Latency Comparison')
ax1.grid(True, alpha=0.3)
# Add values on bars
for bar, time in zip(bars1, times):
    height = bar.get_height()
    ax1.text(bar.get_x() + bar.get_width() / 2., height + height * 0.01,
             f'{time:.2f}ms', ha='center', va='bottom')

# Throughput comparison
bars2 = ax2.bar(implementations, throughputs, color=colors)
ax2.set_ylabel('Tokens per Second')
ax2.set_title('Throughput Comparison')
ax2.grid(True, alpha=0.3)
# Add values on bars
for bar, throughput in zip(bars2, throughputs):
    height = bar.get_height()
    ax2.text(bar.get_x() + bar.get_width() / 2., height + height * 0.01,
             f'{throughput:.0f}', ha='center', va='bottom')

# Memory usage comparison
bars3 = ax3.bar(implementations, memory_usage, color=colors)
ax3.set_ylabel('Memory Allocated (GB)')
ax3.set_title('Memory Usage Comparison')
ax3.grid(True, alpha=0.3)
# Add values on bars
for bar, mem in zip(bars3, memory_usage):
    height = bar.get_height()
    ax3.text(bar.get_x() + bar.get_width() / 2., height + height * 0.01,
             f'{mem:.2f}GB', ha='center', va='bottom')

# Memory increase comparison
bars4 = ax4.bar(implementations, memory_increase, color=colors)
ax4.set_ylabel('Memory Increase (GB)')
ax4.set_title('Memory Increase Comparison')
ax4.grid(True, alpha=0.3)
# Add values on bars
for bar, mem_inc in zip(bars4, memory_increase):
    height = bar.get_height()
    ax4.text(bar.get_x() + bar.get_width() / 2., height + height * 0.01,
             f'{mem_inc:.3f}GB', ha='center', va='bottom')

plt.tight_layout()
plt.savefig('small_moe_comparison.png', dpi=150, bbox_inches='tight')
plt.show()
# Print summary table
print("\n" + "=" * 60)
print("PERFORMANCE COMPARISON SUMMARY")
print("=" * 60)
print(f"{'Metric':<25} {'GPT-OSS':<15} {'MegaBlocks':<15} {'Winner':<10}")
print("-" * 60)

# Determine winners (lower is better for latency and memory, higher for throughput)
latency_winner = "GPT-OSS" if times[0] < times[1] else "MegaBlocks"
throughput_winner = "GPT-OSS" if throughputs[0] > throughputs[1] else "MegaBlocks"
memory_winner = "GPT-OSS" if memory_usage[0] < memory_usage[1] else "MegaBlocks"
mem_inc_winner = "GPT-OSS" if memory_increase[0] < memory_increase[1] else "MegaBlocks"

print(f"{'Latency (ms)':<25} {times[0]:<15.2f} {times[1]:<15.2f} {latency_winner:<10}")
print(f"{'Throughput (tok/s)':<25} {throughputs[0]:<15.0f} {throughputs[1]:<15.0f} {throughput_winner:<10}")
print(f"{'Memory Usage (GB)':<25} {memory_usage[0]:<15.3f} {memory_usage[1]:<15.3f} {memory_winner:<10}")
print(f"{'Memory Increase (GB)':<25} {memory_increase[0]:<15.3f} {memory_increase[1]:<15.3f} {mem_inc_winner:<10}")

# Speed ratio (slower time divided by faster time)
speed_ratio = times[1] / times[0] if times[0] < times[1] else times[0] / times[1]
faster_impl = latency_winner
print(f"\n{faster_impl} is {speed_ratio:.2f}x faster")

# Throughput ratio (higher throughput divided by lower)
throughput_ratio = max(throughputs) / min(throughputs)
higher_throughput = throughput_winner
print(f"{higher_throughput} has {throughput_ratio:.2f}x higher throughput")
print("=" * 60)