11"""
2- Hardware benchmark script for CI runners.
2+ Hardware benchmark script for CI runners.
33Compares CPU and GPU performance to diagnose slowdowns.
44Works on both CPU-only (GitHub Actions) and GPU (RunsOn) runners.
55"""
66import time
77import platform
88import os
9+ import json
10+ from datetime import datetime
11+
12+ # Global results dictionary for JSON output
13+ RESULTS = {
14+ "timestamp": datetime.now().isoformat(),
15+ "execution_method": "bare_metal",
16+ "system": {},
17+ "benchmarks": {}
18+ }
919
1020def get_cpu_info():
1121 """Get CPU information."""
@@ -16,6 +26,10 @@ def get_cpu_info():
1626 print(f"Processor: {platform.processor()}")
1727 print(f"Python: {platform.python_version()}")
1828
29+ RESULTS["system"]["platform"] = platform.platform()
30+ RESULTS["system"]["processor"] = platform.processor()
31+ RESULTS["system"]["python_version"] = platform.python_version()
32+
1933 # Try to get CPU frequency
2034 try:
2135 with open('/proc/cpuinfo', 'r') as f:
@@ -38,18 +52,23 @@ def get_cpu_info():
3852
3953 # CPU count
4054 print(f"CPU Count: {os.cpu_count()}")
55+ RESULTS["system"]["cpu_count"] = os.cpu_count()
4156
4257 # Check for GPU
4358 try:
4459 import subprocess
4560 result = subprocess.run(['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader'],
4661 capture_output=True, text=True, timeout=5)
4762 if result.returncode == 0:
48- print (f"GPU: { result .stdout .strip ()} " )
63+ gpu_info = result.stdout.strip()
64+ print(f"GPU: {gpu_info}")
65+ RESULTS["system"]["gpu"] = gpu_info
4966 else:
5067 print("GPU: None detected")
68+ RESULTS["system"]["gpu"] = None
5169 except:
5270 print("GPU: None detected (nvidia-smi not available)")
71+ RESULTS["system"]["gpu"] = None
5372
5473 print()
5574
@@ -59,11 +78,14 @@ def benchmark_cpu_pure_python():
5978 print("CPU BENCHMARK: Pure Python")
6079 print("=" * 60)
6180
81+ results = {}
82+
6283 # Integer computation
6384 start = time.perf_counter()
6485 total = sum(i * i for i in range(10_000_000))
6586 elapsed = time.perf_counter() - start
6687 print(f"Integer sum (10M iterations): {elapsed:.3f} seconds")
88+ results["integer_sum_10M"] = elapsed
6789
6890 # Float computation
6991 start = time.perf_counter()
@@ -72,6 +94,9 @@ def benchmark_cpu_pure_python():
7294 total += (i * 0.1) ** 0.5
7395 elapsed = time.perf_counter() - start
7496 print(f"Float sqrt (1M iterations): {elapsed:.3f} seconds")
97+ results["float_sqrt_1M"] = elapsed
98+
99+ RESULTS["benchmarks"]["pure_python"] = results
75100 print()
76101
77102def benchmark_cpu_numpy():
@@ -82,6 +107,8 @@ def benchmark_cpu_numpy():
82107 print("CPU BENCHMARK: NumPy")
83108 print("=" * 60)
84109
110+ results = {}
111+
85112 # Matrix multiplication
86113 n = 3000
87114 A = np.random.randn(n, n)
@@ -91,6 +118,7 @@ def benchmark_cpu_numpy():
91118 C = A @ B
92119 elapsed = time.perf_counter() - start
93120 print(f"Matrix multiply ({n}x{n}): {elapsed:.3f} seconds")
121+ results["matmul_3000x3000"] = elapsed
94122
95123 # Element-wise operations
96124 x = np.random.randn(50_000_000)
@@ -99,6 +127,9 @@ def benchmark_cpu_numpy():
99127 y = np.cos(x**2) + np.sin(x)
100128 elapsed = time.perf_counter() - start
101129 print(f"Element-wise ops (50M elements): {elapsed:.3f} seconds")
130+ results["elementwise_50M"] = elapsed
131+
132+ RESULTS["benchmarks"]["numpy"] = results
102133 print()
103134
104135def benchmark_gpu_jax():
@@ -107,12 +138,18 @@ def benchmark_gpu_jax():
107138 import jax
108139 import jax.numpy as jnp
109140
141+ results = {}
142+
110143 devices = jax.devices()
111144 default_backend = jax.default_backend()
112145
113146 # Check if GPU is available
114147 has_gpu = any('cuda' in str(d).lower() or 'gpu' in str(d).lower() for d in devices)
115148
149+ results["has_gpu"] = has_gpu
150+ results["default_backend"] = default_backend
151+ results["devices"] = [str(d) for d in devices]
152+
116153 print("=" * 60)
117154 if has_gpu:
118155 print("JAX BENCHMARK: GPU")
@@ -141,12 +178,14 @@ def matmul(a, b):
141178 C = matmul(A, B).block_until_ready()
142179 warmup_time = time.perf_counter() - start
143180 print(f"Warm-up (includes JIT compile, {n}x{n}): {warmup_time:.3f} seconds")
181+ results["matmul_1000x1000_warmup"] = warmup_time
144182
145183 # Actual benchmark (compiled)
146184 start = time.perf_counter()
147185 C = matmul(A, B).block_until_ready()
148186 elapsed = time.perf_counter() - start
149187 print(f"Matrix multiply compiled ({n}x{n}): {elapsed:.3f} seconds")
188+ results["matmul_1000x1000_compiled"] = elapsed
150189
151190 # Larger matrix
152191 n = 3000
@@ -158,12 +197,14 @@ def matmul(a, b):
158197 C = matmul(A, B).block_until_ready()
159198 warmup_time = time.perf_counter() - start
160199 print(f"Warm-up (recompile for {n}x{n}): {warmup_time:.3f} seconds")
200+ results["matmul_3000x3000_warmup"] = warmup_time
161201
162202 # Benchmark compiled
163203 start = time.perf_counter()
164204 C = matmul(A, B).block_until_ready()
165205 elapsed = time.perf_counter() - start
166206 print(f"Matrix multiply compiled ({n}x{n}): {elapsed:.3f} seconds")
207+ results["matmul_3000x3000_compiled"] = elapsed
167208
168209 # Element-wise GPU benchmark
169210 x = jax.random.normal(key, (50_000_000,))
@@ -177,26 +218,33 @@ def elementwise_ops(x):
177218 y = elementwise_ops(x).block_until_ready()
178219 warmup_time = time.perf_counter() - start
179220 print(f"Element-wise warm-up (50M): {warmup_time:.3f} seconds")
221+ results["elementwise_50M_warmup"] = warmup_time
180222
181223 # Compiled
182224 start = time.perf_counter()
183225 y = elementwise_ops(x).block_until_ready()
184226 elapsed = time.perf_counter() - start
185227 print(f"Element-wise compiled (50M): {elapsed:.3f} seconds")
228+ results["elementwise_50M_compiled"] = elapsed
186229
230+ RESULTS["benchmarks"]["jax"] = results
187231 print()
188232
189233 except ImportError as e:
190234 print(f"JAX not available: {e}")
235+ RESULTS["benchmarks"]["jax"] = {"error": str(e)}
191236 except Exception as e:
192237 print(f"JAX benchmark failed: {e}")
238+ RESULTS["benchmarks"]["jax"] = {"error": str(e)}
193239
194240def benchmark_numba():
195241 """Numba CPU benchmark."""
196242 try:
197243 import numba
198244 import numpy as np
199245
246+ results = {}
247+
200248 print("=" * 60)
201249 print("CPU BENCHMARK: Numba")
202250 print("=" * 60)
@@ -213,12 +261,14 @@ def numba_sum(n):
213261 result = numba_sum(10_000_000)
214262 warmup_time = time.perf_counter() - start
215263 print(f"Integer sum warm-up (includes compile): {warmup_time:.3f} seconds")
264+ results["integer_sum_10M_warmup"] = warmup_time
216265
217266 # Compiled run
218267 start = time.perf_counter()
219268 result = numba_sum(10_000_000)
220269 elapsed = time.perf_counter() - start
221270 print(f"Integer sum compiled (10M): {elapsed:.3f} seconds")
271+ results["integer_sum_10M_compiled"] = elapsed
222272
223273 @numba.jit(nopython=True, parallel=True)
224274 def numba_parallel_sum(arr):
@@ -234,19 +284,24 @@ def numba_parallel_sum(arr):
234284 result = numba_parallel_sum(arr)
235285 warmup_time = time.perf_counter() - start
236286 print(f"Parallel sum warm-up (50M): {warmup_time:.3f} seconds")
287+ results["parallel_sum_50M_warmup"] = warmup_time
237288
238289 # Compiled
239290 start = time.perf_counter()
240291 result = numba_parallel_sum(arr)
241292 elapsed = time.perf_counter() - start
242293 print(f"Parallel sum compiled (50M): {elapsed:.3f} seconds")
294+ results["parallel_sum_50M_compiled"] = elapsed
243295
296+ RESULTS["benchmarks"]["numba"] = results
244297 print()
245298
246299 except ImportError as e:
247300 print(f"Numba not available: {e}")
301+ RESULTS["benchmarks"]["numba"] = {"error": str(e)}
248302 except Exception as e:
249303 print(f"Numba benchmark failed: {e}")
304+ RESULTS["benchmarks"]["numba"] = {"error": str(e)}
250305
251306if __name__ == "__main__":
252307 print("\n " + "=" * 60)
@@ -262,3 +317,9 @@ def numba_parallel_sum(arr):
262317 print("=" * 60)
263318 print("BENCHMARK COMPLETE")
264319 print("=" * 60)
320+
321+ # Save results to JSON
322+ output_file = "benchmark_results_baremetal.json"
323+ with open(output_file, 'w') as f:
324+ json.dump(RESULTS, f, indent=2)
325+ print(f"\n Results saved to {output_file}")
0 commit comments