[feat] Benchmark runner and mlpackage generator: run_benchmarks.sh for full test suite, gen_mlpackages.py for CoreML model generation

This commit is contained in:
Erik Bray 2026-03-03 14:29:04 +01:00
parent 37cac988b8
commit 517f1e45bb
2 changed files with 367 additions and 0 deletions

88
scripts/gen_mlpackages.py Normal file
View File

@ -0,0 +1,88 @@
#!/usr/bin/env python3
"""
Generate /tmp/ane_sram_{ch}ch_{sp}sp.mlpackage models for ANE benchmarks.
Each model is a single 1x1 conv: fp32_in -> cast_fp16 -> conv -> cast_fp32 -> out
Covers all configs needed by inmem_basic, inmem_bench, sram_bench, sram_probe.
"""
import numpy as np
import os
import sys
try:
import coremltools as ct
from coremltools.converters.mil import Builder as mb
from coremltools.converters.mil.mil import types
except ImportError:
print("ERROR: coremltools not installed. Install with: pip install coremltools", file=sys.stderr)
sys.exit(1)
# Model shapes to generate, as (ch, sp) pairs. ``ch`` is used as both the
# input and output channel count of the 1x1 conv; ``sp`` is the last
# dimension of the (1, ch, 1, sp) input tensor.
CONFIGS = [
    (256, 64), (512, 64), (1024, 64), (1536, 64),
    (2048, 64), (2560, 64), (3072, 64), (3584, 64),
    (4096, 64), (4608, 64), (5120, 64), (6144, 64),
    (8192, 32),
]
def gen_model(ch, sp):
    """Return a converted Core ML model holding one 1x1 convolution.

    The MIL program takes an fp32 tensor of shape ``(1, ch, 1, sp)``, casts
    it to fp16, applies a ``ch`` -> ``ch`` convolution with a 1x1 kernel and
    random weights, then casts the result back to fp32.

    Args:
        ch: Number of input and output channels of the convolution.
        sp: Size of the last axis of the input tensor.

    Returns:
        Whatever ``ct.convert`` returns for the program.
    """
    input_spec = mb.TensorSpec(shape=(1, ch, 1, sp), dtype=types.fp32)

    @mb.program(input_specs=[input_spec], opset_version=ct.target.iOS18)
    def prog(x):
        half_in = mb.cast(x=x, dtype="fp16", name="cast_in")
        # Fresh random weights on every call; scaled by 0.01 after the
        # cast to float16.
        kernel = np.random.randn(ch, ch, 1, 1).astype(np.float16) * 0.01
        conv_out = mb.conv(
            x=half_in,
            weight=kernel,
            pad_type="valid",
            strides=[1, 1],
            dilations=[1, 1],
            groups=1,
            name="c0",
        )
        return mb.cast(x=conv_out, dtype="fp32", name="cast_out")

    return ct.convert(
        prog,
        minimum_deployment_target=ct.target.iOS18,
        compute_precision=ct.precision.FLOAT16,
    )
def main():
    """Generate every model listed in ``CONFIGS`` under ``/tmp``.

    A configuration whose ``.mlpackage`` path already exists is skipped. A
    failure for one configuration is reported and does not stop the
    remaining ones; any partial output it left behind is removed so that a
    later run regenerates it instead of skipping it as "exists".

    Returns:
        0 when every configuration was either created or skipped,
        1 when at least one configuration failed.
    """
    import shutil  # local import: only needed for failure cleanup

    created = 0
    skipped = 0
    print(f"Generating {len(CONFIGS)} mlpackage models in /tmp/...")
    for ch, sp in CONFIGS:
        path = f"/tmp/ane_sram_{ch}ch_{sp}sp.mlpackage"
        if os.path.exists(path):
            print(f" [skip] {ch}ch x {sp}sp (exists)")
            skipped += 1
            continue
        # ch x ch float16 weights, 2 bytes each, reported in MiB.
        w_mb = ch * ch * 2 / 1024 / 1024
        print(f" [gen] {ch}ch x {sp}sp (weights: {w_mb:.1f} MB)...", end="", flush=True)
        try:
            model = gen_model(ch, sp)
            model.save(path)
        except Exception as e:  # boundary: report and move on to the next config
            print(f" FAILED: {e}")
            # The path did not exist before this attempt, so anything there
            # now is an incomplete package; leaving it would make the
            # exists-check above skip this config on every later run.
            shutil.rmtree(path, ignore_errors=True)
        else:
            print(" OK")
            created += 1
    print(f"\nDone: {created} created, {skipped} skipped (already existed).")
    return 0 if created + skipped == len(CONFIGS) else 1
# When run as a script, use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())

279
scripts/run_benchmarks.sh Executable file
View File

@ -0,0 +1,279 @@
#!/bin/bash
# run_benchmarks.sh -- ANE Training Benchmark Runner
# Builds and runs benchmarks, collects results into a timestamped report.
#
# Usage:
# bash scripts/run_benchmarks.sh [OPTIONS]
#
# Options:
# --all Run everything (default)
# --training-only Run only training benchmarks
# --probes-only Run only probe/test suite
# --benchmarks-only Run only root-level benchmarks (inmem_peak, inmem_basic, inmem_bench, sram_bench, sram_probe)
# --steps N Training steps (default: 100)
# --help Show this help
# Abort on a failing command, on use of an unset variable, and on a failure
# anywhere inside a pipeline.
set -euo pipefail
# Absolute directory containing this script, and the directory one level up.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
TRAINING_DIR="$ROOT_DIR/training"
# Every run writes its own report, named after the start time.
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
RESULTS_FILE="$ROOT_DIR/benchmark_results_${TIMESTAMP}.txt"
# Defaults
# The RUN_* toggles hold the words true/false because they are later executed
# as commands (e.g. `if $RUN_TRAINING; then`).
RUN_TRAINING=true
RUN_PROBES=true
RUN_BENCHMARKS=true
STEPS=100
# Color output helpers
# The escapes are stored as literal backslash sequences; `echo -e` expands
# them at print time.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

# Print a message prefixed with a colored [TAG].
#   $1  color escape   $2  tag text   remaining args  message words
_log_tagged() {
    local color="$1" tag="$2"
    shift 2
    echo -e "${color}[${tag}]${NC} $*"
}

log_info()    { _log_tagged "$CYAN" "INFO" "$@"; }
log_success() { _log_tagged "$GREEN" "PASS" "$@"; }
log_fail()    { _log_tagged "$RED" "FAIL" "$@"; }
log_warn()    { _log_tagged "$YELLOW" "WARN" "$@"; }

# Print a section title between two cyan rules, preceded by a blank line.
log_header() {
    local rule="${CYAN}========================================${NC}"
    echo -e "\n${rule}"
    echo -e "${CYAN} $*${NC}"
    echo -e "${rule}"
}
# Parse arguments
# Each --*-only flag enables exactly one section; --all re-enables all three.
# When several selection flags are given, the last one wins.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --all)
            RUN_TRAINING=true; RUN_PROBES=true; RUN_BENCHMARKS=true; shift ;;
        --training-only)
            RUN_TRAINING=true; RUN_PROBES=false; RUN_BENCHMARKS=false; shift ;;
        --probes-only)
            RUN_TRAINING=false; RUN_PROBES=true; RUN_BENCHMARKS=false; shift ;;
        --benchmarks-only)
            RUN_TRAINING=false; RUN_PROBES=false; RUN_BENCHMARKS=true; shift ;;
        --steps)
            # Check for the value explicitly: under `set -u` a bare "$2" would
            # abort with an opaque "unbound variable" error.
            if [[ $# -lt 2 ]]; then
                echo "Error: --steps requires a value" >&2
                exit 1
            fi
            # STEPS is later spliced into a `bash -c` command string, so only
            # plain digits are accepted.
            if ! [[ "$2" =~ ^[0-9]+$ ]]; then
                echo "Error: --steps expects a non-negative integer, got: $2" >&2
                exit 1
            fi
            STEPS="$2"; shift 2 ;;
        --help|-h)
            # Print the usage comment at the top of this script (lines 2-14).
            head -14 "$0" | tail -13
            exit 0 ;;
        *)
            echo "Unknown option: $1" >&2; exit 1 ;;
    esac
done
# Initialize results file
# Truncates/creates the report and writes a header describing the run and the
# host. Each host query falls back to "unknown" when its command fails.
cat > "$RESULTS_FILE" <<EOF
ANE Training Benchmark Results
==============================
Date: $(date)
Machine: $(sysctl -n hw.model 2>/dev/null || echo 'unknown')
macOS: $(sw_vers -productVersion 2>/dev/null || echo 'unknown')
Chip: $(sysctl -n machdep.cpu.brand_string 2>/dev/null || echo 'unknown')
Steps: $STEPS

EOF
log_info "Results will be saved to: $RESULTS_FILE"
# ──────────────────────────────────────────────
# Prerequisite checks
# ──────────────────────────────────────────────
# Each check either logs a PASS line or logs a FAIL line and exits with 1.
log_header "Prerequisite Checks"

# Operating system must report itself as Darwin.
if [[ "$(uname)" == "Darwin" ]]; then
    log_success "macOS detected"
else
    log_fail "This benchmark requires macOS. Detected: $(uname)"
    exit 1
fi

# hw.optional.arm64 must be readable and contain a 1.
if sysctl -n hw.optional.arm64 2>/dev/null | grep -q 1; then
    log_success "Apple Silicon detected"
else
    log_fail "Apple Silicon required. This appears to be an Intel Mac."
    exit 1
fi

# xcrun must be able to locate clang.
if xcrun --find clang >/dev/null 2>&1; then
    log_success "Xcode CLI tools available"
else
    log_fail "Xcode command line tools required. Run: xcode-select --install"
    exit 1
fi
# Outcome counters, updated by run_build_and_test and the skip paths below.
PASS_COUNT=0
FAIL_COUNT=0
SKIP_COUNT=0

# Build a target, run it, and record the outcome in $RESULTS_FILE.
#   $1  display name used in log lines and in the report
#   $2  build command (run through `bash -c` inside the work dir)
#   $3  run command   (run through `bash -c` inside the work dir)
#   $4  work dir (optional, defaults to $ROOT_DIR)
# Returns 0 when both build and run succeed, 1 otherwise.
# Increments PASS_COUNT or FAIL_COUNT accordingly.
run_build_and_test() {
    local name="$1"
    local build_cmd="$2"
    local run_cmd="$3"
    local workdir="${4:-$ROOT_DIR}"

    log_info "Building $name..."
    # Declared separately from the assignment so that `local` does not mask
    # the command's exit status.
    local build_output
    if ! build_output=$(cd "$workdir" && bash -c "$build_cmd" 2>&1); then
        log_fail "$name -- build failed"
        echo "[$name] BUILD FAILED" >> "$RESULTS_FILE"
        echo "$build_output" >> "$RESULTS_FILE"
        echo "" >> "$RESULTS_FILE"
        FAIL_COUNT=$((FAIL_COUNT + 1))
        return 1
    fi

    log_info "Running $name..."
    echo "--- $name ---" >> "$RESULTS_FILE"
    local output
    local rc=0
    # Capture the status right away: any later command (even an echo) would
    # overwrite $? and the report would always show "EXIT CODE: 0".
    output=$(cd "$workdir" && bash -c "$run_cmd" 2>&1) || rc=$?
    echo "$output" >> "$RESULTS_FILE"

    if [[ $rc -ne 0 ]]; then
        echo "EXIT CODE: $rc" >> "$RESULTS_FILE"
        echo "" >> "$RESULTS_FILE"
        log_fail "$name -- run failed (output captured in results file)"
        FAIL_COUNT=$((FAIL_COUNT + 1))
        return 1
    fi

    echo "" >> "$RESULTS_FILE"
    log_success "$name completed"
    PASS_COUNT=$((PASS_COUNT + 1))
}
# ──────────────────────────────────────────────
# Training Benchmarks
# ──────────────────────────────────────────────
if $RUN_TRAINING; then
    log_header "Training Benchmarks ($STEPS steps)"
    printf '\n%s\n\n' "=== TRAINING BENCHMARKS ===" >> "$RESULTS_FILE"

    # Each spec is "<make target>:<label>"; the target doubles as the binary
    # name. A failing benchmark is already counted by run_build_and_test, so
    # `|| true` only keeps `set -e` from ending the script.
    for train_spec in \
        "train_large:CPU classifier" \
        "train_large_ane:ANE classifier"; do
        train_target="${train_spec%%:*}"
        train_label="${train_spec#*:}"
        run_build_and_test \
            "$train_target ($train_label)" \
            "make $train_target" \
            "./$train_target --steps $STEPS" \
            "$TRAINING_DIR" || true
    done
fi
# ──────────────────────────────────────────────
# Probe Tests
# ──────────────────────────────────────────────
if $RUN_PROBES; then
    log_header "Probe Tests"
    printf '\n%s\n\n' "=== PROBE TESTS ===" >> "$RESULTS_FILE"

    # Each entry is both the make target and the name of the binary it builds.
    PROBE_TESTS=(
        "test_rmsnorm_bwd"
        "test_classifier"
        "test_weight_reload"
        "test_perf_stats"
        "test_qos_sweep"
        "test_ane_advanced"
    )

    # Failures are counted inside run_build_and_test; `|| true` keeps the
    # loop going under `set -e`.
    for test_name in "${PROBE_TESTS[@]}"; do
        run_build_and_test "$test_name" "make $test_name" "./$test_name" "$TRAINING_DIR" || true
    done
fi
# ──────────────────────────────────────────────
# Root-Level Benchmarks
# ──────────────────────────────────────────────
if $RUN_BENCHMARKS; then
    log_header "Root-Level Benchmarks"
    echo "" >> "$RESULTS_FILE"
    echo "=== ROOT-LEVEL BENCHMARKS ===" >> "$RESULTS_FILE"
    echo "" >> "$RESULTS_FILE"

    CC="xcrun clang"
    CFLAGS="-O2 -fobjc-arc -framework Foundation -framework CoreML -framework IOSurface -ldl"

    # Succeeds when at least one generated model is present in /tmp.
    # NOTE: this does not verify that every config exists.
    _have_mlpackages() { ls /tmp/ane_sram_*ch_*sp.mlpackage >/dev/null 2>&1; }

    # Generate mlpackage models needed by sram/inmem benchmarks
    if ! _have_mlpackages; then
        log_info "Generating mlpackage models for benchmarks..."
        VENV_PYTHON=""
        if [[ -x /tmp/ane_venv/bin/python3 ]]; then
            VENV_PYTHON="/tmp/ane_venv/bin/python3"
        else
            for pyver in 3.12 3.13 3.11; do
                PY="/opt/homebrew/opt/python@${pyver}/bin/python${pyver}"
                [[ -x "$PY" ]] || continue
                log_info "Creating venv with Python $pyver for coremltools..."
                # Run the setup as an `if` condition: as a plain statement a
                # failing `pip install` would trip `set -e` and end the whole
                # script before the summary is written.
                if "$PY" -m venv /tmp/ane_venv \
                    && /tmp/ane_venv/bin/pip install -q coremltools numpy 2>/dev/null; then
                    VENV_PYTHON="/tmp/ane_venv/bin/python3"
                    break
                fi
                # Drop the broken venv so neither the next Python version nor
                # a later run picks up an environment without coremltools.
                log_warn "venv setup with Python $pyver failed; removing /tmp/ane_venv"
                rm -rf /tmp/ane_venv || true
            done
        fi
        if [[ -n "$VENV_PYTHON" ]] && "$VENV_PYTHON" "$SCRIPT_DIR/gen_mlpackages.py" 2>/dev/null; then
            log_success "mlpackage models generated"
        else
            log_warn "Failed to generate mlpackage models (need Python 3.11-3.13 + coremltools)"
        fi
    else
        log_info "mlpackage models already exist in /tmp/"
    fi

    run_build_and_test \
        "inmem_peak (Peak TFLOPS)" \
        "$CC $CFLAGS -o inmem_peak inmem_peak.m" \
        "./inmem_peak" \
        "$ROOT_DIR" || true

    # Nothing inside the loop creates or removes models, so check once.
    if _have_mlpackages; then
        MLPACKAGES_READY=true
    else
        MLPACKAGES_READY=false
    fi

    for bench in inmem_basic inmem_bench sram_bench sram_probe; do
        if $MLPACKAGES_READY; then
            run_build_and_test \
                "$bench" \
                "$CC $CFLAGS -o $bench ${bench}.m" \
                "./$bench" \
                "$ROOT_DIR" || true
        else
            log_warn "$bench -- SKIPPED (mlpackage generation failed)"
            echo "[$bench] SKIPPED -- mlpackage generation failed" >> "$RESULTS_FILE"
            echo "" >> "$RESULTS_FILE"
            SKIP_COUNT=$((SKIP_COUNT + 1))
        fi
    done
fi
# ──────────────────────────────────────────────
# Summary
# ──────────────────────────────────────────────
log_header "Summary"
TOTAL=$((PASS_COUNT + FAIL_COUNT + SKIP_COUNT))

# Append the totals to the report in a single write.
printf '\n=== SUMMARY ===\nTotal: %s\nPassed: %s\nFailed: %s\nSkipped: %s\n' \
    "$TOTAL" "$PASS_COUNT" "$FAIL_COUNT" "$SKIP_COUNT" >> "$RESULTS_FILE"

# Mirror the totals on the terminal. The failure line is red only when there
# is something to report, and the skip line is omitted when nothing was skipped.
echo ""
log_info "Total: $TOTAL"
log_success "Passed: $PASS_COUNT"
if (( FAIL_COUNT > 0 )); then
    log_fail "Failed: $FAIL_COUNT"
else
    log_info "Failed: 0"
fi
if (( SKIP_COUNT > 0 )); then
    log_warn "Skipped: $SKIP_COUNT"
fi
echo ""
log_info "Full results saved to: $RESULTS_FILE"