diff --git a/.github/workflows/firmware-qemu.yml b/.github/workflows/firmware-qemu.yml index 3f628331..c44d26e2 100644 --- a/.github/workflows/firmware-qemu.yml +++ b/.github/workflows/firmware-qemu.yml @@ -61,8 +61,9 @@ jobs: - name: Verify QEMU binary run: | + file_size() { stat -c%s "$1" 2>/dev/null || stat -f%z "$1" 2>/dev/null || wc -c < "$1"; } /opt/qemu-esp32/bin/qemu-system-xtensa --version - echo "QEMU binary size: $(stat -c%s /opt/qemu-esp32/bin/qemu-system-xtensa) bytes" + echo "QEMU binary size: $(file_size /opt/qemu-esp32/bin/qemu-system-xtensa) bytes" - name: Upload QEMU artifact uses: actions/upload-artifact@v4 @@ -147,7 +148,8 @@ jobs: $OTA_ARGS \ 0x20000 build/esp32-csi-node.bin - echo "Flash image size: $(stat -c%s build/qemu_flash.bin) bytes" + file_size() { stat -c%s "$1" 2>/dev/null || stat -f%z "$1" 2>/dev/null || wc -c < "$1"; } + echo "Flash image size: $(file_size build/qemu_flash.bin) bytes" - name: Inject NVS partition if: matrix.nvs_config != 'default' @@ -155,7 +157,8 @@ jobs: run: | NVS_BIN="build/nvs_matrix/nvs_${{ matrix.nvs_config }}.bin" if [ -f "$NVS_BIN" ]; then - echo "Injecting NVS: $NVS_BIN ($(stat -c%s "$NVS_BIN") bytes)" + file_size() { stat -c%s "$1" 2>/dev/null || stat -f%z "$1" 2>/dev/null || wc -c < "$1"; } + echo "Injecting NVS: $NVS_BIN ($(file_size "$NVS_BIN") bytes)" dd if="$NVS_BIN" of=build/qemu_flash.bin \ bs=1 seek=$((0x9000)) conv=notrunc 2>/dev/null else @@ -228,7 +231,7 @@ jobs: - name: Check for crashes working-directory: firmware/esp32-csi-node/test run: | - CRASHES=$(find . -name "crash-*" -o -name "oom-*" -o -name "timeout-*" 2>/dev/null | wc -l) + CRASHES=$(find . 
-type f -name "crash-*" -o -type f -name "oom-*" -o -type f -name "timeout-*" 2>/dev/null | wc -l) echo "Crash artifacts found: $CRASHES" if [ "$CRASHES" -gt 0 ]; then echo "::error::Fuzzer found $CRASHES crash/oom/timeout artifacts" @@ -264,7 +267,7 @@ jobs: - name: Verify all binaries generated run: | EXPECTED=14 - ACTUAL=$(ls build/nvs_matrix/nvs_*.bin 2>/dev/null | wc -l) + ACTUAL=$(find build/nvs_matrix -type f -name "nvs_*.bin" 2>/dev/null | wc -l) echo "Generated $ACTUAL / $EXPECTED NVS binaries" ls -la build/nvs_matrix/ @@ -275,8 +278,9 @@ jobs: - name: Verify binary sizes run: | + file_size() { stat -c%s "$1" 2>/dev/null || stat -f%z "$1" 2>/dev/null || wc -c < "$1"; } for f in build/nvs_matrix/nvs_*.bin; do - SIZE=$(stat -c%s "$f") + SIZE=$(file_size "$f") if [ "$SIZE" -ne 24576 ]; then echo "::error::$f has unexpected size $SIZE (expected 24576)" exit 1 diff --git a/docs/adr/ADR-061-qemu-esp32s3-firmware-testing.md b/docs/adr/ADR-061-qemu-esp32s3-firmware-testing.md index 057e9c26..64aa1ace 100644 --- a/docs/adr/ADR-061-qemu-esp32s3-firmware-testing.md +++ b/docs/adr/ADR-061-qemu-esp32s3-firmware-testing.md @@ -32,6 +32,98 @@ Currently, **every code change requires flashing to physical hardware** on COM7. Espressif maintains an official QEMU fork (`github.com/espressif/qemu`) with ESP32-S3 machine support, including dual-core Xtensa LX7, flash mapping, UART, GPIO, timers, and FreeRTOS. 
+## Glossary + +| Term | Definition | +|------|-----------| +| CSI | Channel State Information — per-subcarrier amplitude/phase from WiFi | +| NVS | Non-Volatile Storage — ESP-IDF key-value flash partition | +| TDM | Time-Division Multiplexing — nodes transmit in assigned time slots | +| UART | Universal Asynchronous Receiver-Transmitter — serial console output | +| SLIRP | User-mode TCP/IP stack — enables networking without root/TAP | +| QEMU | Quick Emulator — runs ESP32-S3 firmware without physical hardware | +| QMP | QEMU Machine Protocol — JSON-based control interface | +| LFSR | Linear Feedback Shift Register — deterministic pseudo-random generator | +| SPSC | Single Producer Single Consumer — lock-free ring buffer pattern | +| FreeRTOS | Real-time OS used by ESP-IDF for task scheduling | +| gcov/lcov | GCC code coverage tools for line/branch analysis | +| libFuzzer | LLVM coverage-guided fuzzer for finding crashes | +| ASAN | AddressSanitizer — detects buffer overflows and use-after-free | +| UBSAN | UndefinedBehaviorSanitizer — detects undefined C behavior | + +## Quick Start + +### Prerequisites + +Install required tools: + +```bash +# QEMU (Espressif fork with ESP32-S3 support) +git clone https://github.com/espressif/qemu.git +cd qemu && ./configure --target-list=xtensa-softmmu && make -j$(nproc) +export QEMU_PATH=/path/to/qemu/build/qemu-system-xtensa + +# ESP-IDF (for building firmware) +# See https://docs.espressif.com/projects/esp-idf/en/latest/esp32s3/get-started/ + +# Python tools +pip install esptool esp-idf-nvs-partition-gen + +# Coverage tools (optional, Layer 5) +sudo apt install lcov # Debian/Ubuntu +brew install lcov # macOS + +# Fuzz testing (optional, Layer 6) +sudo apt install clang # Debian/Ubuntu + +# Mesh testing (optional, Layer 3 — requires root) +sudo apt install socat bridge-utils iproute2 +``` + +### Run the Full Test Suite + +```bash +# Layer 2: Single-node test (build + run + validate) +bash scripts/qemu-esp32s3-test.sh + +# Layer 
3: Multi-node mesh (3 nodes, requires root) +sudo bash scripts/qemu-mesh-test.sh 3 + +# Layer 6: Fuzz testing (60 seconds per target) +cd firmware/esp32-csi-node/test && make all CC=clang +make run_serialize FUZZ_DURATION=60 + +# Layer 7: Generate NVS test matrix +python3 scripts/generate_nvs_matrix.py --output-dir build/nvs_matrix + +# Layer 8: Snapshot regression tests +bash scripts/qemu-snapshot-test.sh --create +bash scripts/qemu-snapshot-test.sh --restore csi-streaming + +# Layer 9: Chaos/fault injection +bash scripts/qemu-chaos-test.sh --faults all --duration 120 +``` + +### Environment Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `QEMU_PATH` | `qemu-system-xtensa` | Path to Espressif QEMU binary | +| `QEMU_TIMEOUT` | `60` (single) / `45` (mesh) / `120` (chaos) | Test timeout in seconds | +| `SKIP_BUILD` | unset | Set to `1` to skip firmware build step | +| `NVS_BIN` | unset | Path to pre-built NVS partition binary | +| `QEMU_NET` | `1` | Set to `0` to disable SLIRP networking | +| `CHAOS_SEED` | current time | Seed for reproducible chaos testing | + +### Exit Codes (all scripts) + +| Code | Meaning | Action | +|------|---------|--------| +| 0 | PASS | None — all checks passed | +| 1 | WARN | Non-critical issues; review output | +| 2 | FAIL | Critical checks failed; fix and re-run | +| 3 | FATAL | Build error, crash, or missing tool; check prerequisites | + ## Decision Introduce a **comprehensive QEMU testing platform** for the ESP32-S3 CSI node firmware with nine capability layers: @@ -456,6 +548,53 @@ xtensa-esp-elf-gdb build/esp32-csi-node.elf \ -ex "continue" ``` +### Debugging Walkthrough + +**1. Start QEMU with GDB stub (paused at reset vector):** + +```bash +qemu-system-xtensa \ + -machine esp32s3 \ + -nographic \ + -drive file=build/qemu_flash.bin,if=mtd,format=raw \ + -serial mon:stdio \ + -s -S +# -s opens GDB server on localhost:1234 +# -S pauses CPU until GDB sends "continue" +``` + +**2. 
Connect from a second terminal:** + +```bash +xtensa-esp-elf-gdb build/esp32-csi-node.elf \ + -ex "target remote :1234" \ + -ex "b app_main" \ + -ex "continue" +``` + +**3. Set a breakpoint on DSP processing and inspect state:** + +``` +(gdb) b edge_processing.c:dsp_task +(gdb) continue +# ...breakpoint hit... +(gdb) print g_nvs_config +(gdb) print ring->head - ring->tail +(gdb) continue +``` + +**4. Connect from VS Code** using the `launch.json` config below (set breakpoints in the editor gutter, then press F5). + +**5. Dump gcov coverage data (requires `sdkconfig.coverage` overlay):** + +``` +(gdb) monitor gcov dump +# Writes .gcda files to the build directory. +# Then generate the HTML report on the host: +# lcov --capture --directory build --output-file coverage.info +# genhtml coverage.info --output-directory build/coverage_report +``` + ### Key Breakpoint Locations | Breakpoint | Purpose | diff --git a/scripts/check_health.py b/scripts/check_health.py index 09bb8a77..a25d1e89 100755 --- a/scripts/check_health.py +++ b/scripts/check_health.py @@ -256,6 +256,13 @@ def run_health_checks( def main(): parser = argparse.ArgumentParser( description="QEMU Post-Fault Health Checker — ADR-061 Layer 9", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=( + "Example output:\n" + " [HEALTHY] t=30s frames=150 (5.0 fps) crashes=0 heap_err=0 wdt=0 reboots=0\n" + " \n" + " VERDICT: Firmware is healthy. No critical issues detected." 
+ ), ) parser.add_argument( "--log", required=True, diff --git a/scripts/generate_nvs_matrix.py b/scripts/generate_nvs_matrix.py index a8f84246..c4fff261 100644 --- a/scripts/generate_nvs_matrix.py +++ b/scripts/generate_nvs_matrix.py @@ -314,12 +314,21 @@ def generate_nvs_binary(csv_content: str, size: int) -> bytes: return f.read() # Last resort: try as a module - subprocess.check_call([ - sys.executable, "-m", "nvs_partition_gen", "generate", - csv_path, bin_path, hex(size) - ]) - with open(bin_path, "rb") as f: - return f.read() + try: + subprocess.check_call([ + sys.executable, "-m", "nvs_partition_gen", "generate", + csv_path, bin_path, hex(size) + ]) + with open(bin_path, "rb") as f: + return f.read() + except (subprocess.CalledProcessError, FileNotFoundError): + print("ERROR: NVS partition generator tool not found.", file=sys.stderr) + print("Install: pip install esp-idf-nvs-partition-gen", file=sys.stderr) + print("Or set IDF_PATH to your ESP-IDF installation", file=sys.stderr) + raise RuntimeError( + "NVS partition generator not available. " + "Install: pip install esp-idf-nvs-partition-gen" + ) finally: for p in (csv_path, bin_path): diff --git a/scripts/inject_fault.py b/scripts/inject_fault.py index 99c91dd4..bc002537 100755 --- a/scripts/inject_fault.py +++ b/scripts/inject_fault.py @@ -46,8 +46,12 @@ def connect_monitor(sock_path: str, timeout: float = CMD_TIMEOUT) -> socket.sock banner = s.recv(RECV_BUFSIZE).decode("utf-8", errors="replace") if banner: pass # Consume silently + else: + print(f"WARNING: Connected to {sock_path} but received no banner data. " + f"QEMU monitor may not be ready.", file=sys.stderr) except socket.timeout: - pass # No banner is OK + print(f"WARNING: Connected to {sock_path} but timed out waiting for banner " + f"after {timeout}s. 
QEMU monitor may be unresponsive.", file=sys.stderr) return s diff --git a/scripts/qemu-chaos-test.sh b/scripts/qemu-chaos-test.sh index cc708c90..b79b0b07 100755 --- a/scripts/qemu-chaos-test.sh +++ b/scripts/qemu-chaos-test.sh @@ -20,10 +20,52 @@ # FAULT_WAIT - Seconds to wait after fault injection (default: 5) # # Exit codes: -# 0 All faults handled gracefully -# 1 Some faults caused degraded state -# 2 Some faults caused failures -# 3 Fatal — firmware crashed or QEMU died +# 0 PASS — all checks passed +# 1 WARN — non-critical checks failed +# 2 FAIL — critical checks failed +# 3 FATAL — build error, crash, or infrastructure failure + +# ── Help ────────────────────────────────────────────────────────────── +usage() { + cat <<'HELP' +Usage: qemu-chaos-test.sh [OPTIONS] + +Launch firmware under QEMU and inject a series of faults to verify the +firmware's resilience. Each fault is injected via the QEMU monitor socket, +followed by a recovery window and health check. + +Fault types: + wifi_kill Pause/resume VM to simulate WiFi reconnect + ring_flood Inject 1000 rapid mock frames (ring buffer stress) + heap_pressure Write to heap metadata to simulate low memory + timer_starvation Pause VM for 500ms to starve FreeRTOS timers + corrupt_frame Inject a CSI frame with bad magic bytes + nvs_corrupt Write garbage to NVS flash region + +Options: + -h, --help Show this help message and exit + +Environment variables: + QEMU_PATH Path to qemu-system-xtensa (default: qemu-system-xtensa) + QEMU_TIMEOUT Boot timeout in seconds (default: 15) + FLASH_IMAGE Path to merged flash image (default: build/qemu_flash.bin) + FAULT_WAIT Seconds to wait after injection (default: 5) + +Examples: + ./qemu-chaos-test.sh + QEMU_TIMEOUT=30 FAULT_WAIT=10 ./qemu-chaos-test.sh + FLASH_IMAGE=/path/to/image.bin ./qemu-chaos-test.sh + +Exit codes: + 0 PASS — all checks passed + 1 WARN — non-critical checks failed + 2 FAIL — critical checks failed + 3 FATAL — build error, crash, or infrastructure failure 
+HELP + exit 0 +} + +case "${1:-}" in -h|--help) usage ;; esac set -euo pipefail @@ -160,16 +202,29 @@ echo "" if ! command -v "$QEMU_BIN" &>/dev/null; then echo "ERROR: QEMU binary not found: $QEMU_BIN" + echo " Install: sudo apt install qemu-system-misc # Debian/Ubuntu" + echo " Install: brew install qemu # macOS" + echo " Or set QEMU_PATH to the qemu-system-xtensa binary." exit 3 fi if ! command -v socat &>/dev/null; then - echo "ERROR: socat not found. Install socat for QEMU monitor communication." + echo "ERROR: socat not found (needed for QEMU monitor communication)." + echo " Install: sudo apt install socat # Debian/Ubuntu" + echo " Install: brew install socat # macOS" + exit 3 +fi + +if ! command -v python3 &>/dev/null; then + echo "ERROR: python3 not found (needed for fault injection scripts)." + echo " Install: sudo apt install python3 # Debian/Ubuntu" + echo " Install: brew install python # macOS" exit 3 fi if [ ! -f "$FLASH_IMAGE" ]; then echo "ERROR: Flash image not found: $FLASH_IMAGE" + echo "Run qemu-esp32s3-test.sh first to build the flash image." exit 3 fi diff --git a/scripts/qemu-esp32s3-test.sh b/scripts/qemu-esp32s3-test.sh index 4888bbff..d5420cca 100755 --- a/scripts/qemu-esp32s3-test.sh +++ b/scripts/qemu-esp32s3-test.sh @@ -12,10 +12,44 @@ # NVS_BIN - Path to a pre-built NVS binary to inject (optional) # # Exit codes: -# 0 All checks passed -# 1 Warnings (non-critical checks failed) -# 2 Errors (critical checks failed) -# 3 Fatal (crash detected or build failure) +# 0 PASS — all checks passed +# 1 WARN — non-critical checks failed +# 2 FAIL — critical checks failed +# 3 FATAL — build error, crash, or infrastructure failure + +# ── Help ────────────────────────────────────────────────────────────── +usage() { + cat <<'HELP' +Usage: qemu-esp32s3-test.sh [OPTIONS] + +Build ESP32-S3 firmware with mock CSI, merge binaries into a single flash +image, run under QEMU with a timeout, and validate the UART output. 
+ +Options: + -h, --help Show this help message and exit + +Environment variables: + QEMU_PATH Path to qemu-system-xtensa (default: qemu-system-xtensa) + QEMU_TIMEOUT Timeout in seconds (default: 60) + SKIP_BUILD Set to "1" to skip idf.py build (default: unset) + NVS_BIN Path to pre-built NVS binary (optional) + QEMU_NET Set to "0" to disable networking (default: 1) + +Examples: + ./qemu-esp32s3-test.sh + SKIP_BUILD=1 ./qemu-esp32s3-test.sh + QEMU_PATH=/opt/qemu/bin/qemu-system-xtensa QEMU_TIMEOUT=120 ./qemu-esp32s3-test.sh + +Exit codes: + 0 PASS — all checks passed + 1 WARN — non-critical checks failed + 2 FAIL — critical checks failed + 3 FATAL — build error, crash, or infrastructure failure +HELP + exit 0 +} + +case "${1:-}" in -h|--help) usage ;; esac set -euo pipefail @@ -35,10 +69,33 @@ echo "QEMU binary: $QEMU_BIN" echo "Timeout: ${TIMEOUT_SEC}s" echo "" -# Verify QEMU is available +# ── Prerequisite checks ─────────────────────────────────────────────── if ! command -v "$QEMU_BIN" &>/dev/null; then echo "ERROR: QEMU binary not found: $QEMU_BIN" - echo "Set QEMU_PATH to the qemu-system-xtensa binary." + echo " Install: sudo apt install qemu-system-misc # Debian/Ubuntu" + echo " Install: brew install qemu # macOS" + echo " Or set QEMU_PATH to the qemu-system-xtensa binary." + exit 3 +fi + +if ! command -v python3 &>/dev/null; then + echo "ERROR: python3 not found." + echo " Install: sudo apt install python3 # Debian/Ubuntu" + echo " Install: brew install python # macOS" + exit 3 +fi + +if ! python3 -m esptool version &>/dev/null 2>&1; then + echo "ERROR: esptool not found (needed to merge flash binaries)." + echo " Install: pip install esptool" + exit 3 +fi + +# ── SKIP_BUILD precheck ────────────────────────────────────────────── +if [ "${SKIP_BUILD:-}" = "1" ] && [ ! 
-f "$BUILD_DIR/esp32-csi-node.bin" ]; then + echo "ERROR: SKIP_BUILD=1 but flash image not found: $BUILD_DIR/esp32-csi-node.bin" + echo "Build the firmware first: ./qemu-esp32s3-test.sh (without SKIP_BUILD)" + echo "Or unset SKIP_BUILD to build automatically." exit 3 fi diff --git a/scripts/qemu-mesh-test.sh b/scripts/qemu-mesh-test.sh index 64097398..9832bf8e 100644 --- a/scripts/qemu-mesh-test.sh +++ b/scripts/qemu-mesh-test.sh @@ -24,10 +24,52 @@ # - Rust workspace with wifi-densepose-hardware crate (aggregator binary) # # Exit codes: -# 0 All checks passed -# 1 Warnings (non-critical checks failed) -# 2 Errors (critical checks failed) -# 3 Fatal (build failure, crash, or infrastructure error) +# 0 PASS — all checks passed +# 1 WARN — non-critical checks failed +# 2 FAIL — critical checks failed +# 3 FATAL — build error, crash, or infrastructure failure + +# ── Help ────────────────────────────────────────────────────────────── +usage() { + cat <<'HELP' +Usage: sudo ./qemu-mesh-test.sh [OPTIONS] [N_NODES] + +Spawn N ESP32-S3 QEMU instances connected via a Linux bridge, each with +unique NVS provisioning (node ID, TDM slot), and a Rust aggregator that +collects frames from all nodes. + +NOTE: Requires root/sudo for TAP/bridge creation. 
+ +Options: + -h, --help Show this help message and exit + +Positional: + N_NODES Number of mesh nodes (default: 3, minimum: 2) + +Environment variables: + QEMU_PATH Path to qemu-system-xtensa (default: qemu-system-xtensa) + QEMU_TIMEOUT Timeout in seconds (default: 45) + MESH_TIMEOUT Alias for QEMU_TIMEOUT (deprecated)(default: 45) + SKIP_BUILD Set to "1" to skip idf.py build (default: unset) + BRIDGE_NAME Bridge interface name (default: qemu-br0) + BRIDGE_SUBNET Bridge IP/mask (default: 10.0.0.1/24) + AGGREGATOR_PORT UDP port for aggregator (default: 5005) + +Examples: + sudo ./qemu-mesh-test.sh + sudo QEMU_TIMEOUT=90 ./qemu-mesh-test.sh 5 + sudo SKIP_BUILD=1 ./qemu-mesh-test.sh 4 + +Exit codes: + 0 PASS — all checks passed + 1 WARN — non-critical checks failed + 2 FAIL — critical checks failed + 3 FATAL — build error, crash, or infrastructure failure +HELP + exit 0 +} + +case "${1:-}" in -h|--help) usage ;; esac set -euo pipefail @@ -48,7 +90,7 @@ VALIDATE_SCRIPT="$SCRIPT_DIR/validate_mesh_test.py" # --------------------------------------------------------------------------- N_NODES="${1:-3}" QEMU_BIN="${QEMU_PATH:-qemu-system-xtensa}" -MESH_TIMEOUT="${MESH_TIMEOUT:-45}" +TIMEOUT="${QEMU_TIMEOUT:-${MESH_TIMEOUT:-45}}" BRIDGE="${BRIDGE_NAME:-qemu-br0}" BRIDGE_IP="${BRIDGE_SUBNET:-10.0.0.1/24}" AGG_PORT="${AGGREGATOR_PORT:-5005}" @@ -59,7 +101,7 @@ echo "Nodes: $N_NODES" echo "Bridge: $BRIDGE ($BRIDGE_IP)" echo "Aggregator: 0.0.0.0:$AGG_PORT" echo "QEMU binary: $QEMU_BIN" -echo "Timeout: ${MESH_TIMEOUT}s" +echo "Timeout: ${TIMEOUT}s" echo "" # --------------------------------------------------------------------------- @@ -72,12 +114,22 @@ fi if ! command -v "$QEMU_BIN" &>/dev/null; then echo "ERROR: QEMU binary not found: $QEMU_BIN" - echo "Set QEMU_PATH to the qemu-system-xtensa binary." + echo " Install: sudo apt install qemu-system-misc # Debian/Ubuntu" + echo " Install: brew install qemu # macOS" + echo " Or set QEMU_PATH to the qemu-system-xtensa binary." 
+ exit 3 +fi + +if ! command -v python3 &>/dev/null; then + echo "ERROR: python3 not found." + echo " Install: sudo apt install python3 # Debian/Ubuntu" + echo " Install: brew install python # macOS" exit 3 fi if ! command -v ip &>/dev/null; then - echo "ERROR: 'ip' command not found. Install iproute2." + echo "ERROR: 'ip' command not found." + echo " Install: sudo apt install iproute2 # Debian/Ubuntu" exit 3 fi @@ -85,6 +137,20 @@ if ! command -v brctl &>/dev/null && ! ip link help bridge &>/dev/null 2>&1; the echo "WARNING: bridge-utils not found; will use 'ip link' for bridge creation." fi +if command -v socat &>/dev/null; then + true # optional, available +else + echo "NOTE: socat not found (optional, used for advanced monitor communication)." + echo " Install: sudo apt install socat # Debian/Ubuntu" + echo " Install: brew install socat # macOS" +fi + +if ! command -v cargo &>/dev/null; then + echo "ERROR: cargo not found (needed to build the Rust aggregator)." + echo " Install: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh" + exit 3 +fi + if [ "$(id -u)" -ne 0 ]; then echo "ERROR: This script must be run as root (for TAP/bridge creation)." echo "Usage: sudo $0 [N_NODES]" @@ -288,13 +354,13 @@ for i in $(seq 0 $((N_NODES - 1))); do done echo "" -echo "All nodes launched. Waiting ${MESH_TIMEOUT}s for mesh simulation..." +echo "All nodes launched. Waiting ${TIMEOUT}s for mesh simulation..." echo "" # --------------------------------------------------------------------------- # Wait for timeout # --------------------------------------------------------------------------- -sleep "$MESH_TIMEOUT" +sleep "$TIMEOUT" echo "Timeout reached. Stopping all processes..." 
diff --git a/scripts/qemu-snapshot-test.sh b/scripts/qemu-snapshot-test.sh index d35ca176..0e1d192b 100755 --- a/scripts/qemu-snapshot-test.sh +++ b/scripts/qemu-snapshot-test.sh @@ -16,10 +16,44 @@ # SKIP_SNAPSHOT - Set to "1" to run without snapshots (baseline timing) # # Exit codes: -# 0 All tests passed -# 1 Some tests had warnings -# 2 Some tests failed -# 3 Fatal error (QEMU failed to start, crash detected) +# 0 PASS — all checks passed +# 1 WARN — non-critical checks failed +# 2 FAIL — critical checks failed +# 3 FATAL — build error, crash, or infrastructure failure + +# ── Help ────────────────────────────────────────────────────────────── +usage() { + cat <<'HELP' +Usage: qemu-snapshot-test.sh [OPTIONS] + +Use QEMU VM snapshots to accelerate repeated test runs. Snapshots the VM +state after boot and after the first CSI frame, then restores from the +snapshot for each individual test (~2s vs ~15s per test). + +Options: + -h, --help Show this help message and exit + +Environment variables: + QEMU_PATH Path to qemu-system-xtensa (default: qemu-system-xtensa) + QEMU_TIMEOUT Per-test timeout in seconds (default: 10) + FLASH_IMAGE Path to merged flash image (default: build/qemu_flash.bin) + SKIP_SNAPSHOT Set to "1" to run without snapshots (baseline timing) + +Examples: + ./qemu-snapshot-test.sh + QEMU_TIMEOUT=20 ./qemu-snapshot-test.sh + FLASH_IMAGE=/path/to/image.bin ./qemu-snapshot-test.sh + +Exit codes: + 0 PASS — all checks passed + 1 WARN — non-critical checks failed + 2 FAIL — critical checks failed + 3 FATAL — build error, crash, or infrastructure failure +HELP + exit 0 +} + +case "${1:-}" in -h|--help) usage ;; esac set -euo pipefail @@ -165,12 +199,23 @@ echo "" if ! command -v "$QEMU_BIN" &>/dev/null; then echo "ERROR: QEMU binary not found: $QEMU_BIN" - echo "Set QEMU_PATH to the qemu-system-xtensa binary." 
+ echo " Install: sudo apt install qemu-system-misc # Debian/Ubuntu" + echo " Install: brew install qemu # macOS" + echo " Or set QEMU_PATH to the qemu-system-xtensa binary." + exit 3 +fi + +if ! command -v qemu-img &>/dev/null; then + echo "ERROR: qemu-img not found (needed for snapshot disk management)." + echo " Install: sudo apt install qemu-utils # Debian/Ubuntu" + echo " Install: brew install qemu # macOS" exit 3 fi if ! command -v socat &>/dev/null; then - echo "ERROR: socat not found. Install socat for QEMU monitor communication." + echo "ERROR: socat not found (needed for QEMU monitor communication)." + echo " Install: sudo apt install socat # Debian/Ubuntu" + echo " Install: brew install socat # macOS" exit 3 fi diff --git a/scripts/validate_mesh_test.py b/scripts/validate_mesh_test.py index d8bb1f81..c75760af 100644 --- a/scripts/validate_mesh_test.py +++ b/scripts/validate_mesh_test.py @@ -196,12 +196,18 @@ def validate_mesh( # Load aggregator results if available results: Optional[dict] = None - if results_path and results_path.exists(): - try: - results = json.loads(results_path.read_text(encoding="utf-8")) - except (json.JSONDecodeError, OSError) as exc: - report.add("Results JSON", Severity.ERROR, - f"Failed to parse results: {exc}") + if results_path: + if not results_path.exists(): + print(f"WARNING: Aggregator results file not found: {results_path}", + file=sys.stderr) + report.add("Results JSON", Severity.WARN, + f"Results file not found: {results_path}") + else: + try: + results = json.loads(results_path.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError) as exc: + report.add("Results JSON", Severity.ERROR, + f"Failed to parse results: {exc}") # Load per-node logs node_logs: Dict[int, str] = {} @@ -449,8 +455,14 @@ def validate_mesh( def main(): parser = argparse.ArgumentParser( description="Validate multi-node mesh QEMU test output (ADR-061 Layer 3)", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=( + 
"Examples:\n" + " python3 validate_mesh_test.py --nodes 3 --results mesh_results.json\n" + " python3 validate_mesh_test.py --nodes 4 --log node0.log --log node1.log" + ), ) - parser.add_argument("results", nargs="?", default=None, + parser.add_argument("--results", default=None, help="Path to mesh_test_results.json from aggregator") parser.add_argument("--nodes", "-n", type=int, required=True, help="Expected number of mesh nodes") diff --git a/scripts/validate_qemu_output.py b/scripts/validate_qemu_output.py index 5fb1d427..a6a11def 100644 --- a/scripts/validate_qemu_output.py +++ b/scripts/validate_qemu_output.py @@ -16,6 +16,7 @@ Exit codes: 3 Fatal (crash or corruption detected) """ +import argparse import re import sys from dataclasses import dataclass, field @@ -364,11 +365,18 @@ def validate_log(log_text: str) -> ValidationReport: def main(): - if len(sys.argv) < 2: - print(f"Usage: {sys.argv[0]} ", file=sys.stderr) - sys.exit(3) + parser = argparse.ArgumentParser( + description="Validate QEMU ESP32-S3 UART output (ADR-061)", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog="Example: python3 validate_qemu_output.py build/qemu_output.log", + ) + parser.add_argument( + "log_file", + help="Path to QEMU UART log file", + ) + args = parser.parse_args() - log_path = Path(sys.argv[1]) + log_path = Path(args.log_file) if not log_path.exists(): print(f"ERROR: Log file not found: {log_path}", file=sys.stderr) sys.exit(3)