diff --git a/.github/workflows/firmware-qemu.yml b/.github/workflows/firmware-qemu.yml index c44d26e2..dd52eda1 100644 --- a/.github/workflows/firmware-qemu.yml +++ b/.github/workflows/firmware-qemu.yml @@ -32,9 +32,9 @@ jobs: with: path: /opt/qemu-esp32 # Include date component so cache refreshes monthly when branch updates - key: qemu-esp32s3-${{ env.QEMU_BRANCH }}-v3-${{ github.run_id }} + key: qemu-esp32s3-${{ env.QEMU_BRANCH }}-v4 restore-keys: | - qemu-esp32s3-${{ env.QEMU_BRANCH }}-v3- + qemu-esp32s3-${{ env.QEMU_BRANCH }}- - name: Install QEMU build dependencies if: steps.cache-qemu.outputs.cache-hit != 'true' @@ -215,26 +215,23 @@ jobs: - name: Run serialize fuzzer (60s) working-directory: firmware/esp32-csi-node/test - run: make run_serialize FUZZ_DURATION=60 - continue-on-error: true + run: make run_serialize FUZZ_DURATION=60 || echo "FUZZER_CRASH=serialize" >> "$GITHUB_ENV" - name: Run edge enqueue fuzzer (60s) working-directory: firmware/esp32-csi-node/test - run: make run_edge FUZZ_DURATION=60 - continue-on-error: true + run: make run_edge FUZZ_DURATION=60 || echo "FUZZER_CRASH=edge" >> "$GITHUB_ENV" - name: Run NVS config fuzzer (60s) working-directory: firmware/esp32-csi-node/test - run: make run_nvs FUZZ_DURATION=60 - continue-on-error: true + run: make run_nvs FUZZ_DURATION=60 || echo "FUZZER_CRASH=nvs" >> "$GITHUB_ENV" - name: Check for crashes working-directory: firmware/esp32-csi-node/test run: | - CRASHES=$(find . -type f -name "crash-*" -o -type f -name "oom-*" -o -type f -name "timeout-*" 2>/dev/null | wc -l) + CRASHES=$(find . -type f \( -name "crash-*" -o -name "oom-*" -o -name "timeout-*" \) 2>/dev/null | wc -l) echo "Crash artifacts found: $CRASHES" - if [ "$CRASHES" -gt 0 ]; then - echo "::error::Fuzzer found $CRASHES crash/oom/timeout artifacts" + if [ "$CRASHES" -gt 0 ] || [ -n "${FUZZER_CRASH:-}" ]; then + echo "::error::Fuzzer found $CRASHES crash/oom/timeout artifacts. 
FUZZER_CRASH=${FUZZER_CRASH:-none}" ls -la crash-* oom-* timeout-* 2>/dev/null exit 1 fi diff --git a/.vscode/launch.json b/.vscode/launch.json index b46a88a1..d12f2c20 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -45,14 +45,5 @@ } ] } - ], - "compounds": [ - { - "name": "QEMU: Launch + Debug", - "configurations": [ - "QEMU ESP32-S3 Debug", - "QEMU ESP32-S3 Debug (attach)" - ] - } ] } diff --git a/docs/adr/ADR-061-qemu-esp32s3-firmware-testing.md b/docs/adr/ADR-061-qemu-esp32s3-firmware-testing.md index 64aa1ace..6811cb7a 100644 --- a/docs/adr/ADR-061-qemu-esp32s3-firmware-testing.md +++ b/docs/adr/ADR-061-qemu-esp32s3-firmware-testing.md @@ -237,7 +237,7 @@ This model exercises: | 5 | Channel sweep | 5s | Frames on channels 1, 6, 11 in sequence | | 6 | MAC filter test | 5s | Frames with wrong MAC are dropped (counter check) | | 7 | Ring buffer overflow | 3s | 1000 frames in 100ms burst, graceful drop | -| 8 | Boundary RSSI | 5s | RSSI sweeps -127 to 0, no crash | +| 8 | Boundary RSSI | 5s | RSSI sweeps -90 to -10 dBm, no crash | | 9 | Zero-length frame | 2s | `iq_len=0` frames, serialize returns 0 | --- diff --git a/firmware/esp32-csi-node/main/mock_csi.c b/firmware/esp32-csi-node/main/mock_csi.c index 619f0773..b2a95fce 100644 --- a/firmware/esp32-csi-node/main/mock_csi.c +++ b/firmware/esp32-csi-node/main/mock_csi.c @@ -80,7 +80,7 @@ static const char *TAG = "mock_csi"; /** Pi constant. */ #ifndef M_PI -#define M_PI 3.14159265358979323846f +#define M_PI 3.14159265358979323846 #endif /* ---- Channel sweep table ---- */ @@ -101,7 +101,7 @@ static const uint8_t s_bad_mac[6] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; /** * 32-bit Galois LFSR for deterministic pseudo-random noise. * Avoids stdlib rand() which may not be available on ESP32 bare-metal. - * Taps: bits 32, 22, 2, 1 (maximal-length polynomial). + * Taps: bits 32, 31, 29, 1 (Galois LFSR polynomial 0xD0000001). 
*/ static uint32_t s_lfsr = 0xDEADBEEF; @@ -110,7 +110,7 @@ static uint32_t lfsr_next(void) uint32_t lsb = s_lfsr & 1u; s_lfsr >>= 1; if (lsb) { - s_lfsr ^= 0xD0000001u; /* x^32 + x^22 + x^2 + x^1 */ + s_lfsr ^= 0xD0000001u; /* x^32 + x^31 + x^29 + x^1 + 1 */ } return s_lfsr; } @@ -130,6 +130,12 @@ static float lfsr_float(void) static mock_state_t s_state; static esp_timer_handle_t s_timer = NULL; +/** Tracks whether the MAC filter has been set up in gen_mac_filter. */ +static bool s_mac_filter_initialized = false; + +/** Tracks whether the overflow burst has fired in gen_ring_overflow. */ +static bool s_overflow_burst_done = false; + /* External NVS config (for MAC filter scenario). */ extern nvs_config_t g_nvs_config; @@ -157,9 +163,9 @@ static float channel_to_lambda(uint8_t channel) /* ---- Helper: elapsed ms since scenario start ---- */ -static uint32_t scenario_elapsed_ms(void) +static int64_t scenario_elapsed_ms(void) { - uint32_t now = (uint32_t)(esp_timer_get_time() / 1000); + int64_t now = esp_timer_get_time() / 1000; return now - s_state.scenario_start_ms; } @@ -277,7 +283,7 @@ static void gen_walking(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi) */ static void gen_fall(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi) { - uint32_t elapsed = scenario_elapsed_ms(); + int64_t elapsed = scenario_elapsed_ms(); uint32_t duration = CONFIG_CSI_MOCK_SCENARIO_DURATION_MS; /* Fall occurs at 70% of scenario duration. */ @@ -403,7 +409,6 @@ static void gen_mac_filter(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi, bool *skip_inject) { /* Set up the filter MAC to match s_good_mac on first frame of this scenario. */ - static bool s_mac_filter_initialized = false; if (!s_mac_filter_initialized) { memcpy(g_nvs_config.filter_mac, s_good_mac, 6); g_nvs_config.filter_mac_set = 1; @@ -439,10 +444,10 @@ static void gen_ring_overflow(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi, *channel = 6; *rssi = -50; - /* Only burst on the first timer tick of this scenario. 
*/ - uint32_t elapsed = scenario_elapsed_ms(); - if (elapsed < MOCK_CSI_INTERVAL_MS + 10) { + /* Burst once on the first timer tick of this scenario. */ + if (!s_overflow_burst_done) { *burst_count = OVERFLOW_BURST_COUNT; + s_overflow_burst_done = true; } else { *burst_count = 1; } @@ -454,7 +459,7 @@ static void gen_ring_overflow(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi, */ static void gen_boundary_rssi(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi) { - uint32_t elapsed = scenario_elapsed_ms(); + int64_t elapsed = scenario_elapsed_ms(); uint32_t duration = CONFIG_CSI_MOCK_SCENARIO_DURATION_MS; /* Linear sweep: -90 to -10 dBm. */ @@ -492,7 +497,7 @@ static void advance_scenario(void) } s_state.scenario = s_state.all_idx; - s_state.scenario_start_ms = (uint32_t)(esp_timer_get_time() / 1000); + s_state.scenario_start_ms = esp_timer_get_time() / 1000; /* Reset per-scenario state. */ s_state.person_x = 1.0f; @@ -521,7 +526,7 @@ static void mock_timer_cb(void *arg) if (s_state.scenario == MOCK_SCENARIO_ALL || (s_state.all_idx > 0 && s_state.all_idx < MOCK_SCENARIO_COUNT)) { /* We're running in sequential mode. */ - uint32_t elapsed = scenario_elapsed_ms(); + int64_t elapsed = scenario_elapsed_ms(); if (elapsed >= CONFIG_CSI_MOCK_SCENARIO_DURATION_MS) { advance_scenario(); } @@ -619,8 +624,10 @@ esp_err_t mock_csi_init(uint8_t scenario) s_state.person_speed = WALK_SPEED_MS; s_state.person2_x = 4.0f; s_state.person2_speed = WALK_SPEED_MS * 0.6f; - s_state.scenario_start_ms = (uint32_t)(esp_timer_get_time() / 1000); + s_state.scenario_start_ms = esp_timer_get_time() / 1000; s_all_done = false; + s_mac_filter_initialized = false; + s_overflow_burst_done = false; /* Reset LFSR to deterministic seed. 
*/ s_lfsr = 0xDEADBEEF; diff --git a/firmware/esp32-csi-node/main/mock_csi.h b/firmware/esp32-csi-node/main/mock_csi.h index 2261f29e..26bb8b68 100644 --- a/firmware/esp32-csi-node/main/mock_csi.h +++ b/firmware/esp32-csi-node/main/mock_csi.h @@ -70,7 +70,7 @@ typedef struct { float person2_speed; /**< Second person movement speed. */ uint8_t channel_idx; /**< Index into channel sweep table. */ int8_t rssi_sweep; /**< Current RSSI for boundary sweep. */ - uint32_t scenario_start_ms; /**< Timestamp when current scenario started. */ + int64_t scenario_start_ms; /**< Timestamp when current scenario started. */ uint8_t all_idx; /**< Current scenario index in SCENARIO_ALL mode. */ } mock_state_t; diff --git a/firmware/esp32-csi-node/sdkconfig.coverage b/firmware/esp32-csi-node/sdkconfig.coverage index 79844f03..75e5ee81 100644 --- a/firmware/esp32-csi-node/sdkconfig.coverage +++ b/firmware/esp32-csi-node/sdkconfig.coverage @@ -40,6 +40,13 @@ CONFIG_CSI_TARGET_IP="10.0.2.2" CONFIG_CSI_MOCK_SCENARIO_DURATION_MS=5000 CONFIG_CSI_MOCK_LOG_FRAMES=y +# --------------------------------------------------------------------------- +# FreeRTOS and watchdog: match sdkconfig.qemu for QEMU timing tolerance +# --------------------------------------------------------------------------- +CONFIG_FREERTOS_TIMER_TASK_STACK_DEPTH=4096 +CONFIG_ESP_TASK_WDT_TIMEOUT_S=30 +CONFIG_ESP_INT_WDT_TIMEOUT_MS=800 + # --------------------------------------------------------------------------- # Logging and display # --------------------------------------------------------------------------- diff --git a/firmware/esp32-csi-node/test/Makefile b/firmware/esp32-csi-node/test/Makefile index df481b97..c14f0383 100644 --- a/firmware/esp32-csi-node/test/Makefile +++ b/firmware/esp32-csi-node/test/Makefile @@ -61,19 +61,19 @@ fuzz_nvs: fuzz_nvs_config.c $(STUBS_SRC) # --- Run targets --- run_serialize: fuzz_serialize - @mkdir -p corpus - ./fuzz_serialize corpus/ -max_total_time=$(FUZZ_DURATION) -max_len=2048 
-jobs=$(FUZZ_JOBS) + @mkdir -p corpus_serialize + ./fuzz_serialize corpus_serialize/ -max_total_time=$(FUZZ_DURATION) -max_len=2048 -jobs=$(FUZZ_JOBS) run_edge: fuzz_edge - @mkdir -p corpus - ./fuzz_edge corpus/ -max_total_time=$(FUZZ_DURATION) -max_len=4096 -jobs=$(FUZZ_JOBS) + @mkdir -p corpus_edge + ./fuzz_edge corpus_edge/ -max_total_time=$(FUZZ_DURATION) -max_len=4096 -jobs=$(FUZZ_JOBS) run_nvs: fuzz_nvs - @mkdir -p corpus - ./fuzz_nvs corpus/ -max_total_time=$(FUZZ_DURATION) -max_len=256 -jobs=$(FUZZ_JOBS) + @mkdir -p corpus_nvs + ./fuzz_nvs corpus_nvs/ -max_total_time=$(FUZZ_DURATION) -max_len=256 -jobs=$(FUZZ_JOBS) run_all: run_serialize run_edge run_nvs clean: rm -f fuzz_serialize fuzz_edge fuzz_nvs - rm -rf corpus/ + rm -rf corpus_serialize/ corpus_edge/ corpus_nvs/ diff --git a/scripts/generate_nvs_matrix.py b/scripts/generate_nvs_matrix.py index c4fff261..3f2c4ae5 100644 --- a/scripts/generate_nvs_matrix.py +++ b/scripts/generate_nvs_matrix.py @@ -331,7 +331,7 @@ def generate_nvs_binary(csv_content: str, size: int) -> bytes: ) finally: - for p in (csv_path, bin_path): + for p in set((csv_path, bin_path)): # deduplicate in case paths are identical if os.path.isfile(p): os.unlink(p) diff --git a/scripts/inject_fault.py b/scripts/inject_fault.py index bc002537..b6101ded 100755 --- a/scripts/inject_fault.py +++ b/scripts/inject_fault.py @@ -18,6 +18,8 @@ Usage: """ import argparse +import os +import random import socket import sys import time @@ -123,31 +125,24 @@ def fault_ring_flood(s: socket.socket) -> None: print(f"[ring_flood] Injected: {sent}/1000 rapid NMI triggers") -def fault_heap_exhaust(s: socket.socket) -> None: - """Write to heap tracking metadata to simulate memory pressure. +def fault_heap_exhaust(s: socket.socket, flash_path: str = None) -> None: + """Simulate memory pressure by pausing VM to trigger watchdog/heap checks. - ESP32-S3 DRAM starts at 0x3FC88000. 
We write a pattern to the - heap control block area to simulate low-memory conditions. The - firmware's heap_caps checks should detect the anomaly. + Actual heap memory writes require a GDB stub (-gdb tcp::1234). + This function probes the heap region and pauses the VM to stress + heap management as a realistic simulation. """ - # ESP32-S3 internal DRAM heap region heap_base = 0x3FC88000 - # Write a pattern that looks like an exhausted free-list - # (all zeros in the next-free pointer) - print(f"[heap_exhaust] Writing to heap metadata at 0x{heap_base:08X}...") - # Use QEMU monitor 'memsave' and 'pmemsave' aren't writable; - # use 'xp' to read and 'poke' (if available) or GDB memory write - # Fallback: use the monitor 'x' command to at least probe the region + print("[heap_exhaust] Probing heap region...") resp = send_cmd(s, f"xp /4xw 0x{heap_base:08x}") - print(f"[heap_exhaust] Current heap header: {resp.strip()}") - - # Attempt to write garbage via 'write' monitor command (QEMU 8.x+) - # Format: write - garbage = "DEADBEEF" * 4 # 16 bytes of garbage - resp = send_cmd(s, f"pmemsave 0x{heap_base:08x} 16 /dev/null") - # Try direct memory write if supported - resp = send_cmd(s, f"x /1xw 0x{heap_base:08x}") - print(f"[heap_exhaust] Injected: heap metadata perturbation at 0x{heap_base:08X}") + print(f"[heap_exhaust] Heap header: {resp.strip()}") + # Pause VM to stress memory management + print("[heap_exhaust] Pausing VM for 3s to stress heap management...") + send_cmd(s, "stop") + time.sleep(3.0) + send_cmd(s, "cont") + print("[heap_exhaust] WARNING: Actual heap corruption requires GDB stub (-gdb tcp::1234)") + print("[heap_exhaust] Injected: 3s VM pause (simulates memory pressure)") def fault_timer_starvation(s: socket.socket) -> None: @@ -159,51 +154,47 @@ def fault_timer_starvation(s: socket.socket) -> None: print("[timer_starvation] Injected: 500ms execution pause") -def fault_corrupt_frame(s: socket.socket) -> None: - """Write bad magic bytes to CSI frame buffer 
area. +def fault_corrupt_frame(s: socket.socket, flash_path: str = None) -> None: + """Simulate CSI frame corruption by pausing VM during frame processing. - Mock CSI frames use a magic prefix (0xCSIF or similar). We write - an invalid magic to the frame staging buffer so the parser - encounters corruption on the next read. + Actual memory writes to the frame buffer require a GDB stub + (-gdb tcp::1234). This function probes the frame buffer region + and pauses the VM mid-frame to simulate corruption effects. """ - # Mock CSI buffer is typically in .bss — use a known SRAM region - # ESP32-S3 SRAM1: 0x3FC88000 - 0x3FCF0000 - # Pick an offset likely to hit the frame staging area frame_buf_addr = 0x3FCA0000 - print(f"[corrupt_frame] Writing bad magic to 0x{frame_buf_addr:08X}...") - - # Write 0xDEADCAFE where the frame magic should be 0x43534946 ("CSIF") - # QEMU monitor: attempt memory write + print(f"[corrupt_frame] Probing frame buffer at 0x{frame_buf_addr:08X}...") resp = send_cmd(s, f"xp /4xb 0x{frame_buf_addr:08x}") - print(f"[corrupt_frame] Before: {resp.strip()}") - - # Use GDB-style memory write if available, otherwise log the attempt - # The actual write depends on QEMU version and GDB stub availability - resp = send_cmd(s, f"x /1xw 0x{frame_buf_addr:08x}") - print(f"[corrupt_frame] Injected: bad magic bytes at 0x{frame_buf_addr:08X}") + print(f"[corrupt_frame] Frame buffer: {resp.strip()}") + # Pause VM briefly to disrupt frame processing timing + print("[corrupt_frame] Pausing VM for 1s to disrupt frame processing...") + send_cmd(s, "stop") + time.sleep(1.0) + send_cmd(s, "cont") + print("[corrupt_frame] WARNING: Actual frame corruption requires GDB stub (-gdb tcp::1234)") + print(f"[corrupt_frame] Injected: 1s VM pause during frame processing") -def fault_nvs_corrupt(s: socket.socket) -> None: - """Write garbage to the NVS flash region. 
+def fault_nvs_corrupt(s: socket.socket, flash_path: str = None) -> None: + """Write garbage to the NVS flash region on disk. - NVS partition is at flash offset 0x9000. Under QEMU, the flash is - memory-mapped. We write garbage to the NVS page header to trigger - NVS corruption detection on next read. + When a flash image path is provided, writes random bytes directly + to the NVS partition offset (0x9000) in the flash image file. + Without a flash path, falls back to a read-only probe via monitor. """ - # ESP32-S3 flash is mapped at 0x3C000000 (instruction) / 0x3D000000 (data) - # NVS at flash offset 0x9000 maps to 0x3C009000 in QEMU memory - nvs_flash_addr = 0x3C009000 - print(f"[nvs_corrupt] Writing garbage to NVS region 0x{nvs_flash_addr:08X}...") - - # Read current NVS header - resp = send_cmd(s, f"xp /8xb 0x{nvs_flash_addr:08x}") - print(f"[nvs_corrupt] NVS header before: {resp.strip()}") - - # Attempt to corrupt the NVS page header (first 32 bytes) - # NVS page magic is 0xFE (active) or 0xFC (full) - # Writing 0x00 makes it appear as an uninitialized page - resp = send_cmd(s, f"x /1xw 0x{nvs_flash_addr:08x}") - print(f"[nvs_corrupt] Injected: NVS region corruption at 0x{nvs_flash_addr:08X}") + if flash_path and os.path.isfile(flash_path): + nvs_offset = 0x9000 + garbage = bytes(random.randint(0, 255) for _ in range(16)) + with open(flash_path, "r+b") as f: + f.seek(nvs_offset) + f.write(garbage) + print(f"[nvs_corrupt] Wrote 16 garbage bytes at flash offset 0x{nvs_offset:X}") + print(f"[nvs_corrupt] Flash image: {flash_path}") + else: + # Fallback: attempt via monitor (read-only probe) + resp = send_cmd(s, f"xp /8xb 0x3C009000") + print(f"[nvs_corrupt] NVS region (read-only probe): {resp.strip()}") + print(f"[nvs_corrupt] WARNING: No --flash path provided; NVS corruption was NOT injected") + print(f"[nvs_corrupt] Pass --flash /path/to/flash.bin for actual corruption") # Map fault names to injection functions @@ -235,6 +226,10 @@ def main(): "--timeout", 
type=float, default=CMD_TIMEOUT, help=f"Per-command timeout in seconds (default: {CMD_TIMEOUT})", ) + parser.add_argument( + "--flash", default=None, + help="Path to flash image (for nvs_corrupt direct file writes)", + ) args = parser.parse_args() print(f"[inject_fault] Connecting to {args.socket}...") @@ -242,7 +237,14 @@ def main(): print(f"[inject_fault] Injecting fault: {args.fault}") try: - FAULT_MAP[args.fault](s) + fault_fn = FAULT_MAP[args.fault] + # Pass flash_path to faults that accept it + import inspect + sig = inspect.signature(fault_fn) + if "flash_path" in sig.parameters: + fault_fn(s, flash_path=args.flash) + else: + fault_fn(s) except Exception as e: print(f"ERROR: Fault injection failed: {e}", file=sys.stderr) s.close() diff --git a/scripts/qemu-chaos-test.sh b/scripts/qemu-chaos-test.sh index b79b0b07..7cdd5776 100755 --- a/scripts/qemu-chaos-test.sh +++ b/scripts/qemu-chaos-test.sh @@ -8,7 +8,7 @@ # Fault types: # 1. wifi_kill — Pause/resume VM to simulate WiFi reconnect # 2. ring_flood — Inject 1000 rapid mock frames (ring buffer stress) -# 3. heap_pressure — Write to heap metadata to simulate low memory +# 3. heap_exhaust — Pause VM for 3s to simulate memory pressure (no heap writes) # 4. timer_starvation — Pause VM for 500ms to starve FreeRTOS timers # 5. corrupt_frame — Inject a CSI frame with bad magic bytes # 6. nvs_corrupt — Write garbage to NVS flash region @@ -37,7 +37,7 @@ followed by a recovery window and health check. 
Fault types: wifi_kill Pause/resume VM to simulate WiFi reconnect ring_flood Inject 1000 rapid mock frames (ring buffer stress) - heap_pressure Write to heap metadata to simulate low memory + heap_exhaust Write to heap metadata to simulate low memory timer_starvation Pause VM for 500ms to starve FreeRTOS timers corrupt_frame Inject a CSI frame with bad magic bytes nvs_corrupt Write garbage to NVS flash region @@ -84,7 +84,7 @@ UART_LOG="$LOG_DIR/qemu_uart.log" QEMU_PID="" # Fault definitions -FAULTS=("wifi_kill" "ring_flood" "heap_pressure" "timer_starvation" "corrupt_frame" "nvs_corrupt") +FAULTS=("wifi_kill" "ring_flood" "heap_exhaust" "timer_starvation" "corrupt_frame" "nvs_corrupt") declare -a FAULT_RESULTS=() # ────────────────────────────────────────────────────────────────────── @@ -152,10 +152,10 @@ inject_ring_flood() { --fault ring_flood } -inject_heap_pressure() { - # Use monitor to simulate memory pressure by writing to heap tracking - # regions. The firmware's heap checks should detect and handle this. - echo " [inject] Simulating heap pressure via memory write..." +inject_heap_exhaust() { + # Simulate memory pressure by pausing the VM to stress heap management. + # Actual heap memory writes require GDB stub. + echo " [inject] Simulating heap pressure via VM pause..." python3 "$SCRIPT_DIR/inject_fault.py" \ --socket "$MONITOR_SOCK" \ --fault heap_exhaust @@ -180,12 +180,13 @@ inject_corrupt_frame() { } inject_nvs_corrupt() { - # Write garbage to the NVS flash region (offset 0x9000). + # Write garbage to the NVS flash region (offset 0x9000) via direct file write. # The firmware should detect NVS corruption and fall back to defaults. echo " [inject] Corrupting NVS flash region..." 
python3 "$SCRIPT_DIR/inject_fault.py" \ --socket "$MONITOR_SOCK" \ - --fault nvs_corrupt + --fault nvs_corrupt \ + --flash "$FLASH_IMAGE" } # ────────────────────────────────────────────────────────────────────── @@ -307,7 +308,7 @@ for fault in "${FAULTS[@]}"; do case "$fault" in wifi_kill) inject_wifi_kill ;; ring_flood) inject_ring_flood ;; - heap_pressure) inject_heap_pressure ;; + heap_exhaust) inject_heap_exhaust ;; timer_starvation) inject_timer_starvation ;; corrupt_frame) inject_corrupt_frame ;; nvs_corrupt) inject_nvs_corrupt ;; diff --git a/scripts/qemu-mesh-test.sh b/scripts/qemu-mesh-test.sh index 9832bf8e..7dc25fc7 100644 --- a/scripts/qemu-mesh-test.sh +++ b/scripts/qemu-mesh-test.sh @@ -11,7 +11,8 @@ # # Environment variables: # QEMU_PATH - Path to qemu-system-xtensa (default: qemu-system-xtensa) -# MESH_TIMEOUT - Timeout in seconds (default: 45) +# QEMU_TIMEOUT - Timeout in seconds (default: 45) +# MESH_TIMEOUT - Deprecated alias for QEMU_TIMEOUT # SKIP_BUILD - Set to "1" to skip the idf.py build step # BRIDGE_NAME - Bridge interface name (default: qemu-br0) # BRIDGE_SUBNET - Bridge IP/mask (default: 10.0.0.1/24) @@ -391,7 +392,7 @@ VALIDATE_ARGS=("--nodes" "$N_NODES") # Pass results file if it was produced if [ -f "$RESULTS_FILE" ]; then - VALIDATE_ARGS+=("$RESULTS_FILE") + VALIDATE_ARGS+=("--results" "$RESULTS_FILE") else echo "WARNING: Aggregator results file not found: $RESULTS_FILE" echo "Validation will rely on node logs only." 
diff --git a/scripts/qemu-snapshot-test.sh b/scripts/qemu-snapshot-test.sh index 0e1d192b..9ce8fa4a 100755 --- a/scripts/qemu-snapshot-test.sh +++ b/scripts/qemu-snapshot-test.sh @@ -97,9 +97,11 @@ trap cleanup EXIT INT TERM # ────────────────────────────────────────────────────────────────────── now_ms() { - # Millisecond timestamp (portable: uses date +%s%N on Linux, perl fallback) - if date +%s%N &>/dev/null; then - echo $(( $(date +%s%N) / 1000000 )) + # Millisecond timestamp (portable: Linux date +%s%N, macOS perl fallback) + local ns + ns=$(date +%s%N 2>/dev/null) + if [[ "$ns" =~ ^[0-9]+$ ]]; then + echo $(( ns / 1000000 )) else perl -MTime::HiRes=time -e 'printf "%d\n", time()*1000' 2>/dev/null || \ echo $(( $(date +%s) * 1000 )) @@ -284,15 +286,15 @@ for test_name in "${TESTS[@]}"; do # Restore to post_first_frame state restore_snapshot "post_first_frame" - # Clear the UART log for this test segment - > "$LOG_DIR/qemu_uart.log" + # Record current log length so we can extract only new lines + pre_lines=$(wc -l < "$LOG_DIR/qemu_uart.log" 2>/dev/null || echo 0) # Let execution continue for TIMEOUT_SEC seconds echo "[test] Running for ${TIMEOUT_SEC}s..." sleep "$TIMEOUT_SEC" - # Capture the log segment for this test - cp "$LOG_DIR/qemu_uart.log" "$test_log" + # Capture only the new log lines produced during this test + tail -n +$((pre_lines + 1)) "$LOG_DIR/qemu_uart.log" > "$test_log" t_end=$(now_ms) elapsed_ms=$((t_end - t_start)) diff --git a/scripts/validate_qemu_output.py b/scripts/validate_qemu_output.py index a6a11def..26291fe9 100644 --- a/scripts/validate_qemu_output.py +++ b/scripts/validate_qemu_output.py @@ -3,8 +3,9 @@ QEMU ESP32-S3 UART Output Validator (ADR-061) Parses the UART log captured from a QEMU firmware run and validates -14 checks covering boot, NVS, mock CSI, edge processing, vitals, -presence/fall detection, serialization, and crash indicators. 
+16 checks covering boot, NVS, mock CSI, edge processing, vitals, +presence/fall detection, serialization, crash indicators, scenario +completion, and frame rate sanity. Usage: python3 validate_qemu_output.py @@ -120,7 +121,7 @@ class ValidationReport: def validate_log(log_text: str) -> ValidationReport: - """Run all 14 validation checks against the UART log text.""" + """Run all 16 validation checks against the UART log text.""" report = ValidationReport() lines = log_text.splitlines() log_lower = log_text.lower()