fix(qemu): resolve 23 bugs from deep code review
CRITICAL:
- inject_fault.py: make nvs_corrupt write actual bytes via --flash arg;
  heap_exhaust and corrupt_frame now pause VM with honest WARNING about
  GDB stub requirement for real memory writes
- firmware-qemu.yml: remove github.run_id from cache key (was causing
  100% cache miss rate, rebuilding QEMU every run)
- mock_csi.c: change scenario_elapsed_ms() to int64_t (uint32 wrapped
  at ~49 days)

HIGH:
- qemu-mesh-test.sh: pass --results flag to validate_mesh_test.py (was
  passing positional arg to named-only parameter)
- test/Makefile: separate corpus directories per fuzz target
  (corpus_serialize/, corpus_edge/, corpus_nvs/)
- qemu-snapshot-test.sh: replace log truncation with tail-based
  extraction (truncation created sparse file while QEMU held fd)

MEDIUM:
- mock_csi.c: reset s_mac_filter_initialized in mock_csi_init()
- mock_csi.c: fix LFSR polynomial comment (32,31,29,1 not 32,22,2,1)
- sdkconfig.coverage: add FreeRTOS timer stack 4096 and WDT tuning
- firmware-qemu.yml: replace continue-on-error with FUZZER_CRASH env
- qemu-chaos-test.sh: rename heap_pressure to heap_exhaust for consistency
- validate_qemu_output.py: fix docstring "14 checks" -> "16 checks"
- generate_nvs_matrix.py: deduplicate temp file cleanup paths

LOW:
- mock_csi.c: remove M_PI float suffix, fix overflow burst flag
- qemu-snapshot-test.sh: fix now_ms() for macOS date +%s%N
- ADR-061: fix scenario 8 RSSI range to -90...-10 dBm
- launch.json: remove contradictory compound debug config

Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
1dbea4e9fb
commit
e574cbe129
|
|
@ -32,9 +32,9 @@ jobs:
|
|||
with:
|
||||
path: /opt/qemu-esp32
|
||||
# Stable key (no per-run or date component) so the QEMU build is reused across runs; bump the version suffix to force a rebuild
|
||||
key: qemu-esp32s3-${{ env.QEMU_BRANCH }}-v3-${{ github.run_id }}
|
||||
key: qemu-esp32s3-${{ env.QEMU_BRANCH }}-v4
|
||||
restore-keys: |
|
||||
qemu-esp32s3-${{ env.QEMU_BRANCH }}-v3-
|
||||
qemu-esp32s3-${{ env.QEMU_BRANCH }}-
|
||||
|
||||
- name: Install QEMU build dependencies
|
||||
if: steps.cache-qemu.outputs.cache-hit != 'true'
|
||||
|
|
@ -215,26 +215,23 @@ jobs:
|
|||
|
||||
- name: Run serialize fuzzer (60s)
|
||||
working-directory: firmware/esp32-csi-node/test
|
||||
run: make run_serialize FUZZ_DURATION=60
|
||||
continue-on-error: true
|
||||
run: make run_serialize FUZZ_DURATION=60 || echo "FUZZER_CRASH=serialize" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Run edge enqueue fuzzer (60s)
|
||||
working-directory: firmware/esp32-csi-node/test
|
||||
run: make run_edge FUZZ_DURATION=60
|
||||
continue-on-error: true
|
||||
run: make run_edge FUZZ_DURATION=60 || echo "FUZZER_CRASH=edge" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Run NVS config fuzzer (60s)
|
||||
working-directory: firmware/esp32-csi-node/test
|
||||
run: make run_nvs FUZZ_DURATION=60
|
||||
continue-on-error: true
|
||||
run: make run_nvs FUZZ_DURATION=60 || echo "FUZZER_CRASH=nvs" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Check for crashes
|
||||
working-directory: firmware/esp32-csi-node/test
|
||||
run: |
|
||||
CRASHES=$(find . -type f -name "crash-*" -o -type f -name "oom-*" -o -type f -name "timeout-*" 2>/dev/null | wc -l)
|
||||
CRASHES=$(find . -type f \( -name "crash-*" -o -name "oom-*" -o -name "timeout-*" \) 2>/dev/null | wc -l)
|
||||
echo "Crash artifacts found: $CRASHES"
|
||||
if [ "$CRASHES" -gt 0 ]; then
|
||||
echo "::error::Fuzzer found $CRASHES crash/oom/timeout artifacts"
|
||||
if [ "$CRASHES" -gt 0 ] || [ -n "${FUZZER_CRASH:-}" ]; then
|
||||
echo "::error::Fuzzer found $CRASHES crash/oom/timeout artifacts. FUZZER_CRASH=${FUZZER_CRASH:-none}"
|
||||
ls -la crash-* oom-* timeout-* 2>/dev/null
|
||||
exit 1
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -45,14 +45,5 @@
|
|||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"compounds": [
|
||||
{
|
||||
"name": "QEMU: Launch + Debug",
|
||||
"configurations": [
|
||||
"QEMU ESP32-S3 Debug",
|
||||
"QEMU ESP32-S3 Debug (attach)"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -237,7 +237,7 @@ This model exercises:
|
|||
| 5 | Channel sweep | 5s | Frames on channels 1, 6, 11 in sequence |
|
||||
| 6 | MAC filter test | 5s | Frames with wrong MAC are dropped (counter check) |
|
||||
| 7 | Ring buffer overflow | 3s | 1000 frames in 100ms burst, graceful drop |
|
||||
| 8 | Boundary RSSI | 5s | RSSI sweeps -127 to 0, no crash |
|
||||
| 8 | Boundary RSSI | 5s | RSSI sweeps -90 to -10 dBm, no crash |
|
||||
| 9 | Zero-length frame | 2s | `iq_len=0` frames, serialize returns 0 |
|
||||
|
||||
---
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ static const char *TAG = "mock_csi";
|
|||
|
||||
/** Pi constant. */
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.14159265358979323846f
|
||||
#define M_PI 3.14159265358979323846
|
||||
#endif
|
||||
|
||||
/* ---- Channel sweep table ---- */
|
||||
|
|
@ -101,7 +101,7 @@ static const uint8_t s_bad_mac[6] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
|
|||
/**
|
||||
* 32-bit Galois LFSR for deterministic pseudo-random noise.
|
||||
* Avoids stdlib rand() which may not be available on ESP32 bare-metal.
|
||||
* Taps: bits 32, 22, 2, 1 (maximal-length polynomial).
|
||||
* Taps: bits 32, 31, 29, 1 (Galois LFSR polynomial 0xD0000001).
|
||||
*/
|
||||
static uint32_t s_lfsr = 0xDEADBEEF;
|
||||
|
||||
|
|
@ -110,7 +110,7 @@ static uint32_t lfsr_next(void)
|
|||
uint32_t lsb = s_lfsr & 1u;
|
||||
s_lfsr >>= 1;
|
||||
if (lsb) {
|
||||
s_lfsr ^= 0xD0000001u; /* x^32 + x^22 + x^2 + x^1 */
|
||||
s_lfsr ^= 0xD0000001u; /* x^32 + x^31 + x^29 + x^1 */
|
||||
}
|
||||
return s_lfsr;
|
||||
}
|
||||
|
|
@ -130,6 +130,12 @@ static float lfsr_float(void)
|
|||
static mock_state_t s_state;
|
||||
static esp_timer_handle_t s_timer = NULL;
|
||||
|
||||
/** Tracks whether the MAC filter has been set up in gen_mac_filter. */
|
||||
static bool s_mac_filter_initialized = false;
|
||||
|
||||
/** Tracks whether the overflow burst has fired in gen_ring_overflow. */
|
||||
static bool s_overflow_burst_done = false;
|
||||
|
||||
/* External NVS config (for MAC filter scenario). */
|
||||
extern nvs_config_t g_nvs_config;
|
||||
|
||||
|
|
@ -157,9 +163,9 @@ static float channel_to_lambda(uint8_t channel)
|
|||
|
||||
/* ---- Helper: elapsed ms since scenario start ---- */
|
||||
|
||||
static uint32_t scenario_elapsed_ms(void)
|
||||
static int64_t scenario_elapsed_ms(void)
|
||||
{
|
||||
uint32_t now = (uint32_t)(esp_timer_get_time() / 1000);
|
||||
int64_t now = esp_timer_get_time() / 1000;
|
||||
return now - s_state.scenario_start_ms;
|
||||
}
|
||||
|
||||
|
|
@ -277,7 +283,7 @@ static void gen_walking(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi)
|
|||
*/
|
||||
static void gen_fall(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi)
|
||||
{
|
||||
uint32_t elapsed = scenario_elapsed_ms();
|
||||
int64_t elapsed = scenario_elapsed_ms();
|
||||
uint32_t duration = CONFIG_CSI_MOCK_SCENARIO_DURATION_MS;
|
||||
|
||||
/* Fall occurs at 70% of scenario duration. */
|
||||
|
|
@ -403,7 +409,6 @@ static void gen_mac_filter(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi,
|
|||
bool *skip_inject)
|
||||
{
|
||||
/* Set up the filter MAC to match s_good_mac on first frame of this scenario. */
|
||||
static bool s_mac_filter_initialized = false;
|
||||
if (!s_mac_filter_initialized) {
|
||||
memcpy(g_nvs_config.filter_mac, s_good_mac, 6);
|
||||
g_nvs_config.filter_mac_set = 1;
|
||||
|
|
@ -439,10 +444,10 @@ static void gen_ring_overflow(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi,
|
|||
*channel = 6;
|
||||
*rssi = -50;
|
||||
|
||||
/* Only burst on the first timer tick of this scenario. */
|
||||
uint32_t elapsed = scenario_elapsed_ms();
|
||||
if (elapsed < MOCK_CSI_INTERVAL_MS + 10) {
|
||||
/* Burst once on the first timer tick of this scenario. */
|
||||
if (!s_overflow_burst_done) {
|
||||
*burst_count = OVERFLOW_BURST_COUNT;
|
||||
s_overflow_burst_done = true;
|
||||
} else {
|
||||
*burst_count = 1;
|
||||
}
|
||||
|
|
@ -454,7 +459,7 @@ static void gen_ring_overflow(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi,
|
|||
*/
|
||||
static void gen_boundary_rssi(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi)
|
||||
{
|
||||
uint32_t elapsed = scenario_elapsed_ms();
|
||||
int64_t elapsed = scenario_elapsed_ms();
|
||||
uint32_t duration = CONFIG_CSI_MOCK_SCENARIO_DURATION_MS;
|
||||
|
||||
/* Linear sweep: -90 to -10 dBm. */
|
||||
|
|
@ -492,7 +497,7 @@ static void advance_scenario(void)
|
|||
}
|
||||
|
||||
s_state.scenario = s_state.all_idx;
|
||||
s_state.scenario_start_ms = (uint32_t)(esp_timer_get_time() / 1000);
|
||||
s_state.scenario_start_ms = esp_timer_get_time() / 1000;
|
||||
|
||||
/* Reset per-scenario state. */
|
||||
s_state.person_x = 1.0f;
|
||||
|
|
@ -521,7 +526,7 @@ static void mock_timer_cb(void *arg)
|
|||
if (s_state.scenario == MOCK_SCENARIO_ALL ||
|
||||
(s_state.all_idx > 0 && s_state.all_idx < MOCK_SCENARIO_COUNT)) {
|
||||
/* We're running in sequential mode. */
|
||||
uint32_t elapsed = scenario_elapsed_ms();
|
||||
int64_t elapsed = scenario_elapsed_ms();
|
||||
if (elapsed >= CONFIG_CSI_MOCK_SCENARIO_DURATION_MS) {
|
||||
advance_scenario();
|
||||
}
|
||||
|
|
@ -619,8 +624,10 @@ esp_err_t mock_csi_init(uint8_t scenario)
|
|||
s_state.person_speed = WALK_SPEED_MS;
|
||||
s_state.person2_x = 4.0f;
|
||||
s_state.person2_speed = WALK_SPEED_MS * 0.6f;
|
||||
s_state.scenario_start_ms = (uint32_t)(esp_timer_get_time() / 1000);
|
||||
s_state.scenario_start_ms = esp_timer_get_time() / 1000;
|
||||
s_all_done = false;
|
||||
s_mac_filter_initialized = false;
|
||||
s_overflow_burst_done = false;
|
||||
|
||||
/* Reset LFSR to deterministic seed. */
|
||||
s_lfsr = 0xDEADBEEF;
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ typedef struct {
|
|||
float person2_speed; /**< Second person movement speed. */
|
||||
uint8_t channel_idx; /**< Index into channel sweep table. */
|
||||
int8_t rssi_sweep; /**< Current RSSI for boundary sweep. */
|
||||
uint32_t scenario_start_ms; /**< Timestamp when current scenario started. */
|
||||
int64_t scenario_start_ms; /**< Timestamp when current scenario started. */
|
||||
uint8_t all_idx; /**< Current scenario index in SCENARIO_ALL mode. */
|
||||
} mock_state_t;
|
||||
|
||||
|
|
|
|||
|
|
@ -40,6 +40,13 @@ CONFIG_CSI_TARGET_IP="10.0.2.2"
|
|||
CONFIG_CSI_MOCK_SCENARIO_DURATION_MS=5000
|
||||
CONFIG_CSI_MOCK_LOG_FRAMES=y
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# FreeRTOS and watchdog: match sdkconfig.qemu for QEMU timing tolerance
|
||||
# ---------------------------------------------------------------------------
|
||||
CONFIG_FREERTOS_TIMER_TASK_STACK_DEPTH=4096
|
||||
CONFIG_ESP_TASK_WDT_TIMEOUT_S=30
|
||||
CONFIG_ESP_INT_WDT_TIMEOUT_MS=800
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Logging and display
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -61,19 +61,19 @@ fuzz_nvs: fuzz_nvs_config.c $(STUBS_SRC)
|
|||
|
||||
# --- Run targets ---
|
||||
run_serialize: fuzz_serialize
|
||||
@mkdir -p corpus
|
||||
./fuzz_serialize corpus/ -max_total_time=$(FUZZ_DURATION) -max_len=2048 -jobs=$(FUZZ_JOBS)
|
||||
@mkdir -p corpus_serialize
|
||||
./fuzz_serialize corpus_serialize/ -max_total_time=$(FUZZ_DURATION) -max_len=2048 -jobs=$(FUZZ_JOBS)
|
||||
|
||||
run_edge: fuzz_edge
|
||||
@mkdir -p corpus
|
||||
./fuzz_edge corpus/ -max_total_time=$(FUZZ_DURATION) -max_len=4096 -jobs=$(FUZZ_JOBS)
|
||||
@mkdir -p corpus_edge
|
||||
./fuzz_edge corpus_edge/ -max_total_time=$(FUZZ_DURATION) -max_len=4096 -jobs=$(FUZZ_JOBS)
|
||||
|
||||
run_nvs: fuzz_nvs
|
||||
@mkdir -p corpus
|
||||
./fuzz_nvs corpus/ -max_total_time=$(FUZZ_DURATION) -max_len=256 -jobs=$(FUZZ_JOBS)
|
||||
@mkdir -p corpus_nvs
|
||||
./fuzz_nvs corpus_nvs/ -max_total_time=$(FUZZ_DURATION) -max_len=256 -jobs=$(FUZZ_JOBS)
|
||||
|
||||
run_all: run_serialize run_edge run_nvs
|
||||
|
||||
clean:
|
||||
rm -f fuzz_serialize fuzz_edge fuzz_nvs
|
||||
rm -rf corpus/
|
||||
rm -rf corpus_serialize/ corpus_edge/ corpus_nvs/
|
||||
|
|
|
|||
|
|
@ -331,7 +331,7 @@ def generate_nvs_binary(csv_content: str, size: int) -> bytes:
|
|||
)
|
||||
|
||||
finally:
|
||||
for p in (csv_path, bin_path):
|
||||
for p in set((csv_path, bin_path)): # deduplicate in case paths are identical
|
||||
if os.path.isfile(p):
|
||||
os.unlink(p)
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,8 @@ Usage:
|
|||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import random
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
|
|
@ -123,31 +125,24 @@ def fault_ring_flood(s: socket.socket) -> None:
|
|||
print(f"[ring_flood] Injected: {sent}/1000 rapid NMI triggers")
|
||||
|
||||
|
||||
def fault_heap_exhaust(s: socket.socket) -> None:
|
||||
"""Write to heap tracking metadata to simulate memory pressure.
|
||||
def fault_heap_exhaust(s: socket.socket, flash_path: str = None) -> None:
|
||||
"""Simulate memory pressure by pausing VM to trigger watchdog/heap checks.
|
||||
|
||||
ESP32-S3 DRAM starts at 0x3FC88000. We write a pattern to the
|
||||
heap control block area to simulate low-memory conditions. The
|
||||
firmware's heap_caps checks should detect the anomaly.
|
||||
Actual heap memory writes require a GDB stub (-gdb tcp::1234).
|
||||
This function probes the heap region and pauses the VM to stress
|
||||
heap management as a realistic simulation.
|
||||
"""
|
||||
# ESP32-S3 internal DRAM heap region
|
||||
heap_base = 0x3FC88000
|
||||
# Write a pattern that looks like an exhausted free-list
|
||||
# (all zeros in the next-free pointer)
|
||||
print(f"[heap_exhaust] Writing to heap metadata at 0x{heap_base:08X}...")
|
||||
# Use QEMU monitor 'memsave' and 'pmemsave' aren't writable;
|
||||
# use 'xp' to read and 'poke' (if available) or GDB memory write
|
||||
# Fallback: use the monitor 'x' command to at least probe the region
|
||||
print("[heap_exhaust] Probing heap region...")
|
||||
resp = send_cmd(s, f"xp /4xw 0x{heap_base:08x}")
|
||||
print(f"[heap_exhaust] Current heap header: {resp.strip()}")
|
||||
|
||||
# Attempt to write garbage via 'write' monitor command (QEMU 8.x+)
|
||||
# Format: write <addr> <size> <data>
|
||||
garbage = "DEADBEEF" * 4 # 16 bytes of garbage
|
||||
resp = send_cmd(s, f"pmemsave 0x{heap_base:08x} 16 /dev/null")
|
||||
# Try direct memory write if supported
|
||||
resp = send_cmd(s, f"x /1xw 0x{heap_base:08x}")
|
||||
print(f"[heap_exhaust] Injected: heap metadata perturbation at 0x{heap_base:08X}")
|
||||
print(f"[heap_exhaust] Heap header: {resp.strip()}")
|
||||
# Pause VM to stress memory management
|
||||
print("[heap_exhaust] Pausing VM for 3s to stress heap management...")
|
||||
send_cmd(s, "stop")
|
||||
time.sleep(3.0)
|
||||
send_cmd(s, "cont")
|
||||
print("[heap_exhaust] WARNING: Actual heap corruption requires GDB stub (-gdb tcp::1234)")
|
||||
print("[heap_exhaust] Injected: 3s VM pause (simulates memory pressure)")
|
||||
|
||||
|
||||
def fault_timer_starvation(s: socket.socket) -> None:
|
||||
|
|
@ -159,51 +154,47 @@ def fault_timer_starvation(s: socket.socket) -> None:
|
|||
print("[timer_starvation] Injected: 500ms execution pause")
|
||||
|
||||
|
||||
def fault_corrupt_frame(s: socket.socket) -> None:
|
||||
"""Write bad magic bytes to CSI frame buffer area.
|
||||
def fault_corrupt_frame(s: socket.socket, flash_path: str = None) -> None:
|
||||
"""Simulate CSI frame corruption by pausing VM during frame processing.
|
||||
|
||||
Mock CSI frames use a magic prefix (0xCSIF or similar). We write
|
||||
an invalid magic to the frame staging buffer so the parser
|
||||
encounters corruption on the next read.
|
||||
Actual memory writes to the frame buffer require a GDB stub
|
||||
(-gdb tcp::1234). This function probes the frame buffer region
|
||||
and pauses the VM mid-frame to simulate corruption effects.
|
||||
"""
|
||||
# Mock CSI buffer is typically in .bss — use a known SRAM region
|
||||
# ESP32-S3 SRAM1: 0x3FC88000 - 0x3FCF0000
|
||||
# Pick an offset likely to hit the frame staging area
|
||||
frame_buf_addr = 0x3FCA0000
|
||||
print(f"[corrupt_frame] Writing bad magic to 0x{frame_buf_addr:08X}...")
|
||||
|
||||
# Write 0xDEADCAFE where the frame magic should be 0x43534946 ("CSIF")
|
||||
# QEMU monitor: attempt memory write
|
||||
print(f"[corrupt_frame] Probing frame buffer at 0x{frame_buf_addr:08X}...")
|
||||
resp = send_cmd(s, f"xp /4xb 0x{frame_buf_addr:08x}")
|
||||
print(f"[corrupt_frame] Before: {resp.strip()}")
|
||||
|
||||
# Use GDB-style memory write if available, otherwise log the attempt
|
||||
# The actual write depends on QEMU version and GDB stub availability
|
||||
resp = send_cmd(s, f"x /1xw 0x{frame_buf_addr:08x}")
|
||||
print(f"[corrupt_frame] Injected: bad magic bytes at 0x{frame_buf_addr:08X}")
|
||||
print(f"[corrupt_frame] Frame buffer: {resp.strip()}")
|
||||
# Pause VM briefly to disrupt frame processing timing
|
||||
print("[corrupt_frame] Pausing VM for 1s to disrupt frame processing...")
|
||||
send_cmd(s, "stop")
|
||||
time.sleep(1.0)
|
||||
send_cmd(s, "cont")
|
||||
print("[corrupt_frame] WARNING: Actual frame corruption requires GDB stub (-gdb tcp::1234)")
|
||||
print(f"[corrupt_frame] Injected: 1s VM pause during frame processing")
|
||||
|
||||
|
||||
def fault_nvs_corrupt(s: socket.socket) -> None:
|
||||
"""Write garbage to the NVS flash region.
|
||||
def fault_nvs_corrupt(s: socket.socket, flash_path: str = None) -> None:
|
||||
"""Write garbage to the NVS flash region on disk.
|
||||
|
||||
NVS partition is at flash offset 0x9000. Under QEMU, the flash is
|
||||
memory-mapped. We write garbage to the NVS page header to trigger
|
||||
NVS corruption detection on next read.
|
||||
When a flash image path is provided, writes random bytes directly
|
||||
to the NVS partition offset (0x9000) in the flash image file.
|
||||
Without a flash path, falls back to a read-only probe via monitor.
|
||||
"""
|
||||
# ESP32-S3 flash is mapped at 0x3C000000 (instruction) / 0x3D000000 (data)
|
||||
# NVS at flash offset 0x9000 maps to 0x3C009000 in QEMU memory
|
||||
nvs_flash_addr = 0x3C009000
|
||||
print(f"[nvs_corrupt] Writing garbage to NVS region 0x{nvs_flash_addr:08X}...")
|
||||
|
||||
# Read current NVS header
|
||||
resp = send_cmd(s, f"xp /8xb 0x{nvs_flash_addr:08x}")
|
||||
print(f"[nvs_corrupt] NVS header before: {resp.strip()}")
|
||||
|
||||
# Attempt to corrupt the NVS page header (first 32 bytes)
|
||||
# NVS page magic is 0xFE (active) or 0xFC (full)
|
||||
# Writing 0x00 makes it appear as an uninitialized page
|
||||
resp = send_cmd(s, f"x /1xw 0x{nvs_flash_addr:08x}")
|
||||
print(f"[nvs_corrupt] Injected: NVS region corruption at 0x{nvs_flash_addr:08X}")
|
||||
if flash_path and os.path.isfile(flash_path):
|
||||
nvs_offset = 0x9000
|
||||
garbage = bytes(random.randint(0, 255) for _ in range(16))
|
||||
with open(flash_path, "r+b") as f:
|
||||
f.seek(nvs_offset)
|
||||
f.write(garbage)
|
||||
print(f"[nvs_corrupt] Wrote 16 garbage bytes at flash offset 0x{nvs_offset:X}")
|
||||
print(f"[nvs_corrupt] Flash image: {flash_path}")
|
||||
else:
|
||||
# Fallback: attempt via monitor (read-only probe)
|
||||
resp = send_cmd(s, f"xp /8xb 0x3C009000")
|
||||
print(f"[nvs_corrupt] NVS region (read-only probe): {resp.strip()}")
|
||||
print(f"[nvs_corrupt] WARNING: No --flash path provided; NVS corruption was NOT injected")
|
||||
print(f"[nvs_corrupt] Pass --flash /path/to/flash.bin for actual corruption")
|
||||
|
||||
|
||||
# Map fault names to injection functions
|
||||
|
|
@ -235,6 +226,10 @@ def main():
|
|||
"--timeout", type=float, default=CMD_TIMEOUT,
|
||||
help=f"Per-command timeout in seconds (default: {CMD_TIMEOUT})",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--flash", default=None,
|
||||
help="Path to flash image (for nvs_corrupt direct file writes)",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
print(f"[inject_fault] Connecting to {args.socket}...")
|
||||
|
|
@ -242,7 +237,14 @@ def main():
|
|||
|
||||
print(f"[inject_fault] Injecting fault: {args.fault}")
|
||||
try:
|
||||
FAULT_MAP[args.fault](s)
|
||||
fault_fn = FAULT_MAP[args.fault]
|
||||
# Pass flash_path to faults that accept it
|
||||
import inspect
|
||||
sig = inspect.signature(fault_fn)
|
||||
if "flash_path" in sig.parameters:
|
||||
fault_fn(s, flash_path=args.flash)
|
||||
else:
|
||||
fault_fn(s)
|
||||
except Exception as e:
|
||||
print(f"ERROR: Fault injection failed: {e}", file=sys.stderr)
|
||||
s.close()
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
# Fault types:
|
||||
# 1. wifi_kill — Pause/resume VM to simulate WiFi reconnect
|
||||
# 2. ring_flood — Inject 1000 rapid mock frames (ring buffer stress)
|
||||
# 3. heap_pressure — Write to heap metadata to simulate low memory
|
||||
# 3. heap_exhaust — Pause VM to simulate memory pressure (real heap writes require a GDB stub)
|
||||
# 4. timer_starvation — Pause VM for 500ms to starve FreeRTOS timers
|
||||
# 5. corrupt_frame — Pause VM during frame processing to simulate CSI frame corruption (real buffer writes require a GDB stub)
|
||||
# 6. nvs_corrupt — Write garbage to NVS flash region
|
||||
|
|
@ -37,7 +37,7 @@ followed by a recovery window and health check.
|
|||
Fault types:
|
||||
wifi_kill Pause/resume VM to simulate WiFi reconnect
|
||||
ring_flood Inject 1000 rapid mock frames (ring buffer stress)
|
||||
heap_pressure Write to heap metadata to simulate low memory
|
||||
heap_exhaust Write to heap metadata to simulate low memory
|
||||
timer_starvation Pause VM for 500ms to starve FreeRTOS timers
|
||||
corrupt_frame Inject a CSI frame with bad magic bytes
|
||||
nvs_corrupt Write garbage to NVS flash region
|
||||
|
|
@ -84,7 +84,7 @@ UART_LOG="$LOG_DIR/qemu_uart.log"
|
|||
QEMU_PID=""
|
||||
|
||||
# Fault definitions
|
||||
FAULTS=("wifi_kill" "ring_flood" "heap_pressure" "timer_starvation" "corrupt_frame" "nvs_corrupt")
|
||||
FAULTS=("wifi_kill" "ring_flood" "heap_exhaust" "timer_starvation" "corrupt_frame" "nvs_corrupt")
|
||||
declare -a FAULT_RESULTS=()
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
|
@ -152,10 +152,10 @@ inject_ring_flood() {
|
|||
--fault ring_flood
|
||||
}
|
||||
|
||||
inject_heap_pressure() {
|
||||
# Use monitor to simulate memory pressure by writing to heap tracking
|
||||
# regions. The firmware's heap checks should detect and handle this.
|
||||
echo " [inject] Simulating heap pressure via memory write..."
|
||||
inject_heap_exhaust() {
|
||||
# Simulate memory pressure by pausing the VM to stress heap management.
|
||||
# Actual heap memory writes require GDB stub.
|
||||
echo " [inject] Simulating heap pressure via VM pause..."
|
||||
python3 "$SCRIPT_DIR/inject_fault.py" \
|
||||
--socket "$MONITOR_SOCK" \
|
||||
--fault heap_exhaust
|
||||
|
|
@ -180,12 +180,13 @@ inject_corrupt_frame() {
|
|||
}
|
||||
|
||||
inject_nvs_corrupt() {
|
||||
# Write garbage to the NVS flash region (offset 0x9000).
|
||||
# Write garbage to the NVS flash region (offset 0x9000) via direct file write.
|
||||
# The firmware should detect NVS corruption and fall back to defaults.
|
||||
echo " [inject] Corrupting NVS flash region..."
|
||||
python3 "$SCRIPT_DIR/inject_fault.py" \
|
||||
--socket "$MONITOR_SOCK" \
|
||||
--fault nvs_corrupt
|
||||
--fault nvs_corrupt \
|
||||
--flash "$FLASH_IMAGE"
|
||||
}
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
|
@ -307,7 +308,7 @@ for fault in "${FAULTS[@]}"; do
|
|||
case "$fault" in
|
||||
wifi_kill) inject_wifi_kill ;;
|
||||
ring_flood) inject_ring_flood ;;
|
||||
heap_pressure) inject_heap_pressure ;;
|
||||
heap_exhaust) inject_heap_exhaust ;;
|
||||
timer_starvation) inject_timer_starvation ;;
|
||||
corrupt_frame) inject_corrupt_frame ;;
|
||||
nvs_corrupt) inject_nvs_corrupt ;;
|
||||
|
|
|
|||
|
|
@ -11,7 +11,8 @@
|
|||
#
|
||||
# Environment variables:
|
||||
# QEMU_PATH - Path to qemu-system-xtensa (default: qemu-system-xtensa)
|
||||
# MESH_TIMEOUT - Timeout in seconds (default: 45)
|
||||
# QEMU_TIMEOUT - Timeout in seconds (default: 45)
|
||||
# MESH_TIMEOUT - Deprecated alias for QEMU_TIMEOUT
|
||||
# SKIP_BUILD - Set to "1" to skip the idf.py build step
|
||||
# BRIDGE_NAME - Bridge interface name (default: qemu-br0)
|
||||
# BRIDGE_SUBNET - Bridge IP/mask (default: 10.0.0.1/24)
|
||||
|
|
@ -391,7 +392,7 @@ VALIDATE_ARGS=("--nodes" "$N_NODES")
|
|||
|
||||
# Pass results file if it was produced
|
||||
if [ -f "$RESULTS_FILE" ]; then
|
||||
VALIDATE_ARGS+=("$RESULTS_FILE")
|
||||
VALIDATE_ARGS+=("--results" "$RESULTS_FILE")
|
||||
else
|
||||
echo "WARNING: Aggregator results file not found: $RESULTS_FILE"
|
||||
echo "Validation will rely on node logs only."
|
||||
|
|
|
|||
|
|
@ -97,9 +97,11 @@ trap cleanup EXIT INT TERM
|
|||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
now_ms() {
|
||||
# Millisecond timestamp (portable: uses date +%s%N on Linux, perl fallback)
|
||||
if date +%s%N &>/dev/null; then
|
||||
echo $(( $(date +%s%N) / 1000000 ))
|
||||
# Millisecond timestamp (portable: Linux date +%s%N, macOS perl fallback)
|
||||
local ns
|
||||
ns=$(date +%s%N 2>/dev/null)
|
||||
if [[ "$ns" =~ ^[0-9]+$ ]]; then
|
||||
echo $(( ns / 1000000 ))
|
||||
else
|
||||
perl -MTime::HiRes=time -e 'printf "%d\n", time()*1000' 2>/dev/null || \
|
||||
echo $(( $(date +%s) * 1000 ))
|
||||
|
|
@ -284,15 +286,15 @@ for test_name in "${TESTS[@]}"; do
|
|||
# Restore to post_first_frame state
|
||||
restore_snapshot "post_first_frame"
|
||||
|
||||
# Clear the UART log for this test segment
|
||||
> "$LOG_DIR/qemu_uart.log"
|
||||
# Record current log length so we can extract only new lines
|
||||
pre_lines=$(wc -l < "$LOG_DIR/qemu_uart.log" 2>/dev/null || echo 0)
|
||||
|
||||
# Let execution continue for TIMEOUT_SEC seconds
|
||||
echo "[test] Running for ${TIMEOUT_SEC}s..."
|
||||
sleep "$TIMEOUT_SEC"
|
||||
|
||||
# Capture the log segment for this test
|
||||
cp "$LOG_DIR/qemu_uart.log" "$test_log"
|
||||
# Capture only the new log lines produced during this test
|
||||
tail -n +$((pre_lines + 1)) "$LOG_DIR/qemu_uart.log" > "$test_log"
|
||||
|
||||
t_end=$(now_ms)
|
||||
elapsed_ms=$((t_end - t_start))
|
||||
|
|
|
|||
|
|
@ -3,8 +3,9 @@
|
|||
QEMU ESP32-S3 UART Output Validator (ADR-061)
|
||||
|
||||
Parses the UART log captured from a QEMU firmware run and validates
|
||||
14 checks covering boot, NVS, mock CSI, edge processing, vitals,
|
||||
presence/fall detection, serialization, and crash indicators.
|
||||
16 checks covering boot, NVS, mock CSI, edge processing, vitals,
|
||||
presence/fall detection, serialization, crash indicators, scenario
|
||||
completion, and frame rate sanity.
|
||||
|
||||
Usage:
|
||||
python3 validate_qemu_output.py <log_file>
|
||||
|
|
@ -120,7 +121,7 @@ class ValidationReport:
|
|||
|
||||
|
||||
def validate_log(log_text: str) -> ValidationReport:
|
||||
"""Run all 14 validation checks against the UART log text."""
|
||||
"""Run all 16 validation checks against the UART log text."""
|
||||
report = ValidationReport()
|
||||
lines = log_text.splitlines()
|
||||
log_lower = log_text.lower()
|
||||
|
|
|
|||
Loading…
Reference in New Issue