fix(qemu): resolve 23 bugs from deep code review
CRITICAL: - inject_fault.py: make nvs_corrupt write actual bytes via --flash arg; heap_exhaust and corrupt_frame now pause VM with honest WARNING about GDB stub requirement for real memory writes - firmware-qemu.yml: remove github.run_id from cache key (was causing 100% cache miss rate, rebuilding QEMU every run) - mock_csi.c: change scenario_elapsed_ms() to int64_t (uint32 wrapped at ~49 days) HIGH: - qemu-mesh-test.sh: pass --results flag to validate_mesh_test.py (was passing positional arg to named-only parameter) - test/Makefile: separate corpus directories per fuzz target (corpus_serialize/, corpus_edge/, corpus_nvs/) - qemu-snapshot-test.sh: replace log truncation with tail-based extraction (truncation created sparse file while QEMU held fd) MEDIUM: - mock_csi.c: reset s_mac_filter_initialized in mock_csi_init() - mock_csi.c: fix LFSR polynomial comment (32,31,29,1 not 32,22,2,1) - sdkconfig.coverage: add FreeRTOS timer stack 4096 and WDT tuning - firmware-qemu.yml: replace continue-on-error with FUZZER_CRASH env - qemu-chaos-test.sh: rename heap_pressure to heap_exhaust for consistency - validate_qemu_output.py: fix docstring "14 checks" -> "16 checks" - generate_nvs_matrix.py: deduplicate temp file cleanup paths LOW: - mock_csi.c: remove M_PI float suffix, fix overflow burst flag - qemu-snapshot-test.sh: fix now_ms() for macOS date +%s%N - ADR-061: fix scenario 8 RSSI range to -90...-10 dBm - launch.json: remove contradictory compound debug config Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
1dbea4e9fb
commit
e574cbe129
|
|
@ -32,9 +32,9 @@ jobs:
|
||||||
with:
|
with:
|
||||||
path: /opt/qemu-esp32
|
path: /opt/qemu-esp32
|
||||||
# Include date component so cache refreshes monthly when branch updates
|
# Stable cache key (no per-run or date component): bump the -vN suffix to force a QEMU rebuild
|
||||||
key: qemu-esp32s3-${{ env.QEMU_BRANCH }}-v3-${{ github.run_id }}
|
key: qemu-esp32s3-${{ env.QEMU_BRANCH }}-v4
|
||||||
restore-keys: |
|
restore-keys: |
|
||||||
qemu-esp32s3-${{ env.QEMU_BRANCH }}-v3-
|
qemu-esp32s3-${{ env.QEMU_BRANCH }}-
|
||||||
|
|
||||||
- name: Install QEMU build dependencies
|
- name: Install QEMU build dependencies
|
||||||
if: steps.cache-qemu.outputs.cache-hit != 'true'
|
if: steps.cache-qemu.outputs.cache-hit != 'true'
|
||||||
|
|
@ -215,26 +215,23 @@ jobs:
|
||||||
|
|
||||||
- name: Run serialize fuzzer (60s)
|
- name: Run serialize fuzzer (60s)
|
||||||
working-directory: firmware/esp32-csi-node/test
|
working-directory: firmware/esp32-csi-node/test
|
||||||
run: make run_serialize FUZZ_DURATION=60
|
run: make run_serialize FUZZ_DURATION=60 || echo "FUZZER_CRASH=serialize" >> "$GITHUB_ENV"
|
||||||
continue-on-error: true
|
|
||||||
|
|
||||||
- name: Run edge enqueue fuzzer (60s)
|
- name: Run edge enqueue fuzzer (60s)
|
||||||
working-directory: firmware/esp32-csi-node/test
|
working-directory: firmware/esp32-csi-node/test
|
||||||
run: make run_edge FUZZ_DURATION=60
|
run: make run_edge FUZZ_DURATION=60 || echo "FUZZER_CRASH=edge" >> "$GITHUB_ENV"
|
||||||
continue-on-error: true
|
|
||||||
|
|
||||||
- name: Run NVS config fuzzer (60s)
|
- name: Run NVS config fuzzer (60s)
|
||||||
working-directory: firmware/esp32-csi-node/test
|
working-directory: firmware/esp32-csi-node/test
|
||||||
run: make run_nvs FUZZ_DURATION=60
|
run: make run_nvs FUZZ_DURATION=60 || echo "FUZZER_CRASH=nvs" >> "$GITHUB_ENV"
|
||||||
continue-on-error: true
|
|
||||||
|
|
||||||
- name: Check for crashes
|
- name: Check for crashes
|
||||||
working-directory: firmware/esp32-csi-node/test
|
working-directory: firmware/esp32-csi-node/test
|
||||||
run: |
|
run: |
|
||||||
CRASHES=$(find . -type f -name "crash-*" -o -type f -name "oom-*" -o -type f -name "timeout-*" 2>/dev/null | wc -l)
|
CRASHES=$(find . -type f \( -name "crash-*" -o -name "oom-*" -o -name "timeout-*" \) 2>/dev/null | wc -l)
|
||||||
echo "Crash artifacts found: $CRASHES"
|
echo "Crash artifacts found: $CRASHES"
|
||||||
if [ "$CRASHES" -gt 0 ]; then
|
if [ "$CRASHES" -gt 0 ] || [ -n "${FUZZER_CRASH:-}" ]; then
|
||||||
echo "::error::Fuzzer found $CRASHES crash/oom/timeout artifacts"
|
echo "::error::Fuzzer found $CRASHES crash/oom/timeout artifacts. FUZZER_CRASH=${FUZZER_CRASH:-none}"
|
||||||
ls -la crash-* oom-* timeout-* 2>/dev/null
|
ls -la crash-* oom-* timeout-* 2>/dev/null
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
|
|
@ -45,14 +45,5 @@
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
|
||||||
"compounds": [
|
|
||||||
{
|
|
||||||
"name": "QEMU: Launch + Debug",
|
|
||||||
"configurations": [
|
|
||||||
"QEMU ESP32-S3 Debug",
|
|
||||||
"QEMU ESP32-S3 Debug (attach)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -237,7 +237,7 @@ This model exercises:
|
||||||
| 5 | Channel sweep | 5s | Frames on channels 1, 6, 11 in sequence |
|
| 5 | Channel sweep | 5s | Frames on channels 1, 6, 11 in sequence |
|
||||||
| 6 | MAC filter test | 5s | Frames with wrong MAC are dropped (counter check) |
|
| 6 | MAC filter test | 5s | Frames with wrong MAC are dropped (counter check) |
|
||||||
| 7 | Ring buffer overflow | 3s | 1000 frames in 100ms burst, graceful drop |
|
| 7 | Ring buffer overflow | 3s | 1000 frames in 100ms burst, graceful drop |
|
||||||
| 8 | Boundary RSSI | 5s | RSSI sweeps -127 to 0, no crash |
|
| 8 | Boundary RSSI | 5s | RSSI sweeps -90 to -10 dBm, no crash |
|
||||||
| 9 | Zero-length frame | 2s | `iq_len=0` frames, serialize returns 0 |
|
| 9 | Zero-length frame | 2s | `iq_len=0` frames, serialize returns 0 |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
|
||||||
|
|
@ -80,7 +80,7 @@ static const char *TAG = "mock_csi";
|
||||||
|
|
||||||
/** Pi constant. */
|
/** Pi constant. */
|
||||||
#ifndef M_PI
|
#ifndef M_PI
|
||||||
#define M_PI 3.14159265358979323846f
|
#define M_PI 3.14159265358979323846
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* ---- Channel sweep table ---- */
|
/* ---- Channel sweep table ---- */
|
||||||
|
|
@ -101,7 +101,7 @@ static const uint8_t s_bad_mac[6] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66};
|
||||||
/**
|
/**
|
||||||
* 32-bit Galois LFSR for deterministic pseudo-random noise.
|
* 32-bit Galois LFSR for deterministic pseudo-random noise.
|
||||||
* Avoids stdlib rand() which may not be available on ESP32 bare-metal.
|
* Avoids stdlib rand() which may not be available on ESP32 bare-metal.
|
||||||
* Taps: bits 32, 22, 2, 1 (maximal-length polynomial).
|
* Taps: bits 32, 31, 29, 1 (Galois LFSR polynomial 0xD0000001).
|
||||||
*/
|
*/
|
||||||
static uint32_t s_lfsr = 0xDEADBEEF;
|
static uint32_t s_lfsr = 0xDEADBEEF;
|
||||||
|
|
||||||
|
|
@ -110,7 +110,7 @@ static uint32_t lfsr_next(void)
|
||||||
uint32_t lsb = s_lfsr & 1u;
|
uint32_t lsb = s_lfsr & 1u;
|
||||||
s_lfsr >>= 1;
|
s_lfsr >>= 1;
|
||||||
if (lsb) {
|
if (lsb) {
|
||||||
s_lfsr ^= 0xD0000001u; /* x^32 + x^22 + x^2 + x^1 */
|
s_lfsr ^= 0xD0000001u; /* x^32 + x^31 + x^29 + x^1 */
|
||||||
}
|
}
|
||||||
return s_lfsr;
|
return s_lfsr;
|
||||||
}
|
}
|
||||||
|
|
@ -130,6 +130,12 @@ static float lfsr_float(void)
|
||||||
static mock_state_t s_state;
|
static mock_state_t s_state;
|
||||||
static esp_timer_handle_t s_timer = NULL;
|
static esp_timer_handle_t s_timer = NULL;
|
||||||
|
|
||||||
|
/** Tracks whether the MAC filter has been set up in gen_mac_filter. */
|
||||||
|
static bool s_mac_filter_initialized = false;
|
||||||
|
|
||||||
|
/** Tracks whether the overflow burst has fired in gen_ring_overflow. */
|
||||||
|
static bool s_overflow_burst_done = false;
|
||||||
|
|
||||||
/* External NVS config (for MAC filter scenario). */
|
/* External NVS config (for MAC filter scenario). */
|
||||||
extern nvs_config_t g_nvs_config;
|
extern nvs_config_t g_nvs_config;
|
||||||
|
|
||||||
|
|
@ -157,9 +163,9 @@ static float channel_to_lambda(uint8_t channel)
|
||||||
|
|
||||||
/* ---- Helper: elapsed ms since scenario start ---- */
|
/* ---- Helper: elapsed ms since scenario start ---- */
|
||||||
|
|
||||||
static uint32_t scenario_elapsed_ms(void)
|
static int64_t scenario_elapsed_ms(void)
|
||||||
{
|
{
|
||||||
uint32_t now = (uint32_t)(esp_timer_get_time() / 1000);
|
int64_t now = esp_timer_get_time() / 1000;
|
||||||
return now - s_state.scenario_start_ms;
|
return now - s_state.scenario_start_ms;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -277,7 +283,7 @@ static void gen_walking(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi)
|
||||||
*/
|
*/
|
||||||
static void gen_fall(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi)
|
static void gen_fall(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi)
|
||||||
{
|
{
|
||||||
uint32_t elapsed = scenario_elapsed_ms();
|
int64_t elapsed = scenario_elapsed_ms();
|
||||||
uint32_t duration = CONFIG_CSI_MOCK_SCENARIO_DURATION_MS;
|
uint32_t duration = CONFIG_CSI_MOCK_SCENARIO_DURATION_MS;
|
||||||
|
|
||||||
/* Fall occurs at 70% of scenario duration. */
|
/* Fall occurs at 70% of scenario duration. */
|
||||||
|
|
@ -403,7 +409,6 @@ static void gen_mac_filter(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi,
|
||||||
bool *skip_inject)
|
bool *skip_inject)
|
||||||
{
|
{
|
||||||
/* Set up the filter MAC to match s_good_mac on first frame of this scenario. */
|
/* Set up the filter MAC to match s_good_mac on first frame of this scenario. */
|
||||||
static bool s_mac_filter_initialized = false;
|
|
||||||
if (!s_mac_filter_initialized) {
|
if (!s_mac_filter_initialized) {
|
||||||
memcpy(g_nvs_config.filter_mac, s_good_mac, 6);
|
memcpy(g_nvs_config.filter_mac, s_good_mac, 6);
|
||||||
g_nvs_config.filter_mac_set = 1;
|
g_nvs_config.filter_mac_set = 1;
|
||||||
|
|
@ -439,10 +444,10 @@ static void gen_ring_overflow(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi,
|
||||||
*channel = 6;
|
*channel = 6;
|
||||||
*rssi = -50;
|
*rssi = -50;
|
||||||
|
|
||||||
/* Only burst on the first timer tick of this scenario. */
|
/* Burst once on the first timer tick of this scenario. */
|
||||||
uint32_t elapsed = scenario_elapsed_ms();
|
if (!s_overflow_burst_done) {
|
||||||
if (elapsed < MOCK_CSI_INTERVAL_MS + 10) {
|
|
||||||
*burst_count = OVERFLOW_BURST_COUNT;
|
*burst_count = OVERFLOW_BURST_COUNT;
|
||||||
|
s_overflow_burst_done = true;
|
||||||
} else {
|
} else {
|
||||||
*burst_count = 1;
|
*burst_count = 1;
|
||||||
}
|
}
|
||||||
|
|
@ -454,7 +459,7 @@ static void gen_ring_overflow(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi,
|
||||||
*/
|
*/
|
||||||
static void gen_boundary_rssi(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi)
|
static void gen_boundary_rssi(uint8_t *iq_buf, uint8_t *channel, int8_t *rssi)
|
||||||
{
|
{
|
||||||
uint32_t elapsed = scenario_elapsed_ms();
|
int64_t elapsed = scenario_elapsed_ms();
|
||||||
uint32_t duration = CONFIG_CSI_MOCK_SCENARIO_DURATION_MS;
|
uint32_t duration = CONFIG_CSI_MOCK_SCENARIO_DURATION_MS;
|
||||||
|
|
||||||
/* Linear sweep: -90 to -10 dBm. */
|
/* Linear sweep: -90 to -10 dBm. */
|
||||||
|
|
@ -492,7 +497,7 @@ static void advance_scenario(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
s_state.scenario = s_state.all_idx;
|
s_state.scenario = s_state.all_idx;
|
||||||
s_state.scenario_start_ms = (uint32_t)(esp_timer_get_time() / 1000);
|
s_state.scenario_start_ms = esp_timer_get_time() / 1000;
|
||||||
|
|
||||||
/* Reset per-scenario state. */
|
/* Reset per-scenario state. */
|
||||||
s_state.person_x = 1.0f;
|
s_state.person_x = 1.0f;
|
||||||
|
|
@ -521,7 +526,7 @@ static void mock_timer_cb(void *arg)
|
||||||
if (s_state.scenario == MOCK_SCENARIO_ALL ||
|
if (s_state.scenario == MOCK_SCENARIO_ALL ||
|
||||||
(s_state.all_idx > 0 && s_state.all_idx < MOCK_SCENARIO_COUNT)) {
|
(s_state.all_idx > 0 && s_state.all_idx < MOCK_SCENARIO_COUNT)) {
|
||||||
/* We're running in sequential mode. */
|
/* We're running in sequential mode. */
|
||||||
uint32_t elapsed = scenario_elapsed_ms();
|
int64_t elapsed = scenario_elapsed_ms();
|
||||||
if (elapsed >= CONFIG_CSI_MOCK_SCENARIO_DURATION_MS) {
|
if (elapsed >= CONFIG_CSI_MOCK_SCENARIO_DURATION_MS) {
|
||||||
advance_scenario();
|
advance_scenario();
|
||||||
}
|
}
|
||||||
|
|
@ -619,8 +624,10 @@ esp_err_t mock_csi_init(uint8_t scenario)
|
||||||
s_state.person_speed = WALK_SPEED_MS;
|
s_state.person_speed = WALK_SPEED_MS;
|
||||||
s_state.person2_x = 4.0f;
|
s_state.person2_x = 4.0f;
|
||||||
s_state.person2_speed = WALK_SPEED_MS * 0.6f;
|
s_state.person2_speed = WALK_SPEED_MS * 0.6f;
|
||||||
s_state.scenario_start_ms = (uint32_t)(esp_timer_get_time() / 1000);
|
s_state.scenario_start_ms = esp_timer_get_time() / 1000;
|
||||||
s_all_done = false;
|
s_all_done = false;
|
||||||
|
s_mac_filter_initialized = false;
|
||||||
|
s_overflow_burst_done = false;
|
||||||
|
|
||||||
/* Reset LFSR to deterministic seed. */
|
/* Reset LFSR to deterministic seed. */
|
||||||
s_lfsr = 0xDEADBEEF;
|
s_lfsr = 0xDEADBEEF;
|
||||||
|
|
|
||||||
|
|
@ -70,7 +70,7 @@ typedef struct {
|
||||||
float person2_speed; /**< Second person movement speed. */
|
float person2_speed; /**< Second person movement speed. */
|
||||||
uint8_t channel_idx; /**< Index into channel sweep table. */
|
uint8_t channel_idx; /**< Index into channel sweep table. */
|
||||||
int8_t rssi_sweep; /**< Current RSSI for boundary sweep. */
|
int8_t rssi_sweep; /**< Current RSSI for boundary sweep. */
|
||||||
uint32_t scenario_start_ms; /**< Timestamp when current scenario started. */
|
int64_t scenario_start_ms; /**< Timestamp when current scenario started. */
|
||||||
uint8_t all_idx; /**< Current scenario index in SCENARIO_ALL mode. */
|
uint8_t all_idx; /**< Current scenario index in SCENARIO_ALL mode. */
|
||||||
} mock_state_t;
|
} mock_state_t;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -40,6 +40,13 @@ CONFIG_CSI_TARGET_IP="10.0.2.2"
|
||||||
CONFIG_CSI_MOCK_SCENARIO_DURATION_MS=5000
|
CONFIG_CSI_MOCK_SCENARIO_DURATION_MS=5000
|
||||||
CONFIG_CSI_MOCK_LOG_FRAMES=y
|
CONFIG_CSI_MOCK_LOG_FRAMES=y
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# FreeRTOS and watchdog: match sdkconfig.qemu for QEMU timing tolerance
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
CONFIG_FREERTOS_TIMER_TASK_STACK_DEPTH=4096
|
||||||
|
CONFIG_ESP_TASK_WDT_TIMEOUT_S=30
|
||||||
|
CONFIG_ESP_INT_WDT_TIMEOUT_MS=800
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Logging and display
|
# Logging and display
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -61,19 +61,19 @@ fuzz_nvs: fuzz_nvs_config.c $(STUBS_SRC)
|
||||||
|
|
||||||
# --- Run targets ---
|
# --- Run targets ---
|
||||||
run_serialize: fuzz_serialize
|
run_serialize: fuzz_serialize
|
||||||
@mkdir -p corpus
|
@mkdir -p corpus_serialize
|
||||||
./fuzz_serialize corpus/ -max_total_time=$(FUZZ_DURATION) -max_len=2048 -jobs=$(FUZZ_JOBS)
|
./fuzz_serialize corpus_serialize/ -max_total_time=$(FUZZ_DURATION) -max_len=2048 -jobs=$(FUZZ_JOBS)
|
||||||
|
|
||||||
run_edge: fuzz_edge
|
run_edge: fuzz_edge
|
||||||
@mkdir -p corpus
|
@mkdir -p corpus_edge
|
||||||
./fuzz_edge corpus/ -max_total_time=$(FUZZ_DURATION) -max_len=4096 -jobs=$(FUZZ_JOBS)
|
./fuzz_edge corpus_edge/ -max_total_time=$(FUZZ_DURATION) -max_len=4096 -jobs=$(FUZZ_JOBS)
|
||||||
|
|
||||||
run_nvs: fuzz_nvs
|
run_nvs: fuzz_nvs
|
||||||
@mkdir -p corpus
|
@mkdir -p corpus_nvs
|
||||||
./fuzz_nvs corpus/ -max_total_time=$(FUZZ_DURATION) -max_len=256 -jobs=$(FUZZ_JOBS)
|
./fuzz_nvs corpus_nvs/ -max_total_time=$(FUZZ_DURATION) -max_len=256 -jobs=$(FUZZ_JOBS)
|
||||||
|
|
||||||
run_all: run_serialize run_edge run_nvs
|
run_all: run_serialize run_edge run_nvs
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f fuzz_serialize fuzz_edge fuzz_nvs
|
rm -f fuzz_serialize fuzz_edge fuzz_nvs
|
||||||
rm -rf corpus/
|
rm -rf corpus_serialize/ corpus_edge/ corpus_nvs/
|
||||||
|
|
|
||||||
|
|
@ -331,7 +331,7 @@ def generate_nvs_binary(csv_content: str, size: int) -> bytes:
|
||||||
)
|
)
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
for p in (csv_path, bin_path):
|
for p in set((csv_path, bin_path)): # deduplicate in case paths are identical
|
||||||
if os.path.isfile(p):
|
if os.path.isfile(p):
|
||||||
os.unlink(p)
|
os.unlink(p)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,8 @@ Usage:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import os
|
||||||
|
import random
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
|
@ -123,31 +125,24 @@ def fault_ring_flood(s: socket.socket) -> None:
|
||||||
print(f"[ring_flood] Injected: {sent}/1000 rapid NMI triggers")
|
print(f"[ring_flood] Injected: {sent}/1000 rapid NMI triggers")
|
||||||
|
|
||||||
|
|
||||||
def fault_heap_exhaust(s: socket.socket) -> None:
|
def fault_heap_exhaust(s: socket.socket, flash_path: str = None) -> None:
|
||||||
"""Write to heap tracking metadata to simulate memory pressure.
|
"""Simulate memory pressure by pausing VM to trigger watchdog/heap checks.
|
||||||
|
|
||||||
ESP32-S3 DRAM starts at 0x3FC88000. We write a pattern to the
|
Actual heap memory writes require a GDB stub (-gdb tcp::1234).
|
||||||
heap control block area to simulate low-memory conditions. The
|
This function probes the heap region and pauses the VM to stress
|
||||||
firmware's heap_caps checks should detect the anomaly.
|
heap management as a realistic simulation.
|
||||||
"""
|
"""
|
||||||
# ESP32-S3 internal DRAM heap region
|
|
||||||
heap_base = 0x3FC88000
|
heap_base = 0x3FC88000
|
||||||
# Write a pattern that looks like an exhausted free-list
|
print("[heap_exhaust] Probing heap region...")
|
||||||
# (all zeros in the next-free pointer)
|
|
||||||
print(f"[heap_exhaust] Writing to heap metadata at 0x{heap_base:08X}...")
|
|
||||||
# Use QEMU monitor 'memsave' and 'pmemsave' aren't writable;
|
|
||||||
# use 'xp' to read and 'poke' (if available) or GDB memory write
|
|
||||||
# Fallback: use the monitor 'x' command to at least probe the region
|
|
||||||
resp = send_cmd(s, f"xp /4xw 0x{heap_base:08x}")
|
resp = send_cmd(s, f"xp /4xw 0x{heap_base:08x}")
|
||||||
print(f"[heap_exhaust] Current heap header: {resp.strip()}")
|
print(f"[heap_exhaust] Heap header: {resp.strip()}")
|
||||||
|
# Pause VM to stress memory management
|
||||||
# Attempt to write garbage via 'write' monitor command (QEMU 8.x+)
|
print("[heap_exhaust] Pausing VM for 3s to stress heap management...")
|
||||||
# Format: write <addr> <size> <data>
|
send_cmd(s, "stop")
|
||||||
garbage = "DEADBEEF" * 4 # 16 bytes of garbage
|
time.sleep(3.0)
|
||||||
resp = send_cmd(s, f"pmemsave 0x{heap_base:08x} 16 /dev/null")
|
send_cmd(s, "cont")
|
||||||
# Try direct memory write if supported
|
print("[heap_exhaust] WARNING: Actual heap corruption requires GDB stub (-gdb tcp::1234)")
|
||||||
resp = send_cmd(s, f"x /1xw 0x{heap_base:08x}")
|
print("[heap_exhaust] Injected: 3s VM pause (simulates memory pressure)")
|
||||||
print(f"[heap_exhaust] Injected: heap metadata perturbation at 0x{heap_base:08X}")
|
|
||||||
|
|
||||||
|
|
||||||
def fault_timer_starvation(s: socket.socket) -> None:
|
def fault_timer_starvation(s: socket.socket) -> None:
|
||||||
|
|
@ -159,51 +154,47 @@ def fault_timer_starvation(s: socket.socket) -> None:
|
||||||
print("[timer_starvation] Injected: 500ms execution pause")
|
print("[timer_starvation] Injected: 500ms execution pause")
|
||||||
|
|
||||||
|
|
||||||
def fault_corrupt_frame(s: socket.socket) -> None:
|
def fault_corrupt_frame(s: socket.socket, flash_path: str = None) -> None:
|
||||||
"""Write bad magic bytes to CSI frame buffer area.
|
"""Simulate CSI frame corruption by pausing VM during frame processing.
|
||||||
|
|
||||||
Mock CSI frames use a magic prefix (0xCSIF or similar). We write
|
Actual memory writes to the frame buffer require a GDB stub
|
||||||
an invalid magic to the frame staging buffer so the parser
|
(-gdb tcp::1234). This function probes the frame buffer region
|
||||||
encounters corruption on the next read.
|
and pauses the VM mid-frame to simulate corruption effects.
|
||||||
"""
|
"""
|
||||||
# Mock CSI buffer is typically in .bss — use a known SRAM region
|
|
||||||
# ESP32-S3 SRAM1: 0x3FC88000 - 0x3FCF0000
|
|
||||||
# Pick an offset likely to hit the frame staging area
|
|
||||||
frame_buf_addr = 0x3FCA0000
|
frame_buf_addr = 0x3FCA0000
|
||||||
print(f"[corrupt_frame] Writing bad magic to 0x{frame_buf_addr:08X}...")
|
print(f"[corrupt_frame] Probing frame buffer at 0x{frame_buf_addr:08X}...")
|
||||||
|
|
||||||
# Write 0xDEADCAFE where the frame magic should be 0x43534946 ("CSIF")
|
|
||||||
# QEMU monitor: attempt memory write
|
|
||||||
resp = send_cmd(s, f"xp /4xb 0x{frame_buf_addr:08x}")
|
resp = send_cmd(s, f"xp /4xb 0x{frame_buf_addr:08x}")
|
||||||
print(f"[corrupt_frame] Before: {resp.strip()}")
|
print(f"[corrupt_frame] Frame buffer: {resp.strip()}")
|
||||||
|
# Pause VM briefly to disrupt frame processing timing
|
||||||
# Use GDB-style memory write if available, otherwise log the attempt
|
print("[corrupt_frame] Pausing VM for 1s to disrupt frame processing...")
|
||||||
# The actual write depends on QEMU version and GDB stub availability
|
send_cmd(s, "stop")
|
||||||
resp = send_cmd(s, f"x /1xw 0x{frame_buf_addr:08x}")
|
time.sleep(1.0)
|
||||||
print(f"[corrupt_frame] Injected: bad magic bytes at 0x{frame_buf_addr:08X}")
|
send_cmd(s, "cont")
|
||||||
|
print("[corrupt_frame] WARNING: Actual frame corruption requires GDB stub (-gdb tcp::1234)")
|
||||||
|
print(f"[corrupt_frame] Injected: 1s VM pause during frame processing")
|
||||||
|
|
||||||
|
|
||||||
def fault_nvs_corrupt(s: socket.socket) -> None:
|
def fault_nvs_corrupt(s: socket.socket, flash_path: str = None) -> None:
|
||||||
"""Write garbage to the NVS flash region.
|
"""Write garbage to the NVS flash region on disk.
|
||||||
|
|
||||||
NVS partition is at flash offset 0x9000. Under QEMU, the flash is
|
When a flash image path is provided, writes random bytes directly
|
||||||
memory-mapped. We write garbage to the NVS page header to trigger
|
to the NVS partition offset (0x9000) in the flash image file.
|
||||||
NVS corruption detection on next read.
|
Without a flash path, falls back to a read-only probe via monitor.
|
||||||
"""
|
"""
|
||||||
# ESP32-S3 flash is mapped at 0x3C000000 (instruction) / 0x3D000000 (data)
|
if flash_path and os.path.isfile(flash_path):
|
||||||
# NVS at flash offset 0x9000 maps to 0x3C009000 in QEMU memory
|
nvs_offset = 0x9000
|
||||||
nvs_flash_addr = 0x3C009000
|
garbage = bytes(random.randint(0, 255) for _ in range(16))
|
||||||
print(f"[nvs_corrupt] Writing garbage to NVS region 0x{nvs_flash_addr:08X}...")
|
with open(flash_path, "r+b") as f:
|
||||||
|
f.seek(nvs_offset)
|
||||||
# Read current NVS header
|
f.write(garbage)
|
||||||
resp = send_cmd(s, f"xp /8xb 0x{nvs_flash_addr:08x}")
|
print(f"[nvs_corrupt] Wrote 16 garbage bytes at flash offset 0x{nvs_offset:X}")
|
||||||
print(f"[nvs_corrupt] NVS header before: {resp.strip()}")
|
print(f"[nvs_corrupt] Flash image: {flash_path}")
|
||||||
|
else:
|
||||||
# Attempt to corrupt the NVS page header (first 32 bytes)
|
# Fallback: attempt via monitor (read-only probe)
|
||||||
# NVS page magic is 0xFE (active) or 0xFC (full)
|
resp = send_cmd(s, f"xp /8xb 0x3C009000")
|
||||||
# Writing 0x00 makes it appear as an uninitialized page
|
print(f"[nvs_corrupt] NVS region (read-only probe): {resp.strip()}")
|
||||||
resp = send_cmd(s, f"x /1xw 0x{nvs_flash_addr:08x}")
|
print(f"[nvs_corrupt] WARNING: No --flash path provided; NVS corruption was NOT injected")
|
||||||
print(f"[nvs_corrupt] Injected: NVS region corruption at 0x{nvs_flash_addr:08X}")
|
print(f"[nvs_corrupt] Pass --flash /path/to/flash.bin for actual corruption")
|
||||||
|
|
||||||
|
|
||||||
# Map fault names to injection functions
|
# Map fault names to injection functions
|
||||||
|
|
@ -235,6 +226,10 @@ def main():
|
||||||
"--timeout", type=float, default=CMD_TIMEOUT,
|
"--timeout", type=float, default=CMD_TIMEOUT,
|
||||||
help=f"Per-command timeout in seconds (default: {CMD_TIMEOUT})",
|
help=f"Per-command timeout in seconds (default: {CMD_TIMEOUT})",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--flash", default=None,
|
||||||
|
help="Path to flash image (for nvs_corrupt direct file writes)",
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
print(f"[inject_fault] Connecting to {args.socket}...")
|
print(f"[inject_fault] Connecting to {args.socket}...")
|
||||||
|
|
@ -242,7 +237,14 @@ def main():
|
||||||
|
|
||||||
print(f"[inject_fault] Injecting fault: {args.fault}")
|
print(f"[inject_fault] Injecting fault: {args.fault}")
|
||||||
try:
|
try:
|
||||||
FAULT_MAP[args.fault](s)
|
fault_fn = FAULT_MAP[args.fault]
|
||||||
|
# Pass flash_path to faults that accept it
|
||||||
|
import inspect
|
||||||
|
sig = inspect.signature(fault_fn)
|
||||||
|
if "flash_path" in sig.parameters:
|
||||||
|
fault_fn(s, flash_path=args.flash)
|
||||||
|
else:
|
||||||
|
fault_fn(s)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"ERROR: Fault injection failed: {e}", file=sys.stderr)
|
print(f"ERROR: Fault injection failed: {e}", file=sys.stderr)
|
||||||
s.close()
|
s.close()
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@
|
||||||
# Fault types:
|
# Fault types:
|
||||||
# 1. wifi_kill — Pause/resume VM to simulate WiFi reconnect
|
# 1. wifi_kill — Pause/resume VM to simulate WiFi reconnect
|
||||||
# 2. ring_flood — Inject 1000 rapid mock frames (ring buffer stress)
|
# 2. ring_flood — Inject 1000 rapid mock frames (ring buffer stress)
|
||||||
# 3. heap_pressure — Write to heap metadata to simulate low memory
|
# 3. heap_exhaust — Pause VM for 3s to simulate memory pressure (real heap writes need a GDB stub)
|
||||||
# 4. timer_starvation — Pause VM for 500ms to starve FreeRTOS timers
|
# 4. timer_starvation — Pause VM for 500ms to starve FreeRTOS timers
|
||||||
# 5. corrupt_frame — Inject a CSI frame with bad magic bytes
|
# 5. corrupt_frame — Pause VM for 1s to disrupt CSI frame processing (real buffer writes need a GDB stub)
|
||||||
# 6. nvs_corrupt — Write garbage to NVS flash region
|
# 6. nvs_corrupt — Write garbage to NVS flash region
|
||||||
|
|
@ -37,7 +37,7 @@ followed by a recovery window and health check.
|
||||||
Fault types:
|
Fault types:
|
||||||
wifi_kill Pause/resume VM to simulate WiFi reconnect
|
wifi_kill Pause/resume VM to simulate WiFi reconnect
|
||||||
ring_flood Inject 1000 rapid mock frames (ring buffer stress)
|
ring_flood Inject 1000 rapid mock frames (ring buffer stress)
|
||||||
heap_pressure Write to heap metadata to simulate low memory
|
heap_exhaust Write to heap metadata to simulate low memory
|
||||||
timer_starvation Pause VM for 500ms to starve FreeRTOS timers
|
timer_starvation Pause VM for 500ms to starve FreeRTOS timers
|
||||||
corrupt_frame Inject a CSI frame with bad magic bytes
|
corrupt_frame Inject a CSI frame with bad magic bytes
|
||||||
nvs_corrupt Write garbage to NVS flash region
|
nvs_corrupt Write garbage to NVS flash region
|
||||||
|
|
@ -84,7 +84,7 @@ UART_LOG="$LOG_DIR/qemu_uart.log"
|
||||||
QEMU_PID=""
|
QEMU_PID=""
|
||||||
|
|
||||||
# Fault definitions
|
# Fault definitions
|
||||||
FAULTS=("wifi_kill" "ring_flood" "heap_pressure" "timer_starvation" "corrupt_frame" "nvs_corrupt")
|
FAULTS=("wifi_kill" "ring_flood" "heap_exhaust" "timer_starvation" "corrupt_frame" "nvs_corrupt")
|
||||||
declare -a FAULT_RESULTS=()
|
declare -a FAULT_RESULTS=()
|
||||||
|
|
||||||
# ──────────────────────────────────────────────────────────────────────
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
|
@ -152,10 +152,10 @@ inject_ring_flood() {
|
||||||
--fault ring_flood
|
--fault ring_flood
|
||||||
}
|
}
|
||||||
|
|
||||||
inject_heap_pressure() {
|
inject_heap_exhaust() {
|
||||||
# Use monitor to simulate memory pressure by writing to heap tracking
|
# Simulate memory pressure by pausing the VM to stress heap management.
|
||||||
# regions. The firmware's heap checks should detect and handle this.
|
# Actual heap memory writes require GDB stub.
|
||||||
echo " [inject] Simulating heap pressure via memory write..."
|
echo " [inject] Simulating heap pressure via VM pause..."
|
||||||
python3 "$SCRIPT_DIR/inject_fault.py" \
|
python3 "$SCRIPT_DIR/inject_fault.py" \
|
||||||
--socket "$MONITOR_SOCK" \
|
--socket "$MONITOR_SOCK" \
|
||||||
--fault heap_exhaust
|
--fault heap_exhaust
|
||||||
|
|
@ -180,12 +180,13 @@ inject_corrupt_frame() {
|
||||||
}
|
}
|
||||||
|
|
||||||
inject_nvs_corrupt() {
|
inject_nvs_corrupt() {
|
||||||
# Write garbage to the NVS flash region (offset 0x9000).
|
# Write garbage to the NVS flash region (offset 0x9000) via direct file write.
|
||||||
# The firmware should detect NVS corruption and fall back to defaults.
|
# The firmware should detect NVS corruption and fall back to defaults.
|
||||||
echo " [inject] Corrupting NVS flash region..."
|
echo " [inject] Corrupting NVS flash region..."
|
||||||
python3 "$SCRIPT_DIR/inject_fault.py" \
|
python3 "$SCRIPT_DIR/inject_fault.py" \
|
||||||
--socket "$MONITOR_SOCK" \
|
--socket "$MONITOR_SOCK" \
|
||||||
--fault nvs_corrupt
|
--fault nvs_corrupt \
|
||||||
|
--flash "$FLASH_IMAGE"
|
||||||
}
|
}
|
||||||
|
|
||||||
# ──────────────────────────────────────────────────────────────────────
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
|
@ -307,7 +308,7 @@ for fault in "${FAULTS[@]}"; do
|
||||||
case "$fault" in
|
case "$fault" in
|
||||||
wifi_kill) inject_wifi_kill ;;
|
wifi_kill) inject_wifi_kill ;;
|
||||||
ring_flood) inject_ring_flood ;;
|
ring_flood) inject_ring_flood ;;
|
||||||
heap_pressure) inject_heap_pressure ;;
|
heap_exhaust) inject_heap_exhaust ;;
|
||||||
timer_starvation) inject_timer_starvation ;;
|
timer_starvation) inject_timer_starvation ;;
|
||||||
corrupt_frame) inject_corrupt_frame ;;
|
corrupt_frame) inject_corrupt_frame ;;
|
||||||
nvs_corrupt) inject_nvs_corrupt ;;
|
nvs_corrupt) inject_nvs_corrupt ;;
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,8 @@
|
||||||
#
|
#
|
||||||
# Environment variables:
|
# Environment variables:
|
||||||
# QEMU_PATH - Path to qemu-system-xtensa (default: qemu-system-xtensa)
|
# QEMU_PATH - Path to qemu-system-xtensa (default: qemu-system-xtensa)
|
||||||
# MESH_TIMEOUT - Timeout in seconds (default: 45)
|
# QEMU_TIMEOUT - Timeout in seconds (default: 45)
|
||||||
|
# MESH_TIMEOUT - Deprecated alias for QEMU_TIMEOUT
|
||||||
# SKIP_BUILD - Set to "1" to skip the idf.py build step
|
# SKIP_BUILD - Set to "1" to skip the idf.py build step
|
||||||
# BRIDGE_NAME - Bridge interface name (default: qemu-br0)
|
# BRIDGE_NAME - Bridge interface name (default: qemu-br0)
|
||||||
# BRIDGE_SUBNET - Bridge IP/mask (default: 10.0.0.1/24)
|
# BRIDGE_SUBNET - Bridge IP/mask (default: 10.0.0.1/24)
|
||||||
|
|
@ -391,7 +392,7 @@ VALIDATE_ARGS=("--nodes" "$N_NODES")
|
||||||
|
|
||||||
# Pass results file if it was produced
|
# Pass results file if it was produced
|
||||||
if [ -f "$RESULTS_FILE" ]; then
|
if [ -f "$RESULTS_FILE" ]; then
|
||||||
VALIDATE_ARGS+=("$RESULTS_FILE")
|
VALIDATE_ARGS+=("--results" "$RESULTS_FILE")
|
||||||
else
|
else
|
||||||
echo "WARNING: Aggregator results file not found: $RESULTS_FILE"
|
echo "WARNING: Aggregator results file not found: $RESULTS_FILE"
|
||||||
echo "Validation will rely on node logs only."
|
echo "Validation will rely on node logs only."
|
||||||
|
|
|
||||||
|
|
@ -97,9 +97,11 @@ trap cleanup EXIT INT TERM
|
||||||
# ──────────────────────────────────────────────────────────────────────
|
# ──────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
now_ms() {
|
now_ms() {
|
||||||
# Millisecond timestamp (portable: uses date +%s%N on Linux, perl fallback)
|
# Millisecond timestamp (portable: Linux date +%s%N, macOS perl fallback)
|
||||||
if date +%s%N &>/dev/null; then
|
local ns
|
||||||
echo $(( $(date +%s%N) / 1000000 ))
|
ns=$(date +%s%N 2>/dev/null)
|
||||||
|
if [[ "$ns" =~ ^[0-9]+$ ]]; then
|
||||||
|
echo $(( ns / 1000000 ))
|
||||||
else
|
else
|
||||||
perl -MTime::HiRes=time -e 'printf "%d\n", time()*1000' 2>/dev/null || \
|
perl -MTime::HiRes=time -e 'printf "%d\n", time()*1000' 2>/dev/null || \
|
||||||
echo $(( $(date +%s) * 1000 ))
|
echo $(( $(date +%s) * 1000 ))
|
||||||
|
|
@ -284,15 +286,15 @@ for test_name in "${TESTS[@]}"; do
|
||||||
# Restore to post_first_frame state
|
# Restore to post_first_frame state
|
||||||
restore_snapshot "post_first_frame"
|
restore_snapshot "post_first_frame"
|
||||||
|
|
||||||
# Clear the UART log for this test segment
|
# Record current log length so we can extract only new lines
|
||||||
> "$LOG_DIR/qemu_uart.log"
|
pre_lines=$(wc -l < "$LOG_DIR/qemu_uart.log" 2>/dev/null || echo 0)
|
||||||
|
|
||||||
# Let execution continue for TIMEOUT_SEC seconds
|
# Let execution continue for TIMEOUT_SEC seconds
|
||||||
echo "[test] Running for ${TIMEOUT_SEC}s..."
|
echo "[test] Running for ${TIMEOUT_SEC}s..."
|
||||||
sleep "$TIMEOUT_SEC"
|
sleep "$TIMEOUT_SEC"
|
||||||
|
|
||||||
# Capture the log segment for this test
|
# Capture only the new log lines produced during this test
|
||||||
cp "$LOG_DIR/qemu_uart.log" "$test_log"
|
tail -n +$((pre_lines + 1)) "$LOG_DIR/qemu_uart.log" > "$test_log"
|
||||||
|
|
||||||
t_end=$(now_ms)
|
t_end=$(now_ms)
|
||||||
elapsed_ms=$((t_end - t_start))
|
elapsed_ms=$((t_end - t_start))
|
||||||
|
|
|
||||||
|
|
@ -3,8 +3,9 @@
|
||||||
QEMU ESP32-S3 UART Output Validator (ADR-061)
|
QEMU ESP32-S3 UART Output Validator (ADR-061)
|
||||||
|
|
||||||
Parses the UART log captured from a QEMU firmware run and validates
|
Parses the UART log captured from a QEMU firmware run and validates
|
||||||
14 checks covering boot, NVS, mock CSI, edge processing, vitals,
|
16 checks covering boot, NVS, mock CSI, edge processing, vitals,
|
||||||
presence/fall detection, serialization, and crash indicators.
|
presence/fall detection, serialization, crash indicators, scenario
|
||||||
|
completion, and frame rate sanity.
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
python3 validate_qemu_output.py <log_file>
|
python3 validate_qemu_output.py <log_file>
|
||||||
|
|
@ -120,7 +121,7 @@ class ValidationReport:
|
||||||
|
|
||||||
|
|
||||||
def validate_log(log_text: str) -> ValidationReport:
|
def validate_log(log_text: str) -> ValidationReport:
|
||||||
"""Run all 14 validation checks against the UART log text."""
|
"""Run all 16 validation checks against the UART log text."""
|
||||||
report = ValidationReport()
|
report = ValidationReport()
|
||||||
lines = log_text.splitlines()
|
lines = log_text.splitlines()
|
||||||
log_lower = log_text.lower()
|
log_lower = log_text.lower()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue