From 7831f294369d431778b4d182e7a80f463f4c4824 Mon Sep 17 00:00:00 2001 From: rUv Date: Mon, 22 Jun 2026 12:31:21 -0400 Subject: [PATCH] fix(firmware): phantom LD2410 detection + ENOMEM backoff (#1135) (#1159) Bug #2 (root cause): LD2410 probe-detection matched only the 4-byte head 0xF4F3F2F1, so a floating UART at 256000 baud could phantom-detect a sensor and spawn a UART task. Now requires a full validated report frame (head + sane length + tail 0xF8F7F6F5), extracted to mmwave_detect.h and shared with a host unit test (test_mmwave_detect.c, 8 vectors) so firmware and test can't diverge. Matches the validate-before-trust approach used for MR60 in #1107. Bug #1: sendto ENOMEM used a fixed 100 ms backoff too short to drain sustained lwIP/WiFi buffer pressure, so a node could stay stuck. Now exponential (100->200->...->2000 ms per consecutive ENOMEM, reset on first successful send). Removing the phantom LD2410 task (bug #2) also removes the extra load that tipped the reporter's tier-2 node into the stuck state. Validated on ESP32-S3 QFN56 rev v0.2 (the reporter's silicon): tier-2 streams ~100 frames/s with no stuck ENOMEM and correctly reports no mmWave (no phantom). LD2410 predicate truth table proven (head-without-tail REJECTED). Could not reproduce the reporter's environment-specific floating-pin noise, so the deterministic proof is the host unit test. --- firmware/esp32-csi-node/main/mmwave_detect.h | 37 +++++++++ firmware/esp32-csi-node/main/mmwave_sensor.c | 11 ++- firmware/esp32-csi-node/main/stream_sender.c | 25 ++++-- firmware/esp32-csi-node/test/Makefile | 19 ++++- .../esp32-csi-node/test/test_mmwave_detect.c | 80 +++++++++++++++++++ 5 files changed, 159 insertions(+), 13 deletions(-) create mode 100644 firmware/esp32-csi-node/main/mmwave_detect.h create mode 100644 firmware/esp32-csi-node/test/test_mmwave_detect.c diff --git a/firmware/esp32-csi-node/main/mmwave_detect.h b/firmware/esp32-csi-node/main/mmwave_detect.h new file mode 100644 index 00000000..2823aff8 --- /dev/null +++ b/firmware/esp32-csi-node/main/mmwave_detect.h @@ -0,0 +1,37 @@ +/** + * @file mmwave_detect.h + * @brief Pure (host-testable) mmWave frame-validation predicates for probe-time + * sensor detection. No ESP-IDF deps — safe to #include in a host unit test. + * + * Detection must validate a *full* frame, never a bare header byte/pattern: a + * floating UART with no sensor reads line noise that can contain header-looking + * bytes, which the old loose checks mistook for a real sensor (#1107 MR60, + * #1135 LD2410). These predicates are the validate-before-trust gate. + */ +#ifndef MMWAVE_DETECT_H +#define MMWAVE_DETECT_H + +#include +#include + +/** + * True iff buf[i..] begins a *validated* LD2410 report frame within [0,len): + * F4 F3 F2 F1 | len(LE,2) | data[len] | F8 F7 F6 F5 + * Requires the head magic, a sane intra-frame length, AND the matching tail at + * head+6+len. Pure noise that merely contains 0xF4F3F2F1 fails the tail check. + */ +static inline bool mmwave_ld2410_valid_at(const uint8_t *buf, int i, int len) +{ + if (i < 0 || i + 5 >= len) return false; + if (!(buf[i] == 0xF4 && buf[i+1] == 0xF3 && buf[i+2] == 0xF2 && buf[i+3] == 0xF1)) + return false; + uint16_t flen = (uint16_t)buf[i+4] | ((uint16_t)buf[i+5] << 8); + /* Real LD2410 report frames are small (basic=13, engineering=35). */ + if (flen < 1 || flen > 64) return false; + int tail = i + 6 + (int)flen; + if (tail + 3 >= len) return false; + return buf[tail] == 0xF8 && buf[tail+1] == 0xF7 + && buf[tail+2] == 0xF6 && buf[tail+3] == 0xF5; +} + +#endif /* MMWAVE_DETECT_H */ diff --git a/firmware/esp32-csi-node/main/mmwave_sensor.c b/firmware/esp32-csi-node/main/mmwave_sensor.c index 3b10afb5..747651c1 100644 --- a/firmware/esp32-csi-node/main/mmwave_sensor.c +++ b/firmware/esp32-csi-node/main/mmwave_sensor.c @@ -26,6 +26,7 @@ */ #include "mmwave_sensor.h" +#include "mmwave_detect.h" #include #include @@ -401,10 +402,12 @@ static mmwave_type_t probe_at_baud(uint32_t baud) } } } - /* LD2410: 4-byte header 0xF4F3F2F1 (already specific enough). */ - if (i + 3 < len && buf[i] == 0xF4 && buf[i+1] == 0xF3 - && buf[i+2] == 0xF2 && buf[i+3] == 0xF1 - && baud == MMWAVE_LD2410_BAUD) { + /* LD2410: require a *full validated* report frame, not just the + * 4-byte head. A floating UART1 at 256000 baud can emit the head + * pattern 0xF4F3F2F1 from line noise (#1135 bug #2). The shared + * predicate (host-unit-tested in mmwave_detect.h) demands a sane + * intra-frame length AND the matching tail 0xF8F7F6F5. */ + if (baud == MMWAVE_LD2410_BAUD && mmwave_ld2410_valid_at(buf, i, len)) { ld2410_header_seen++; } } diff --git a/firmware/esp32-csi-node/main/stream_sender.c b/firmware/esp32-csi-node/main/stream_sender.c index b85c206a..3d2c98f8 100644 --- a/firmware/esp32-csi-node/main/stream_sender.c +++ b/firmware/esp32-csi-node/main/stream_sender.c @@ -26,9 +26,16 @@ static struct sockaddr_in s_dest_addr; * rapid-fire CSI callbacks can exhaust the pbuf pool and crash the device. */ static int64_t s_backoff_until_us = 0; /* esp_timer timestamp to resume */ -#define ENOMEM_COOLDOWN_MS 100 /* suppress sends for 100 ms */ +#define ENOMEM_COOLDOWN_MS 100 /* base backoff; doubles per streak */ +#define ENOMEM_COOLDOWN_MAX_MS 2000 /* cap on the exponential backoff */ #define ENOMEM_LOG_INTERVAL 50 /* log every Nth suppressed send */ static uint32_t s_enomem_suppressed = 0; +/* Consecutive ENOMEM episodes without an intervening successful send. A fixed + * 100 ms backoff is too short to drain sustained lwIP/WiFi buffer pressure + * (#1135 bug #1: tier-2 + concurrent TX keeps the node stuck), so the backoff + * grows 100→200→400→…→2000 ms per streak and resets on the first send that + * succeeds. */ +static uint32_t s_enomem_streak = 0; static int sender_init_internal(const char *ip, uint16_t port) { @@ -93,16 +100,24 @@ int stream_sender_send(const uint8_t *data, size_t len) (struct sockaddr *)&s_dest_addr, sizeof(s_dest_addr)); if (sent < 0) { if (errno == ENOMEM) { - /* Start backoff to let lwIP reclaim buffers */ - s_backoff_until_us = esp_timer_get_time() + - (int64_t)ENOMEM_COOLDOWN_MS * 1000; - ESP_LOGW(TAG, "sendto ENOMEM — backing off for %d ms", ENOMEM_COOLDOWN_MS); + /* Exponential backoff: double the cooldown each consecutive ENOMEM + * (capped) so sustained buffer pressure actually drains instead of + * the node re-failing every 100 ms forever (#1135 bug #1). */ + uint32_t shift = s_enomem_streak < 5 ? s_enomem_streak : 5; + uint32_t cooldown = ENOMEM_COOLDOWN_MS << shift; + if (cooldown > ENOMEM_COOLDOWN_MAX_MS) cooldown = ENOMEM_COOLDOWN_MAX_MS; + s_enomem_streak++; + s_backoff_until_us = esp_timer_get_time() + (int64_t)cooldown * 1000; + ESP_LOGW(TAG, "sendto ENOMEM — backing off for %lu ms (streak %lu)", + (unsigned long)cooldown, (unsigned long)s_enomem_streak); } else { ESP_LOGW(TAG, "sendto failed: errno %d", errno); } return -1; } + /* A send got through — buffer pressure cleared; reset the backoff streak. */ + s_enomem_streak = 0; return sent; } diff --git a/firmware/esp32-csi-node/test/Makefile b/firmware/esp32-csi-node/test/Makefile index 6dd2fa04..b5589945 100644 --- a/firmware/esp32-csi-node/test/Makefile +++ b/firmware/esp32-csi-node/test/Makefile @@ -44,9 +44,9 @@ FUZZ_DURATION ?= 30 FUZZ_JOBS ?= 1 .PHONY: all clean run_serialize run_edge run_nvs run_all test_adr110 run_adr110 \ - test_vitals run_vitals host_tests + test_vitals run_vitals test_mmwave_detect run_mmwave_detect host_tests -all: fuzz_serialize fuzz_edge fuzz_nvs test_adr110 test_vitals +all: fuzz_serialize fuzz_edge fuzz_nvs test_adr110 test_vitals test_mmwave_detect # --- ADR-110 encoding unit tests --- # Host-side, no libFuzzer needed — plain C99 deterministic table tests @@ -69,8 +69,19 @@ test_vitals: test_vitals_count_presence.c $(MAIN_DIR)/edge_processing.h run_vitals: test_vitals ./test_vitals -host_tests: run_adr110 run_vitals - @echo "Host tests passed (ADR-110 + vitals #998/#996)" +# --- mmWave LD2410 detection predicate (#1135 bug #2) --- +# Host-side, no libFuzzer. Proves a floating-UART head pattern (0xF4F3F2F1) +# without a valid frame length+tail is REJECTED, so a phantom LD2410 is never +# detected on a node with no sensor wired. Tests the real predicate the +# firmware uses (../main/mmwave_detect.h) — test and firmware can't disagree. +test_mmwave_detect: test_mmwave_detect.c $(MAIN_DIR)/mmwave_detect.h + cc -std=c99 -Wall -Wextra -I$(MAIN_DIR) -o $@ $< + +run_mmwave_detect: test_mmwave_detect + ./test_mmwave_detect + +host_tests: run_adr110 run_vitals run_mmwave_detect + @echo "Host tests passed (ADR-110 + vitals #998/#996 + mmwave detect #1135)" # --- Serialize fuzzer --- # Tests csi_serialize_frame() with random wifi_csi_info_t inputs. diff --git a/firmware/esp32-csi-node/test/test_mmwave_detect.c b/firmware/esp32-csi-node/test/test_mmwave_detect.c new file mode 100644 index 00000000..0a9fe930 --- /dev/null +++ b/firmware/esp32-csi-node/test/test_mmwave_detect.c @@ -0,0 +1,80 @@ +/** + * @file test_mmwave_detect.c + * @brief Host-side unit tests for the LD2410 frame-validation predicate (#1135). + * + * Proves the phantom-detection fix: a floating UART can emit the 4-byte head + * 0xF4F3F2F1, but the predicate rejects it unless a sane length + matching tail + * 0xF8F7F6F5 are also present. Tests the REAL predicate from mmwave_detect.h + * (the same code the firmware's probe_at_baud calls). + * + * cc -std=c99 -Wall -I../main -o test_mmwave_detect test_mmwave_detect.c && ./test_mmwave_detect + * + * Exits 0 on all-pass; prints the failing case otherwise. + */ +#include +#include +#include +#include "mmwave_detect.h" + +static int failures = 0; +#define CHECK(cond, msg) do { \ + if (!(cond)) { printf("FAIL: %s\n", msg); failures++; } \ + else { printf("ok: %s\n", msg); } \ +} while (0) + +/* Build a valid LD2410 report frame: F4F3F2F1 | len(LE) | data[len] | F8F7F6F5 */ +static int make_frame(uint8_t *out, uint16_t dlen) +{ + int n = 0; + out[n++] = 0xF4; out[n++] = 0xF3; out[n++] = 0xF2; out[n++] = 0xF1; + out[n++] = (uint8_t)(dlen & 0xFF); out[n++] = (uint8_t)(dlen >> 8); + for (uint16_t k = 0; k < dlen; k++) out[n++] = (uint8_t)(0xAA ^ k); + out[n++] = 0xF8; out[n++] = 0xF7; out[n++] = 0xF6; out[n++] = 0xF5; + return n; +} + +int main(void) +{ + uint8_t buf[256]; + + /* 1. A real basic-report frame (data len 13) validates. */ + int n = make_frame(buf, 13); + CHECK(mmwave_ld2410_valid_at(buf, 0, n), "valid basic frame (len=13) accepted"); + + /* 2. A real engineering-report frame (data len 35) validates. */ + n = make_frame(buf, 35); + CHECK(mmwave_ld2410_valid_at(buf, 0, n), "valid engineering frame (len=35) accepted"); + + /* 3. Head magic present but NO valid tail — the #1135 phantom case. */ + memset(buf, 0x00, sizeof(buf)); + buf[0]=0xF4; buf[1]=0xF3; buf[2]=0xF2; buf[3]=0xF1; buf[4]=13; buf[5]=0; + /* data present but tail is zeros, not F8F7F6F5 */ + CHECK(!mmwave_ld2410_valid_at(buf, 0, 64), "head magic without valid tail REJECTED (#1135)"); + + /* 4. Head magic with insane length is rejected. */ + memset(buf, 0xFF, sizeof(buf)); + buf[0]=0xF4; buf[1]=0xF3; buf[2]=0xF2; buf[3]=0xF1; buf[4]=0xFF; buf[5]=0xFF; /* len=65535 */ + CHECK(!mmwave_ld2410_valid_at(buf, 0, 200), "head magic with oversized length REJECTED"); + + /* 5. Pure noise (no head) is rejected. */ + for (int k = 0; k < 64; k++) buf[k] = (uint8_t)(0x5A + k); + CHECK(!mmwave_ld2410_valid_at(buf, 0, 64), "non-header noise REJECTED"); + + /* 6. Truncated frame (tail would run past the buffer) is rejected. */ + n = make_frame(buf, 13); + CHECK(!mmwave_ld2410_valid_at(buf, 0, n - 2), "truncated frame (tail past buffer) REJECTED"); + + /* 7. Valid frame at a non-zero offset still validates. */ + memset(buf, 0x00, sizeof(buf)); + n = make_frame(buf + 7, 13); + CHECK(mmwave_ld2410_valid_at(buf, 7, 7 + n), "valid frame at offset 7 accepted"); + + /* 8. Repeated head bytes without a frame (worst-case noise) rejected. */ + for (int k = 0; k + 3 < 64; k += 4) { + buf[k]=0xF4; buf[k+1]=0xF3; buf[k+2]=0xF2; buf[k+3]=0xF1; + } + CHECK(!mmwave_ld2410_valid_at(buf, 0, 64), "repeated bare head bytes REJECTED"); + + printf("\n%s (%d failures)\n", failures ? "FAILED" : "ALL PASS", failures); + return failures ? 1 : 0; +}