fix(firmware): move defensive node_id capture before wifi_init_sta()

The original defensive copy in csi_collector_init() (line 172 of main.c)
runs AFTER wifi_init_sta() (line 147), which on some ESP32-S3 devices
corrupts g_nvs_config.node_id back to the Kconfig default of 1.

Reproduced on device 80:b5:4e:c1:be:b8 (ESP32-S3 QFN56 rev v0.2):
  - NVS provisioned with node_id=5
  - Release firmware (no fix): seed receives node_id=1 (clobbered)
  - This patch: seed receives node_id=5 (correct)

Changes:
  - Add csi_collector_set_node_id() called from main.c immediately
    after nvs_config_load(), before wifi_init_sta() runs
  - csi_collector_init() now detects and logs the clobber if early
    capture disagrees with current g_nvs_config value
  - Fallback path preserved: if set_node_id() is never called,
    init() still captures from g_nvs_config (backwards compatible)

Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
Dragan Spiridonov 2026-04-16 11:50:08 +02:00 committed by ruv
parent b123879b25
commit 415cb5606d
3 changed files with 50 additions and 28 deletions

View File

@ -25,13 +25,13 @@
/* ADR-060: Access the global NVS config for MAC filter and channel override. */
extern nvs_config_t g_nvs_config;
/* Defensive fix (#232, #375, #385, #386, #390): capture node_id at init-time
* into a module-local static. Using the global g_nvs_config.node_id directly
* at every callback is vulnerable to any memory corruption that clobbers the
* struct (which users have reported reverting node_id to the Kconfig default
* of 1). The local copy is set once at csi_collector_init() and then used
* exclusively by csi_serialize_frame(). */
/* Defensive fix (#232, #375, #385, #386, #390): capture node_id into a
* module-local static BEFORE wifi_init_sta() runs, because WiFi driver init
* can corrupt g_nvs_config.node_id (confirmed on device 80:b5:4e:c1:be:b8).
* main.c calls csi_collector_set_node_id() immediately after nvs_config_load(),
* and csi_serialize_frame() uses s_node_id exclusively. */
static uint8_t s_node_id = 1;
static bool s_node_id_early_set = false;
/* ADR-057: Build-time guard — fail early if CSI is not enabled in sdkconfig.
* Without this, the firmware compiles but crashes at runtime with:
@ -222,14 +222,31 @@ static void wifi_promiscuous_cb(void *buf, wifi_promiscuous_pkt_type_t type)
(void)type;
}
void csi_collector_set_node_id(uint8_t node_id)
{
s_node_id = node_id;
s_node_id_early_set = true;
ESP_LOGI(TAG, "Early capture node_id=%u (before WiFi init, #232/#390)",
(unsigned)node_id);
}
void csi_collector_init(void)
{
/* Capture node_id into module-local static at init time. After this point
* csi_serialize_frame() uses s_node_id exclusively, isolating the UDP
* frame node_id field from any memory corruption of g_nvs_config. */
s_node_id = g_nvs_config.node_id;
ESP_LOGI(TAG, "Captured node_id=%u at init (defensive copy for #232/#375/#385/#390)",
(unsigned)s_node_id);
if (!s_node_id_early_set) {
/* Fallback: no early capture — use current g_nvs_config (may be clobbered). */
s_node_id = g_nvs_config.node_id;
ESP_LOGW(TAG, "Late capture node_id=%u (no early set_node_id call)",
(unsigned)s_node_id);
} else if (g_nvs_config.node_id != s_node_id) {
/* Canary: early capture disagrees with current g_nvs_config — corruption
* happened between nvs_config_load() and here (likely wifi_init_sta). */
ESP_LOGW(TAG, "node_id clobber CONFIRMED: early=%u g_nvs_config=%u "
"(WiFi init likely corrupted struct, using early value)",
(unsigned)s_node_id, (unsigned)g_nvs_config.node_id);
} else {
ESP_LOGI(TAG, "node_id=%u verified (early capture matches g_nvs_config)",
(unsigned)s_node_id);
}
/* ADR-060: Determine the CSI channel.
* Priority: 1) NVS override (--channel), 2) connected AP channel, 3) Kconfig default. */
@ -290,16 +307,6 @@ void csi_collector_init(void)
ESP_LOGI(TAG, "CSI collection initialized (node_id=%u, channel=%u)",
(unsigned)s_node_id, (unsigned)csi_channel);
/* Clobber-detection canary: if g_nvs_config.node_id no longer matches the
* value we captured, something corrupted the struct between nvs_config_load
* and here. This is the historic #232/#375 symptom. */
if (g_nvs_config.node_id != s_node_id) {
ESP_LOGW(TAG, "node_id clobber detected: captured=%u but g_nvs_config=%u "
"(frames will use captured value %u). Please report to #390.",
(unsigned)s_node_id, (unsigned)g_nvs_config.node_id,
(unsigned)s_node_id);
}
}
/* Accessor for other modules that need the authoritative runtime node_id. */

View File

@ -30,14 +30,24 @@
void csi_collector_init(void);
/**
* Get the runtime node_id captured at csi_collector_init().
* Capture node_id BEFORE wifi_init_sta() or any other heavy init.
*
* This is a defensive copy of g_nvs_config.node_id taken at init time. Other
* modules (edge_processing, wasm_runtime, display_ui) should prefer this
* accessor over reading g_nvs_config.node_id directly, because the global
* struct can be clobbered by memory corruption (see #232, #375, #385, #390).
* Must be called from app_main() immediately after nvs_config_load().
* WiFi driver initialization can corrupt g_nvs_config.node_id (confirmed
* on device 80:b5:4e:c1:be:b8, NVS=3 but post-WiFi reads as 1).
* This early capture shields s_node_id from that corruption window.
*
* @return Node ID (0-255) as loaded from NVS or Kconfig default at boot.
* @param node_id Value from g_nvs_config.node_id, read right after NVS load.
*/
void csi_collector_set_node_id(uint8_t node_id);
/**
* Get the runtime node_id (early capture if available, otherwise init-time).
*
* Other modules (edge_processing, wasm_runtime, display_ui) should prefer
* this accessor over reading g_nvs_config.node_id directly.
*
* @return Node ID (0-255) as loaded from NVS at boot.
*/
uint8_t csi_collector_get_node_id(void);

View File

@ -140,6 +140,11 @@ void app_main(void)
/* Load runtime config (NVS overrides Kconfig defaults) */
nvs_config_load(&g_nvs_config);
/* Capture node_id IMMEDIATELY — before wifi_init_sta() can corrupt
* g_nvs_config. See #232/#375/#390: WiFi driver init clobbers the struct
* on some devices, reverting node_id to the Kconfig default of 1. */
csi_collector_set_node_id(g_nvs_config.node_id);
const esp_app_desc_t *app_desc = esp_app_get_description();
ESP_LOGI(TAG, "ESP32-S3 CSI Node (ADR-018) — v%s — Node ID: %d",
app_desc->version, g_nvs_config.node_id);