From 02cb84e0bb6c6dc4494b17388b43bd396869ae61 Mon Sep 17 00:00:00 2001 From: rUv Date: Sun, 14 Jun 2026 18:01:47 -0400 Subject: [PATCH] fix(vitals safety): non-finite CSI frame permanently froze breathing+HR via IIR-state poisoning (self-heal) + noise-never-Valid pin (#1079) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(vitals): self-heal IIR filters after non-finite CSI frame (ADR-021/ADR-158 §A1) The 2nd-order resonator bandpass_filter in BreathingExtractor and HeartRateExtractor latches each output y[n] into the filter state (y1/y2). A single non-finite amplitude residual from a corrupt CSI frame produced a NaN output that was written into the state. The existing extract() is_finite() guard dropped that one sample from the history buffer but never sanitized the poisoned filter state, so every subsequent output stayed NaN, was rejected too, and the sliding-window history never refilled: breathing AND heart-rate extraction went silently dead (returning None forever) until reset(). On the vitals alert path this is a safety-relevant denial of service — one bad frame stops monitoring with no error surfaced. Same class as the calibration NaN bug (ADR-154 §3) and the firmware vitals fixes (#998/#996/#987): prior hardening guarded the history boundary but not the filter-state boundary. Fix: when bandpass_filter computes a non-finite output it resets the IIR state to default and returns 0.0, so the resonator recovers on the next clean frame (the 0.0 is still dropped by the caller's finite-check, so no spurious sample enters history). Also de-magic the safety-critical HR physiological plausibility band into named HR_PLAUSIBLE_MIN_BPM/HR_PLAUSIBLE_MAX_BPM consts (value-identical 40/180 BPM). Pinned by: - breathing::tests::nan_frame_does_not_permanently_poison_filter (FAILS pre-fix) - breathing::tests::inf_mid_stream_does_not_freeze_history (FAILS pre-fix) - heartrate::tests::nan_frame_does_not_permanently_poison_filter (FAILS pre-fix) - heartrate::tests::pure_noise_is_never_reported_valid (fabricated-vital negative) - heartrate::tests::plausibility_band_constants_pinned (de-magic value pin) wifi-densepose-vitals --no-default-features: 55->60 lib tests, 0 failed. Workspace green (3370 passed, 0 failed). Python proof unchanged (vitals off the deterministic proof's signal path). Co-Authored-By: claude-flow * docs(vitals): record IIR NaN/inf self-heal fix (ADR-021, CHANGELOG) Document the wifi-densepose-vitals filter-state poisoning fix in ADR-021 Implementation Notes (parallel to the firmware #998/#996/#987 robustness class) and add a CHANGELOG [Unreleased] Fixed entry. Notes the confirmed clean dimensions with evidence (flat -> None; noise -> low-confidence Unreliable, never Valid; harmonic-rich breathing -> not a confident false HR; out-of-band BPM clamped). Co-Authored-By: claude-flow --- CHANGELOG.md | 1 + ...021-vital-sign-detection-rvdna-pipeline.md | 6 + .../wifi-densepose-vitals/src/breathing.rs | 83 +++++++++++++ .../wifi-densepose-vitals/src/heartrate.rs | 116 +++++++++++++++++- 4 files changed, 204 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca12152e..dd60d0c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **#3 JWT-in-URL (CWE-598) — VERIFIED ABSENT, regression-pinned.** `require_bearer` reads the token only from the `Authorization` header; the WebSocket handlers take no token query param and the sole `Query` extractor (`EdgeRegistryParams`) is a non-secret `refresh` flag. Added a regression proving `?token=`/`?access_token=` in the URL never authenticates while the header path still does. ### Fixed +- **Vitals IIR filters self-heal after a non-finite CSI frame — a single NaN/inf no longer permanently kills breathing & heart-rate extraction (`wifi-densepose-vitals`, safety; ADR-021 / ADR-158 §A1).** The 2nd-order resonator in `breathing::BreathingExtractor::bandpass_filter` and `heartrate::HeartRateExtractor::bandpass_filter` latches each output `y[n]` into the filter state (`y1`/`y2`). A non-finite input — one NaN/inf amplitude residual from a corrupt CSI frame — produced a NaN `output` that was written into the state. The existing `extract()` `is_finite()` guard correctly dropped that single sample from history, **but never sanitized the poisoned filter state**, so every subsequent output stayed NaN, was rejected too, and the sliding-window history *never refilled*: the extractor went silently dead (returning `None` forever) until `reset()`. On the vitals alert path this is a safety-relevant denial of service — one bad frame and breathing **and** heart-rate monitoring stop, with no error surfaced. Fix: when `bandpass_filter` computes a non-finite `output` it now resets the IIR state to default and returns `0.0`, so the resonator recovers on the next clean frame (the `0.0` is still dropped by the caller's finite-check — no spurious sample enters history). Same class as the calibration NaN bug (ADR-154 §3) and the firmware vitals fixes (#998/#996/#987): the prior hardening guarded the *history boundary* but not the *filter-state boundary*. Pinned by `breathing::tests::nan_frame_does_not_permanently_poison_filter`, `breathing::tests::inf_mid_stream_does_not_freeze_history`, and `heartrate::tests::nan_frame_does_not_permanently_poison_filter` (all three FAIL on the pre-fix code, verified by reverting). Also de-magicked the safety-critical HR physiological plausibility band into named `HR_PLAUSIBLE_MIN_BPM`/`HR_PLAUSIBLE_MAX_BPM` consts (value-identical 40/180 BPM, pinned by `plausibility_band_constants_pinned`) and added a fabricated-vital negative (`pure_noise_is_never_reported_valid` — broadband noise never yields a clinically `Valid` HR). `wifi-densepose-vitals --no-default-features`: 55→60 lib tests, 0 failed; workspace green; Python proof unchanged (vitals is off the deterministic proof's signal path). - **BFLD MQTT `zone_activity` payload now JSON-escapes the zone name (`wifi-densepose-bfld`).** `mqtt_topics::render_events` emitted the zone payload as `format!("\"{zone}\"")` with no escaping, while `ha_discovery.rs` already escapes operator-controlled strings. A zone name containing a `"` or `\` produced malformed/injectable JSON on the Home-Assistant state topic (e.g. zone `a"b` → payload `"a"b"`). Added a `json_string_literal` escaper mirroring `ha_discovery::push_str_field` and applied it to the zone payload — value-identical for normal zone names (`living_room`, …). Pinned by `zone_payload_escapes_json_metacharacters` (FAILED pre-fix; round-trips through `serde_json`); the existing `zone_payload_is_json_string_with_quotes` still passes unchanged. - **ESP32 vitals: `n_persons` over-counted (reported 4 for one person) + presence flag flickered at close range (#998, #996).** Two firmware logic bugs in `firmware/esp32-csi-node/main/edge_processing.c`, both robustness/logic fixes — **not** validated-accuracy claims (true count/PCK vs labelled ground truth stays hardware/data-gated on the COM9 ESP32-S3). - **#998 over-count — root cause + fix.** `update_multi_person_vitals()` split the top-K subcarriers into `top_k_count/2` groups and marked **every** group `active` unconditionally, so one body's multipath always reported the full `EDGE_MAX_PERSONS` (=4). New pure, host-testable `count_distinct_persons()` gates each candidate group: (1) **energy gate** — a group's phase variance must be ≥ `EDGE_PERSON_MIN_ENERGY_RATIO` (0.35) × the strongest group's, so weak multipath echoes don't count; (2) **spatial dedup** — groups whose representative subcarriers sit within `EDGE_PERSON_MIN_SC_SEP` (4) of each other are the same body. A `person_count_debounce()` then requires the gated count to hold `EDGE_PERSON_PERSIST_FRAMES` (3) consecutive frames before it's emitted, so a single noisy frame can't promote a phantom. The strongest group always counts (a present body yields ≥1). All thresholds are named, documented constants in `edge_processing.h`. diff --git a/docs/adr/ADR-021-vital-sign-detection-rvdna-pipeline.md b/docs/adr/ADR-021-vital-sign-detection-rvdna-pipeline.md index fc85b25f..a89a57ad 100644 --- a/docs/adr/ADR-021-vital-sign-detection-rvdna-pipeline.md +++ b/docs/adr/ADR-021-vital-sign-detection-rvdna-pipeline.md @@ -1092,6 +1092,12 @@ Two robustness bugs were fixed in the on-device edge path (`firmware/esp32-csi-n Both are pinned by host-buildable C99 tests in `firmware/esp32-csi-node/test/test_vitals_count_presence.c` (`make run_vitals`). The exact thresholds are documented constants pending on-device calibration against ground truth. +### 2026-06 — Rust `wifi-densepose-vitals`: IIR filter NaN/inf self-heal (ADR-158 §A1) + +A correctness/safety review of the Rust extraction crate found a real bug parallel to the firmware robustness class above. The 2nd-order resonator `bandpass_filter` in both `breathing.rs` and `heartrate.rs` latches each output `y[n]` into its filter state (`y1`/`y2`). A single non-finite amplitude residual from a corrupt CSI frame produced a NaN `output` that was written into the state; the existing `extract()` `is_finite()` guard dropped that one sample from the history buffer **but never sanitized the poisoned filter state**, so every later output stayed NaN, was rejected too, and the sliding-window history never refilled — breathing **and** heart-rate extraction went silently dead (returning `None` forever) until `reset()`. On the alert path this is a safety-relevant denial of service (one bad frame stops vitals monitoring with no error surfaced). + +Fix: when `bandpass_filter` computes a non-finite `output`, it resets the IIR state to default and returns `0.0`, so the resonator self-heals on the next clean frame (the `0.0` is still dropped by the caller's finite-check, so no spurious sample enters history). Same shape as the calibration NaN bug (ADR-154 §3) — the prior hardening guarded the *history boundary* but not the *filter-state boundary*. Pinned by `breathing::tests::nan_frame_does_not_permanently_poison_filter`, `breathing::tests::inf_mid_stream_does_not_freeze_history`, and `heartrate::tests::nan_frame_does_not_permanently_poison_filter` (all FAIL pre-fix, verified by reverting). The review also de-magicked the HR physiological plausibility band into named `HR_PLAUSIBLE_MIN_BPM`/`HR_PLAUSIBLE_MAX_BPM` consts (value-identical 40/180 BPM) and added a fabricated-vital negative (`pure_noise_is_never_reported_valid` — broadband noise never yields a clinically `Valid` HR; the extractor honestly returns low-confidence `Unreliable`). Clean dimensions confirmed with evidence: flat/silent input → `None`; pure noise → low-confidence `Unreliable`, never `Valid`; harmonic-rich breathing with no cardiac component → low-confidence, not a confident false HR; out-of-band BPM rejected by the plausibility clamp. + ## References - Ramsauer et al. (2020). "Hopfield Networks is All You Need." ICLR 2021. (ModernHopfield formulation) diff --git a/v2/crates/wifi-densepose-vitals/src/breathing.rs b/v2/crates/wifi-densepose-vitals/src/breathing.rs index 5f375419..1bfd7cd4 100644 --- a/v2/crates/wifi-densepose-vitals/src/breathing.rs +++ b/v2/crates/wifi-densepose-vitals/src/breathing.rs @@ -174,6 +174,20 @@ impl BreathingExtractor { let output = (1.0 - r) * (input - state.x2) + 2.0 * r * cos_w0 * state.y1 - r * r * state.y2; + // Self-healing non-finite guard (ADR-158 §A1). A single non-finite + // sample — a NaN/inf residual from a corrupt CSI frame, or a transient + // overflow — would otherwise be stored into `y1`/`y2` and poison the + // resonator recurrence *permanently*: every subsequent output stays + // NaN, the `extract()` finite-check drops it, and the history buffer + // never refills, so breathing extraction is dead until `reset()`. + // Resetting the filter state here lets the resonator recover on the next + // clean frame; the 0.0 we return for this frame is still dropped by the + // caller's `is_finite()` check, so no spurious sample enters history. + if !output.is_finite() { + *state = IirState::default(); + return 0.0; + } + state.x2 = state.x1; state.x1 = input; state.y2 = state.y1; @@ -396,6 +410,75 @@ mod tests { assert!((0.0..=2.0).contains(&fused), "weighted average must be in-range: {fused}"); } + /// ADR-158 §A1 bug-catching test: a single non-finite residual must NOT + /// permanently poison the IIR filter state. + /// + /// The resonator recurrence stores `y[n]` into the filter state. Before the + /// fix, one NaN/inf residual produced a NaN `output`, the `extract()` + /// finite-guard dropped that frame from history — but the NaN was already + /// latched into `state.y1`/`y2`, so every subsequent output stayed NaN, the + /// finite-guard rejected it too, and the history buffer never refilled. + /// Breathing extraction was then dead until `reset()`. A control run on the + /// same clean signal yields 15 BPM (0.25 Hz); after a leading NaN frame the + /// OLD code returned `None` with `history_len() == 0` forever. This test + /// asserts recovery (FAILS on the old code, verified by reverting the + /// `bandpass_filter` self-heal). + #[test] + fn nan_frame_does_not_permanently_poison_filter() { + let sr = 10.0; + let feed_clean = |ext: &mut BreathingExtractor| { + let mut last = None; + for i in 0..600 { + let t = i as f64 / sr; + let s = (2.0 * std::f64::consts::PI * 0.25 * t).sin(); + last = ext.extract(&[s], &[1.0]); + } + last + }; + + // Control: clean signal accumulates history and detects ~15 BPM. + let mut control = BreathingExtractor::new(1, sr, 60.0); + let control_res = feed_clean(&mut control); + assert!(control.history_len() > 0); + assert!(control_res.is_some(), "control clean run must produce an estimate"); + + // A leading NaN frame must not kill the extractor. + let mut ext = BreathingExtractor::new(1, sr, 60.0); + ext.extract(&[f64::NAN], &[1.0]); + let res = feed_clean(&mut ext); + assert!( + ext.history_len() > 0, + "extractor must recover and refill history after a NaN frame (got {})", + ext.history_len() + ); + assert!(res.is_some(), "extractor must recover an estimate after a NaN frame"); + } + + /// ADR-158 §A1: a mid-stream `inf` must not freeze the history buffer. + #[test] + fn inf_mid_stream_does_not_freeze_history() { + let sr = 10.0; + let mut ext = BreathingExtractor::new(1, sr, 60.0); + let clean = |ext: &mut BreathingExtractor, count: usize| { + for i in 0..count { + let t = i as f64 / sr; + let s = (2.0 * std::f64::consts::PI * 0.25 * t).sin(); + ext.extract(&[s], &[1.0]); + } + }; + clean(&mut ext, 300); + let before = ext.history_len(); + assert!(before > 0); + ext.extract(&[f64::INFINITY], &[1.0]); // poison mid-stream + clean(&mut ext, 600); + assert!( + ext.history_len() > before, + "history must keep growing after an inf frame (before={}, after={})", + before, + ext.history_len() + ); + } + /// ADR-157 §A3 bug-catching test. Divergence needs the pole magnitude /// `|r| >= 1`, i.e. `bw >= 4`. At `fs = 0.5` Hz with the band widened to /// 0.1-0.9 Hz, `bw = 2*pi*(0.9-0.1)/0.5 = 10.05`, so the OLD pole radius diff --git a/v2/crates/wifi-densepose-vitals/src/heartrate.rs b/v2/crates/wifi-densepose-vitals/src/heartrate.rs index 72af98c8..924fd2f2 100644 --- a/v2/crates/wifi-densepose-vitals/src/heartrate.rs +++ b/v2/crates/wifi-densepose-vitals/src/heartrate.rs @@ -32,6 +32,15 @@ impl Default for IirState { } } +/// Lowest physiologically plausible heart rate, in BPM. Estimates below this +/// (e.g. a lock onto a breathing harmonic, which the firmware #987 fix also +/// guards against) are rejected rather than emitted as a confident vital — a +/// false low HR is a safety problem. Value-identical to the prior literal. +const HR_PLAUSIBLE_MIN_BPM: f64 = 40.0; +/// Highest physiologically plausible heart rate, in BPM. Estimates above this +/// are rejected. Value-identical to the prior literal. +const HR_PLAUSIBLE_MAX_BPM: f64 = 180.0; + /// Heart rate extractor using bandpass filtering and autocorrelation /// peak detection. pub struct HeartRateExtractor { @@ -140,8 +149,11 @@ impl HeartRateExtractor { let frequency_hz = self.sample_rate / period_samples as f64; let bpm = frequency_hz * 60.0; - // Validate BPM is in physiological range (40-180 BPM) - if !(40.0..=180.0).contains(&bpm) { + // Validate BPM is in the physiological plausibility band. An estimate + // outside [HR_PLAUSIBLE_MIN_BPM, HR_PLAUSIBLE_MAX_BPM] is rejected + // rather than emitted, so an out-of-band autocorrelation lock can never + // surface as a confident heart rate. + if !(HR_PLAUSIBLE_MIN_BPM..=HR_PLAUSIBLE_MAX_BPM).contains(&bpm) { return None; } @@ -191,6 +203,20 @@ impl HeartRateExtractor { let output = (1.0 - r) * (input - state.x2) + 2.0 * r * cos_w0 * state.y1 - r * r * state.y2; + // Self-healing non-finite guard (ADR-158 §A1). A single non-finite + // sample — a NaN/inf residual from a corrupt CSI frame, or a transient + // overflow — would otherwise be written into `y1`/`y2` and poison the + // resonator recurrence *permanently*: every later output stays NaN, the + // `extract()` finite-check drops it, `acf0` never recomputes on fresh + // data, and heart-rate extraction is dead until `reset()`. Resetting the + // filter state here lets the resonator recover on the next clean frame; + // the 0.0 returned for this frame is still dropped by the caller's + // `is_finite()` check, so no spurious sample enters history. + if !output.is_finite() { + *state = IirState::default(); + return 0.0; + } + state.x2 = state.x1; state.x1 = input; state.y2 = state.y1; @@ -420,6 +446,92 @@ mod tests { assert_eq!(ext.n_subcarriers, 56); } + /// Pin the physiological plausibility band to its documented values. If a + /// future edit widens these, an implausible HR could be emitted as a + /// confident vital — this characterization test forces that to be a + /// deliberate, reviewed change. + #[test] + fn plausibility_band_constants_pinned() { + assert!((HR_PLAUSIBLE_MIN_BPM - 40.0).abs() < f64::EPSILON); + assert!((HR_PLAUSIBLE_MAX_BPM - 180.0).abs() < f64::EPSILON); + } + + /// ADR-158 §A1 bug-catching test: a single non-finite residual must NOT + /// permanently poison the IIR filter state. + /// + /// The cardiac resonator latches `y[n]` into `state.y1`/`y2`. Before the + /// fix, one NaN/inf residual produced a NaN `output` that was stored into + /// the state; the `extract()` finite-guard dropped that frame from history, + /// but every subsequent output stayed NaN, so the history buffer never + /// refilled and HR extraction was dead until `reset()`. After a leading NaN + /// frame, the OLD code returned `None` with `history_len() == 0` forever. + /// This asserts recovery (FAILS on the old code). + #[test] + fn nan_frame_does_not_permanently_poison_filter() { + let sr = 50.0; + let feed_clean = |ext: &mut HeartRateExtractor| { + let mut last = None; + for i in 0..1200 { + let t = i as f64 / sr; + let base = (2.0 * std::f64::consts::PI * 1.2 * t).sin(); + let r = vec![base * 0.1, base * 0.08, base * 0.12, base * 0.09]; + last = ext.extract(&r, &[0.0, 0.01, 0.02, 0.03]); + } + last + }; + + let mut control = HeartRateExtractor::new(4, sr, 20.0); + feed_clean(&mut control); + assert!(control.history_len() > 0, "control clean run must accumulate history"); + + let mut ext = HeartRateExtractor::new(4, sr, 20.0); + ext.extract(&[f64::NAN, 0.1, 0.1, 0.1], &[0.0, 0.01, 0.02, 0.03]); + feed_clean(&mut ext); + assert!( + ext.history_len() > 0, + "HR extractor must recover and refill history after a NaN frame (got {})", + ext.history_len() + ); + } + + /// Safety negative: pure broadband noise (no cardiac component) must NOT be + /// reported as a clinically `Valid` heart rate. A false "HR = 72 bpm" on + /// noise is a safety problem (false reassurance / false alert). The + /// extractor may still emit a low-confidence guess, but its status must be + /// `Degraded`/`Unreliable`, never `Valid`. Mirrors the honest-negative + /// requirement in the review brief. + #[test] + fn pure_noise_is_never_reported_valid() { + let mut seed: u64 = 0x1234_5678; + let mut rng = || { + seed = seed + .wrapping_mul(6_364_136_223_846_793_005) + .wrapping_add(1_442_695_040_888_963_407); + ((seed >> 33) as f64 / (1u64 << 31) as f64) - 1.0 + }; + let mut ext = HeartRateExtractor::new(8, 50.0, 20.0); + let mut last = None; + for _ in 0..1500 { + let r: Vec = (0..8).map(|_| rng()).collect(); + let p: Vec = (0..8).map(|_| rng()).collect(); + last = ext.extract(&r, &p); + } + if let Some(est) = last { + assert_ne!( + est.status, + VitalStatus::Valid, + "pure noise must not yield a clinically Valid HR (bpm={}, conf={})", + est.value_bpm, + est.confidence + ); + assert!( + est.confidence < 0.6, + "noise HR confidence must stay below the Valid cutoff: {}", + est.confidence + ); + } + } + /// ADR-157 §A3 bug-catching test. /// /// Divergence needs the pole *magnitude* `|r| >= 1`, i.e. `bw >= 4`. With