From eaedfded6f0de0c5e3888eeb78ff899813c71932 Mon Sep 17 00:00:00 2001 From: rUv Date: Mon, 11 May 2026 23:40:55 -0400 Subject: [PATCH] fix(train): wire wifi-densepose-signal into the pipeline; correct MODEL_CARD env-sensor claim (#536) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses three findings from the 2026-05-11 training-pipeline audit: #1/#2 — `wifi-densepose-signal` was a phantom dependency of `wifi-densepose-train` (listed in Cargo.toml, never imported), and vitals/CSI signal features were absent from the pipeline. New module `wifi_densepose_train::signal_features`: `extract_signal_features(&Array4<f32>, &Array4<f32>) -> Array1<f32>` (and the convenience method `CsiSample::signal_features()`) runs a windowed observation's centre frame through `wifi_densepose_signal::features::FeatureExtractor`, producing a fixed-length (FEATURE_LEN=12) amplitude / phase-coherence / PSD feature vector — the hook for a future vitals / multi-task supervision head (breathing- and heart-rate-band power are read off the PSD summary). The vector is produced on demand and is not yet fed back into the loss; wiring it as a training target is the documented follow-up. `wifi-densepose-signal` is now an actually-used dependency. 5 new tests (2 unit in signal_features.rs, 3 integration in tests/test_dataset.rs); existing wifi-densepose-train tests unchanged and green. #3 — `docs/huggingface/MODEL_CARD.md` presented PIR/BME280 environmental-sensor weak-label fine-tuning as a current capability; there is no env-sensor ingestion in the training pipeline. Marked that path as planned/not-implemented in the training-steps list and the data-provenance section. (#5 — README's "92.9% PCK@20" overclaim — fixed separately in PR #535.) CHANGELOG updated.
--- CHANGELOG.md | 7 + docs/huggingface/MODEL_CARD.md | 6 +- v2/crates/wifi-densepose-train/src/dataset.rs | 17 ++ v2/crates/wifi-densepose-train/src/lib.rs | 1 + .../src/signal_features.rs | 155 ++++++++++++++++++ .../tests/test_dataset.rs | 49 ++++++ 6 files changed, 232 insertions(+), 3 deletions(-) create mode 100644 v2/crates/wifi-densepose-train/src/signal_features.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 639257d0..a62f1915 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 PowerPlatePulse training-pipeline audit (2026-05-11); 6 remaining audit findings tracked in the PR. +### Added +- **`wifi-densepose-train`: `signal_features` module — wires `wifi-densepose-signal` into the training pipeline.** `wifi-densepose-signal` was previously a phantom dependency of `wifi-densepose-train` (listed in `Cargo.toml`, never imported). New `wifi_densepose_train::signal_features::extract_signal_features` (and `CsiSample::signal_features()`) run a windowed CSI observation's centre frame through `wifi_densepose_signal::features::FeatureExtractor`, producing a fixed-length (`FEATURE_LEN = 12`) amplitude/phase/PSD feature vector — the hook for a future vitals / multi-task supervision head (breathing- and heart-rate-band power are read off the PSD summary). The vector is produced on demand and not yet fed back into the loss. Surfaced by the 2026-05-11 training-pipeline audit (findings #1 "vitals features absent from training" and #2 "`wifi-densepose-signal` ghost dep"). + +### Fixed +- **HuggingFace `MODEL_CARD.md`: marked the PIR/BME280 environmental-sensor ground-truth path as planned, not implemented** (training-pipeline audit finding #3) — the card presented PIR/BME280 weak-label fine-tuning as a current capability; there is no env-sensor ingestion in the training pipeline today. 
+- **README: corrected the camera-supervised pose-accuracy claim** (audit finding #5; see PR #535) — "92.9% PCK@20" → the ADR-079 target (35%+; proxy baseline 35.3%), noting P7/P8/P9 are pending. + ### Added - **`nvsim` crate — deterministic NV-diamond magnetometer pipeline simulator** (ADR-089) — New standalone leaf crate at `v2/crates/nvsim` modeling a forward-only diff --git a/docs/huggingface/MODEL_CARD.md b/docs/huggingface/MODEL_CARD.md index 805d4f32..7106c95b 100644 --- a/docs/huggingface/MODEL_CARD.md +++ b/docs/huggingface/MODEL_CARD.md @@ -168,14 +168,14 @@ The training process works like this: 1. **Collect** raw CSI frames from ESP32-S3 nodes placed in a room 2. **Extract** 8-dimensional feature vectors from sliding windows of CSI data 3. **Contrast** -- the model learns that features from nearby time windows should produce similar embeddings, while features from different scenarios should produce different embeddings -4. **Fine-tune** task heads using weak labels from environmental sensors (PIR motion, temperature, pressure) on the Cognitum Seed companion device +4. **Fine-tune** task heads — *planned:* weak labels from environmental sensors (PIR motion, temperature, pressure) on the Cognitum Seed companion device. **This environmental-sensor ground-truth path is not yet implemented** (no PIR/BME280 ingestion in the training pipeline today); current task-head supervision uses the proxy/camera labels described elsewhere. ### Data provenance - **Source:** Live CSI from 2x ESP32-S3 nodes (802.11n, HT40, 114 subcarriers) - **Volume:** ~360,000 CSI frames (~3,600 feature vectors) per collection run - **Environment:** Residential room, ~4x5 meters -- **Ground truth:** Environmental sensors on Cognitum Seed (PIR, BME280, light) +- **Ground truth:** *Planned* — environmental sensors on the Cognitum Seed (PIR, BME280, light). Not yet wired into training; treat the PIR/BME280 references in this card as the intended design, not a current capability. 
- **Attestation:** Every collection run produces a cryptographic witness chain (`collection-witness.json`) that proves data provenance and integrity ### Witness chain @@ -208,7 +208,7 @@ Add a second ESP32-S3 to enable cross-node signal fusion for better accuracy and | USB-C cables (x3) | Power + data | ~$9 | | **Total** | | **~$27** | -The Cognitum Seed runs the ONNX models on-device, orchestrates the ESP32 nodes over USB serial, and provides environmental ground truth via its onboard PIR and BME280 sensors. +The Cognitum Seed runs the ONNX models on-device and orchestrates the ESP32 nodes over USB serial. (Using its onboard PIR/BME280 sensors as training ground truth is planned but not yet implemented — see "Data provenance" above.) --- diff --git a/v2/crates/wifi-densepose-train/src/dataset.rs b/v2/crates/wifi-densepose-train/src/dataset.rs index 7ee18d53..d1406502 100644 --- a/v2/crates/wifi-densepose-train/src/dataset.rs +++ b/v2/crates/wifi-densepose-train/src/dataset.rs @@ -92,6 +92,23 @@ pub struct CsiSample { pub frame_id: u64, } +impl CsiSample { + /// Derive the compact signal-processing feature vector for this sample + /// via [`crate::signal_features::extract_signal_features`] (see that + /// function for the layout, and [`crate::signal_features::FEATURE_LEN`] + /// for its length). + /// + /// Computed on demand from [`Self::amplitude`]/[`Self::phase`] — not + /// cached on the struct. This is the hook for folding the SOTA + /// signal-processing crate's amplitude/phase/PSD features (and, in a + /// later iteration, vitals-band power) into training; the raw vector is + /// returned here and is not yet fed back into the loss. 
+    #[must_use]
+    pub fn signal_features(&self) -> Array1<f32> {
+        crate::signal_features::extract_signal_features(&self.amplitude, &self.phase)
+    }
+}
+
 // ---------------------------------------------------------------------------
 // CsiDataset trait
 // ---------------------------------------------------------------------------
diff --git a/v2/crates/wifi-densepose-train/src/lib.rs b/v2/crates/wifi-densepose-train/src/lib.rs
index 8831c549..08304840 100644
--- a/v2/crates/wifi-densepose-train/src/lib.rs
+++ b/v2/crates/wifi-densepose-train/src/lib.rs
@@ -51,6 +51,7 @@ pub mod eval;
 pub mod geometry;
 pub mod rapid_adapt;
 pub mod ruview_metrics;
+pub mod signal_features;
 pub mod subcarrier;
 pub mod virtual_aug;

diff --git a/v2/crates/wifi-densepose-train/src/signal_features.rs b/v2/crates/wifi-densepose-train/src/signal_features.rs
new file mode 100644
index 00000000..e3ab58a2
--- /dev/null
+++ b/v2/crates/wifi-densepose-train/src/signal_features.rs
@@ -0,0 +1,155 @@
+//! Hand-off layer between raw windowed CSI and the SOTA signal-processing
+//! crate ([`wifi_densepose_signal`]).
+//!
+//! Historically `wifi-densepose-signal` was listed as a dependency of this
+//! crate but never imported — the training pipeline only ever consumed the
+//! raw amplitude/phase tensors. This module wires the two together: it takes
+//! a windowed CSI observation and runs it through
+//! [`wifi_densepose_signal::features::FeatureExtractor`] to derive a compact,
+//! fixed-length feature vector (amplitude statistics, phase coherence, and a
+//! power-spectral-density summary).
+//!
+//! These derived features are the building block for a future vitals /
+//! multi-task supervision head (breathing-band and heart-rate-band power can
+//! be read off the PSD summary); for now they are produced on demand via
+//! [`extract_signal_features`] / [`crate::dataset::CsiSample::signal_features`]
+//! and are not yet fed back into the loss. Wiring them as a training target
+//!
is tracked as a follow-up to the 2026-05-11 training-pipeline audit. + +use ndarray::{s, Array1, Array4}; +use wifi_densepose_signal::csi_processor::CsiData; +use wifi_densepose_signal::features::FeatureExtractor; + +/// Length of the vector returned by [`extract_signal_features`]. +/// +/// The layout is: +/// 1. amplitude peak +/// 2. amplitude RMS +/// 3. amplitude dynamic range (max − min) +/// 4. mean of the per-subcarrier amplitude means +/// 5. mean of the per-subcarrier amplitude variances +/// 6. phase coherence +/// 7. mean of the per-subcarrier phase variances +/// 8. PSD total power +/// 9. PSD peak power +/// 10. PSD peak frequency (Hz) +/// 11. PSD spectral centroid +/// 12. PSD spectral bandwidth +pub const FEATURE_LEN: usize = 12; + +/// Default centre frequency assumed when the CSI window carries no metadata. +const DEFAULT_CENTRE_FREQ_HZ: f64 = 2.4e9; + +/// Default channel bandwidth (HT40) assumed when the CSI window carries no +/// metadata. +const DEFAULT_BANDWIDTH_HZ: f64 = 40.0e6; + +/// Derive a compact, fixed-length ([`FEATURE_LEN`]) signal-processing feature +/// vector from a windowed CSI observation by running its centre frame through +/// [`wifi_densepose_signal::features::FeatureExtractor`]. +/// +/// `amplitude` and `phase` are `[window_frames, n_tx, n_rx, n_subcarriers]` +/// tensors (the [`crate::dataset::CsiSample`] layout). The centre frame is +/// flattened to `[n_tx · n_rx, n_subcarriers]` (the antenna-major shape the +/// signal crate expects) and converted to `f64`. +/// +/// The returned values are always finite for finite input: the underlying +/// extractors clamp degenerate cases, and any non-finite result is mapped to +/// `0.0` so callers can rely on the vector being usable as a model feature. 
+pub fn extract_signal_features(amplitude: &Array4<f32>, phase: &Array4<f32>) -> Array1<f32> {
+    let (n_t, n_tx, n_rx, n_sc) = amplitude.dim();
+    debug_assert_eq!(amplitude.dim(), phase.dim(), "amplitude/phase shape mismatch");
+    if amplitude.dim() != phase.dim() || n_t == 0 || n_tx == 0 || n_rx == 0 || n_sc == 0 {
+        return Array1::zeros(FEATURE_LEN); // mismatched or empty window: zeros, never a panic
+    }
+    let n_ant = n_tx * n_rx;
+    let t = n_t / 2;
+
+    let to_2d = |src: &Array4<f32>| -> Vec<f64> {
+        src.slice(s![t, .., .., ..]).iter().map(|&v| f64::from(v)).collect()
+    };
+    let amp2d = match ndarray::Array2::from_shape_vec((n_ant, n_sc), to_2d(amplitude)) {
+        Ok(a) => a,
+        Err(_) => return Array1::zeros(FEATURE_LEN),
+    };
+    let phase2d = match ndarray::Array2::from_shape_vec((n_ant, n_sc), to_2d(phase)) {
+        Ok(p) => p,
+        Err(_) => return Array1::zeros(FEATURE_LEN),
+    };
+
+    let csi = match CsiData::builder()
+        .amplitude(amp2d)
+        .phase(phase2d)
+        .frequency(DEFAULT_CENTRE_FREQ_HZ)
+        .bandwidth(DEFAULT_BANDWIDTH_HZ)
+        .build()
+    {
+        Ok(c) => c,
+        Err(_) => return Array1::zeros(FEATURE_LEN),
+    };
+
+    let feats = FeatureExtractor::default_config().extract(&csi);
+
+    let amp_mean_overall = mean_or_zero(feats.amplitude.mean.iter().copied());
+    let amp_var_overall = mean_or_zero(feats.amplitude.variance.iter().copied());
+    let phase_var_overall = mean_or_zero(feats.phase.variance.iter().copied());
+
+    let raw = [
+        feats.amplitude.peak,
+        feats.amplitude.rms,
+        feats.amplitude.dynamic_range,
+        amp_mean_overall,
+        amp_var_overall,
+        feats.phase.coherence,
+        phase_var_overall,
+        feats.psd.total_power,
+        feats.psd.peak_power,
+        feats.psd.peak_frequency,
+        feats.psd.centroid,
+        feats.psd.bandwidth,
+    ];
+    debug_assert_eq!(raw.len(), FEATURE_LEN);
+    Array1::from_iter(raw.iter().map(|&v| sanitise(v)))
+}
+
+/// Mean of an iterator of `f64`, or `0.0` if it is empty or non-finite.
+fn mean_or_zero<I: Iterator<Item = f64>>(it: I) -> f64 {
+    let (sum, n) = it.fold((0.0_f64, 0_usize), |(s, k), v| (s + v, k + 1));
+    let mean = sum / n as f64; // empty input gives 0.0 / 0.0 = NaN, caught below
+    if mean.is_finite() {
+        mean
+    } else {
+        0.0
+    }
+}
+
+/// Downcast to `f32`, mapping anything that is non-finite after narrowing to `0.0`.
+fn sanitise(v: f64) -> f32 {
+    if (v as f32).is_finite() { // also rejects finite f64 that overflow f32 to ±inf
+        v as f32
+    } else {
+        0.0
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use ndarray::Array4;
+
+    #[test]
+    fn zero_sized_input_yields_zero_vector() {
+        let empty = Array4::<f32>::zeros((0, 0, 0, 0));
+        let f = extract_signal_features(&empty, &empty);
+        assert_eq!(f.len(), FEATURE_LEN);
+        assert!(f.iter().all(|&v| v == 0.0));
+    }
+
+    #[test]
+    fn constant_input_is_finite_and_correct_length() {
+        let amp = Array4::<f32>::from_elem((4, 3, 3, 56), 1.5);
+        let phase = Array4::<f32>::from_elem((4, 3, 3, 56), 0.25);
+        let f = extract_signal_features(&amp, &phase);
+        assert_eq!(f.len(), FEATURE_LEN);
+        assert!(f.iter().all(|v| v.is_finite()), "features must be finite: {f:?}");
+    }
+}
diff --git a/v2/crates/wifi-densepose-train/tests/test_dataset.rs b/v2/crates/wifi-densepose-train/tests/test_dataset.rs
index 550266ea..25fe005f 100644
--- a/v2/crates/wifi-densepose-train/tests/test_dataset.rs
+++ b/v2/crates/wifi-densepose-train/tests/test_dataset.rs
@@ -458,3 +458,52 @@ fn dataloader_empty_dataset_zero_batches() {
         "iterator over empty dataset must yield 0 items"
     );
 }
+
+// ---------------------------------------------------------------------------
+// CsiSample::signal_features — the wifi-densepose-signal wiring
+// ---------------------------------------------------------------------------
+
+/// `signal_features()` must return a vector of exactly `FEATURE_LEN`, all
+/// finite, for a real (synthetic) sample.
+#[test]
+fn signal_features_have_correct_length_and_are_finite() {
+    use wifi_densepose_train::signal_features::FEATURE_LEN;
+
+    let ds = SyntheticCsiDataset::new(8, default_cfg());
+    let sample = ds.get(0).expect("sample 0 must exist");
+    let feats = sample.signal_features();
+    assert_eq!(
+        feats.len(),
+        FEATURE_LEN,
+        "signal_features() must return FEATURE_LEN ({FEATURE_LEN}) values"
+    );
+    assert!(
+        feats.iter().all(|v| v.is_finite()),
+        "all signal features must be finite, got {feats:?}"
+    );
+}
+
+/// `signal_features()` is deterministic for a given (deterministic) sample.
+#[test]
+fn signal_features_are_deterministic() {
+    let ds = SyntheticCsiDataset::new(8, default_cfg());
+    let a = ds.get(0).expect("sample 0").signal_features();
+    let b = ds.get(0).expect("sample 0").signal_features();
+    assert_eq!(
+        a, b,
+        "signal_features() must be deterministic for the same sample"
+    );
+}
+
+/// `extract_signal_features` returns the zero vector for a zero-sized window
+/// rather than panicking.
+#[test]
+fn signal_features_zero_window_is_zero_vector() {
+    use ndarray::Array4;
+    use wifi_densepose_train::signal_features::{extract_signal_features, FEATURE_LEN};
+
+    let empty = Array4::<f32>::zeros((0, 0, 0, 0));
+    let feats = extract_signal_features(&empty, &empty);
+    assert_eq!(feats.len(), FEATURE_LEN);
+    assert!(feats.iter().all(|&v| v == 0.0));
+}