From cfd0ad76cfd273737e4299dbd3e77b493f04e218 Mon Sep 17 00:00:00 2001 From: rUv Date: Sun, 14 Jun 2026 23:58:09 -0400 Subject: [PATCH] security(core,cli): pin CSI-deserialiser DoS-resistance + ADR-172 (clean-with-evidence) (#1091) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * test(core,cli): pin DoS-resistance of CSI deserialisers (ADR-127 security review) Beyond-SOTA security review of wifi-densepose-core + wifi-densepose-cli. Load-bearing-question verdict: the NaN-state-poisoning bug class does NOT originate in core — core exposes no stateful accumulator (no Welford, von-Mises, IIR, voxel grid, running mean); each downstream crate rolls its own, so each fix is correctly local. Both crates confirmed clean on every reviewed dimension (panic-on-adversarial-input, NaN handling, unbounded memory, path traversal, secrets) — no production code changed. Adds 4 regression pins locking in two existing-but-untested DoS guards: - core: from_canonical_bytes shape guard (Vec::with_capacity bound) — proven to fail with `capacity overflow` when the saturating-mul guard is removed. - core: canonical decoder never panics on arbitrary/truncated bytes. - cli: parse_csi_packet rejects an oversized n_antennas*n_subcarriers claim before Array2 allocation (33 MB claim in a 2 KB datagram -> None). - cli: parse_csi_packet never panics on arbitrary UDP bytes. core: 35 -> 37 lib tests; cli: 24 -> 26 tests; 0 failed. Python proof unchanged (f8e76f21…46f7a — off the signal path). Co-Authored-By: claude-flow * docs(adr): ADR-172 — wifi-densepose-cli + core CSI-deserialiser security review Records the clean-with-evidence verdict + 4 DoS-resistance regression pins (test-only, committed in a1051607d). Documents the load-bearing finding: the NaN-state-poisoning bug class does NOT originate in a shared core primitive (core exposes no stateful accumulator — MEASURED via grep), so the 3 prior downstream-local fixes are complete. Gives the wifi-densepose-cli review its own ADR slot (core portion cross-refs ADR-127 §9). Co-Authored-By: claude-flow --- CHANGELOG.md | 1 + ...i-core-csi-deserialiser-security-review.md | 117 ++++++++++++++++++ v2/crates/wifi-densepose-cli/src/calibrate.rs | 48 +++++++ v2/crates/wifi-densepose-core/src/types.rs | 61 +++++++++ 4 files changed, 227 insertions(+) create mode 100644 docs/adr/ADR-172-cli-core-csi-deserialiser-security-review.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 3194df50..b1fd80ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Security +- **`wifi-densepose-core` + `wifi-densepose-cli` beyond-SOTA security review (ADR-127 note; CLI needs ADR slot) — NaN-state-poisoning bug class does NOT originate in core (verdict: no + evidence); both crates confirmed clean on all reviewed dimensions; 4 regression pins added locking in two real DoS guards.** **Load-bearing question — verdict NO (with evidence, MEASURED).** The NaN-state-poisoning class that hit `wifi-densepose-calibration`/`-vitals`/`-geo` (a non-finite input latching into a persistent IIR/Welford/von-Mises/voxel accumulator → silent permanent feature failure) does **not** live in a shared `wifi-densepose-core` primitive: core exposes **no stateful accumulator at all** — no Welford/running-mean, no von-Mises/circular-mean, no IIR/biquad filter state, no voxel grid. Grep over `core/src` for `welford|von_mises|biquad|y1|y2|running_mean|accumulat|voxel|self.*+=` matched only the `InvalidState` *error* enum, "reset state" doc comments, and a test-only LCG — zero stateful logic (MEASURED). Each downstream crate rolls its own accumulator, so each fix is correctly local; corroborated by `wifi-densepose-calibration::Features::from_series`, which **already** filters non-finite samples and returns `Features::ZERO` (the downstream re-implementation of the fix). The only float math in core's hot path is construction-time projection (`CsiFrame::new` → `amplitude`/`phase` via `mapv`) and pure stateless `utils` functions — none persists state across frames. **Dimensions confirmed clean (with evidence):** (1) **panic-on-adversarial-input = 0** — `CsiFrame::from_canonical_bytes` (the replay/forward deserialisation boundary) returns a typed `CanonicalDecodeError` for every malformed input (truncation, bad discriminant, non-UTF-8 device id, nonzero reserved bytes, shape/payload mismatch, trailing bytes); the CLI UDP parser `parse_csi_packet` (the widest CLI attack surface — bound to `0.0.0.0` by default) returns `None` on any malformed datagram. Both proven panic-free over deterministic-LCG fuzz sweeps (new pins). (2) **`Confidence::new` rejects NaN** (`!(0.0..=1.0).contains(&NaN)` ⇒ `true` ⇒ `Err`); `compute_bounding_box`/`to_flat_array` are NaN-tolerant (f32 `min`/`max` ignore NaN). (3) **`amplitude_variance`/`mean_amplitude` panic-free on empty frames** — ndarray 0.17 `var(0.0)`/`mean()` return finite/`None` (handled), not a panic (MEASURED via throwaway probe). (4) **Unbounded-memory DoS bounded** in both deserialisers: `from_canonical_bytes` guards the `Vec::with_capacity(rows*cols)` with `rows.saturating_mul(cols).saturating_mul(16) <= bytes.len()`; `parse_csi_packet` guards `Array2::zeros` with `buf.len() < 20 + n_pairs*2` — a header lying about an enormous `rows×cols` / `n_antennas×n_subcarriers` is rejected before allocation. (5) **CLI path-traversal** in `calibrate-serve` already defended by `sanitize_room_id` (keeps `[A-Za-z0-9_-]`, caps 64 chars, with tests) on every client-supplied `room_id`/`bank`/`baseline` name that reaches a file path; bearer-auth gate + non-loopback-bind warning present. (6) **No hardcoded secrets** (`--token` read from `CALIBRATE_TOKEN` env, never embedded). **Regression pins added (fails-on-old / passes-on-new):** core `canonical_decode_oversized_shape_is_bounded_not_allocated` (MEASURED: with the saturating guard removed it panics `capacity overflow` at `types.rs:801`; passes with the guard) and `canonical_decode_never_panics_on_arbitrary_bytes`; CLI `test_parse_csi_packet_oversized_claim_is_rejected_not_allocated` (a 255×65535-pair claim ≈ 33 MB in a 2 KB datagram → `None`, never OOMs) and `test_parse_csi_packet_never_panics_on_arbitrary_bytes`. `cargo test -p wifi-densepose-core`: **35 → 37** lib tests, 0 failed; `cargo test -p wifi-densepose-cli --no-default-features`: **24 → 26**, 0 failed. Workspace green; Python deterministic proof unchanged (`f8e76f21…46f7a`, bit-exact — core/cli are off the signal proof path). No production code changed — review is clean-with-evidence plus pins. - **`homecore` foundational state-machine review (ADR-127) — one real concurrency bug fixed (state-set TOCTOU dropping/reordering `state_changed` events) + two hardening fixes (entity_id memory-DoS, service-handler panic isolation), each pinned by a fails-on-old test; event-bus lag & lock discipline confirmed clean with evidence.** Beyond-SOTA security+concurrency review of the crate every other HOMECORE module builds on (state store `state.rs`, event bus `bus.rs`, service/entity registries, the `HomeCore` coordinator), un-covered by the ADR-154–159 sweep — a bug here is high-blast-radius. **HC-RACE-01 (state-set TOCTOU, the crux — race/lost-event).** `StateMachine::set` did `get()` (releasing the DashMap shard lock) → compute the next snapshot + the no-op/`last_changed` decision → `insert()` (re-acquiring the lock) → `send()`; the read-modify-write was **not atomic** w.r.t. a concurrent writer on the same entity, contradicting ADR-127 §2.1's promise that "the writer atomically replaces the map entry." A writer that read a **stale `old`** could mis-classify a genuine transition as a no-op and **silently drop its `state_changed` event** (a missed automation trigger) or fire an event whose `new_state` duplicated the previously delivered one (a spurious trigger for any automation keyed on `old_state != new_state`). **Fixed** by holding the shard write-lock across the whole read→decide→insert→fire sequence via `entry()`/`insert_entry()` — `tx.send` is non-blocking, non-async, and never re-enters the map, so firing under the shard lock cannot deadlock and keeps global event order in lock-step with global commit order. Pinned by `concurrent_set_fires_no_duplicate_adjacent_events` (4 writers toggling one entity A/B; asserts no two consecutive fired events carry an identical `new_state` — impossible under correct serialisation; an instrumented probe observed ~93k such duplicate-adjacent events across 200 trials on the racy code, **zero** on the fix; the test fails reliably on the first trial pre-fix). **HC-EID-LEN-01 (unbounded `entity_id`, memory-DoS).** `homecore-api/src/rest.rs` parses untrusted REST path segments straight through `EntityId::parse`; with no length cap an otherwise-valid id (`a.` + many MB of `[a-z0-9_]`) was accepted, and a `POST /api/states/` would persist it into the DashMap state store (permanent growth across distinct ids). **Fixed** by rejecting ids longer than `MAX_ENTITY_ID_LEN` (255, HA-compatible) up front in `parse()`, before any per-char scan, with a new `EntityIdError::TooLong` — fail-closed at the boundary type protects every caller (REST, registry deserialize, automation). Pinned by `entity_id_length_boundary` (exactly-MAX accepted; MAX+1 and a 4 MiB id rejected — oversized parses `Ok` on old code). **HC-SVC-PANIC-01 (service-handler panic not isolated).** `ServiceRegistry::call` already ran handlers **outside** the registry lock (the `Arc` is cloned out of the read guard first → no `RwLock` poisoning, no blocking of other callers — clean), but a panicking handler unwound through `call()` into the caller's task (the task driving the engine). **Hardened** by wrapping the handler future in `AssertUnwindSafe` + `catch_unwind`, converting a panic to `ServiceError::HandlerPanicked`; the registry stays fully usable (a sibling healthy service still returns, the bad service stays registered). Pinned by `panicking_handler_is_isolated_and_registry_survives` (unwinds through `call` on old code). **Dimensions confirmed clean (with evidence, no invented issues):** (1) **event-bus bounds / lag** (the homecore-api WS lag-DoS class) — both `StateMachine` and `EventBus` use **bounded** `tokio::sync::broadcast` (capacity 4,096); a slow subscriber gets a recoverable `Lagged(n)` (drop-oldest + re-sync) while `fire_*` is non-blocking and never waits on slow receivers, so a lagging subscriber **cannot block the publisher, grow the channel without bound, or kill a fast subscriber** (evidenced by `slow_subscriber_does_not_block_publisher_or_kill_the_bus` — fire 3× capacity at an idle subscriber, publisher unblocked, bus stays live, fresh fast subscriber receives, lagged one recovers); (2) **lock ordering / lock-across-await** (deadlock) — no code path holds two of `{state DashMap, registry RwLock, service RwLock}` simultaneously, so no inconsistent-ordering deadlock can exist; every `tokio::sync::RwLock` guard in `registry.rs`/`service.rs` is used in one synchronous statement and dropped before any `.await` (`call` explicitly scopes the read guard out before awaiting the handler); the only guard held across a send is the DashMap shard lock in `set`, across a **synchronous** broadcast send — safe; (3) **panic-on-input** — no reachable `unwrap`/`expect`/index in non-test code beyond the safe `send().unwrap_or(0)` and the dead-but-harmless `split_once(...).unwrap_or(...)` fallbacks on already-validated ids. `cargo test -p homecore --no-default-features`: **20 → 24 passed, 0 failed** (+4 pins). Workspace green; Python deterministic proof unchanged (`f8e76f21…46f7a`, bit-exact — `homecore` is off the signal proof path). Review notes appended to ADR-127 §9. - **`homecore-migrate` security review (ADR-165 surfaces) — one real secret-leak fix; traversal / data-loss / panic / injection dimensions confirmed clean with evidence.** Beyond-SOTA review of the Home-Assistant `.storage`/`secrets.yaml`/`automations.yaml` migrator, the two sharp surfaces being secret handling (`secrets.rs`) and untrusted-file parsing. **Finding + fix (secret-leak, `secrets.rs`):** a malformed `secrets.yaml` whose offending scalar fails a typed-tag coercion (e.g. `port: !!int `) produced a `serde_yaml` error whose message **embeds the scalar verbatim** — `invalid value: string ""`. The old code wrapped that message into `MigrateError::YamlParse { source }`; the error propagates out of `read_secrets`, is `?`-returned by the `InspectSecrets` CLI path in `main.rs`, and printed to stderr by `anyhow` — **leaking a secret value despite the CLI's deliberate `` design** (`main.rs` only ever prints keys as ` = `). Fix: `secrets.yaml` parse failures now map to a dedicated redacting variant `MigrateError::SecretsParse { path, line, column }` carrying only the file path + a coarse location (from `serde_yaml::Error::location()`), never the scalar; other (non-secret) YAML files keep `YamlParse`. **Pinned** by `secrets::tests::malformed_secrets_error_never_contains_secret_value`, which asserts the rendered error **and its full `#[source]` chain** never contain the secret value and that the error is still the structured `SecretsParse` (fail-closed) — it **fails on the old `YamlParse` path** (observed leak: `... invalid value: string "s3cr3t_TOKEN_VALUE" ...`) and passes on the fix; plus `malformed_secrets_error_reports_location` (still locatable). **Confirmed clean with evidence:** *secret leakage elsewhere* — the only secret sink is the value map; `main.rs` redacts values, and the `MissingField`/`Io` paths surface only the path, never content. *Source mutation / data-loss* — **structurally impossible**: there is no `fs::write`/`fs::remove`/`fs::create`/`File::create`/`OpenOptions` anywhere in the crate; P1 reads source and writes nothing (`import-entities` is in-memory only), so re-runs are trivially idempotent and the HA source is never touched. *Path traversal* — CLI takes a `--config-dir`/`--storage` dir and joins **fixed** filenames (`secrets.yaml`, `core.entity_registry`, …); no user-controlled path component, no `..`/absolute escape beyond the user's own privileges. *Panic-on-input* — probed duplicate-key, bad-indent, tab/control-char, multi-doc, non-mapping-root, unterminated-flow, `!input` blueprint tags, deep nesting, anchors: **every** malformed/typed/truncated input **errors, never panics** (all production code is panic-free; every `unwrap`/`expect` is `#[cfg(test)]`). *Fail-closed versioning* — unknown storage `minor_version` hard-errors (no silent fallback to an older parser). *Injection* — no SQL/shell/path interpolation; the tool emits diagnostics only and persists nothing in P1. `homecore-migrate` **19 → 21** tests (`--no-default-features`), 0 failed. Behaviour otherwise unchanged; Python deterministic proof PASS, hash unchanged (`homecore-migrate` is off the signal proof path). - **`homecore-recorder` security review (ADR-132 surfaces) — two real bounding fixes; SQL-injection & NaN-index dimensions confirmed clean with evidence.** Beyond-SOTA review of the HA-compat state recorder (DB persistence + history + ruvector semantic search), the crux being its DB-backed SQL-injection surface. **Findings + fixes:** (1) **Memory-DoS — unbounded `get_state_history`.** The history query carried no `LIMIT`, so a wide `[since, until]` window over a high-frequency entity (a per-second sensor ≈ 86k rows/day) would load an unbounded row set into a single in-memory `Vec`. Added a hard `LIMIT MAX_HISTORY_ROWS` (1,000,000 — generous enough never to truncate a realistic history graph, bounded enough to cap the worst case); the sibling search paths were already `k`-bounded. (2) **Disk-DoS / documented-but-missing `purge`.** The README + HA-compat table advertised `Recorder::purge(older_than)` as a capability, but **no such method existed** — i.e. no retention path at all → unbounded disk growth. Implemented a **transactional** `purge` that deletes `states` + `events` strictly **older than** the cutoff (**exclusive** boundary — idempotent, no off-by-one; a row at the cutoff instant is kept) and **garbage-collects** orphaned `state_attributes` blobs (a dedup-shared blob is dropped only once its last referencing state is gone); all three deletes run in one transaction so a mid-purge failure rolls back cleanly (no states-deleted-but-events-kept corruption). **Confirmed clean with evidence:** SQL injection — **every** query in `db.rs` uses bound `?` parameters (no `format!`/string-concat of user data into SQL); the lone `format!` builds the LIKE *pattern*, which is itself bound as a parameter with `ESCAPE '\\'` and metacharacter escaping. Pinned: a state value `'; DROP TABLE states; --` is stored/queried **literally** (table survives), and a `%`/`_` in a search query matches **literally**, not as a wildcard. NaN-index poisoning (the calibration/vitals/geo class) — **structurally impossible** here: embeddings are SHA-256 → `i32` → `f32` (an `i32` cast to `f32` is always finite, never NaN/Inf), with an all-zero-digest norm guard; probed empty-index search, empty-string query, and `k=0` — all return `Ok(0)`, **no panic**. Fail-closed write path — a removal event yields `Ok(None)`, semantic-index failure is logged not propagated (best-effort, never blocks the durable SQLite write), and `EntityId` parsing failures fall back rather than panic. **6 new pinning tests** (SQL-injection literal-storage, LIKE-metacharacter literalness, history `LIMIT`, purge exclusive-boundary, purge attribute-GC-keeps-shared, purge old-events): `homecore-recorder` **19 → 25** (`--no-default-features`) / **25 → 31** (`--features ruvector`), 0 failed; the purge-boundary test is a true pin (fails deleting 2 rows under an inclusive cutoff, passes deleting 1 under the exclusive cutoff). Behaviour otherwise unchanged; Python deterministic proof unchanged (recorder is off the signal proof path). diff --git a/docs/adr/ADR-172-cli-core-csi-deserialiser-security-review.md b/docs/adr/ADR-172-cli-core-csi-deserialiser-security-review.md new file mode 100644 index 00000000..001a4f07 --- /dev/null +++ b/docs/adr/ADR-172-cli-core-csi-deserialiser-security-review.md @@ -0,0 +1,117 @@ +# ADR-172: `wifi-densepose-cli` + `wifi-densepose-core` CSI-Deserialiser Security Review + +| Field | Value | +|-------|-------| +| **Status** | Accepted — clean-with-evidence, 4 regression pins added | +| **Date** | 2026-06-15 | +| **Deciders** | ruv | +| **Codename** | **CSI-DESERIALISER-HARDENING** | +| **Supersedes / amends** | none (records review; references ADR-127 §9 for the `core` portion, ADR-136 for the pre-existing DoS ACs) | + +## Context + +The beyond-SOTA security sweep (branch `feat/v2-beyond-sota-sweep`) reviewed each +`v2/` crate for real, reproducible defects. Two crates had no prior dedicated +security ADR: + +- **`wifi-densepose-core`** — the dependency root for all 12 downstream crates + (types, traits, error types, CSI frame primitives). A defect here is a + force-multiplier: every consumer inherits it. +- **`wifi-densepose-cli`** — the user-facing entrypoint + (`calibrate`/`calibrate-serve`/`enroll`/`train-room`/`room-watch` + MAT-gated), + which parses untrusted UDP CSI packets and operator-supplied paths. + +A **specific hypothesis** motivated the core review. Three earlier reviews in +this campaign found a systemic **NaN-state-poisoning bug class** in crates that +depend on core (`wifi-densepose-calibration`, `-vitals`, `-geo`): a non-finite +(NaN/Inf) input latched into persistent filter/accumulator state (IIR `y1/y2`, +running mean, Welford/von-Mises accumulator, voxel grid) → silent **permanent** +feature failure. The load-bearing question for this review: **does that bug class +originate in a shared `wifi-densepose-core` primitive** (making the right fix a +single root fix), or was it independently re-implemented in each downstream +crate (making the three existing local fixes complete)? + +## Decision + +Record the review outcome and lock in the existing DoS guards with regression +tests. **No production code is changed** — both crates were already hardened +(ADR-136 acceptance criteria + `sanitize_room_id`); the gap was *untested* +guards, which a future refactor could silently remove. + +### Load-bearing question — VERDICT: **NO** (the NaN class does not live in core) + +`wifi-densepose-core` exposes **no stateful accumulator of any kind** — no +Welford/running-mean, no von-Mises/circular-mean, no IIR/biquad filter state, no +voxel grid. + +- **MEASURED:** `grep` over `core/src` for + `welford|von_mises|biquad|y1|y2|running_mean|accumulat|voxel|self.*+=` matched + only the `InvalidState` *error* enum variant, "reset state" doc comments, and a + test-only LCG — **zero** stateful logic. The only float math in core is + construction-time projection (`CsiFrame::new` → amplitude/phase via `mapv`) and + pure stateless `utils` functions; nothing persists across frames. +- **Corroboration:** `wifi-densepose-calibration::Features::from_series` + (`extract.rs:103–133`) already filters non-finite samples → `Features::ZERO`. + The downstream fixes are independently re-implemented, confirming each crate + rolls its own accumulator and each local fix is correct and complete. **A fix + in core would be a no-op (there is nothing to fix).** + +Consequence: the NaN-state-poisoning class is a *downstream-local* pattern, not a +core-rooted defect. No hidden fourth instance exists in the shared primitive. + +### Findings (all pins — guards already present, now tested) + +| # | Location | Guard (pre-existing) | Regression pin | Evidence (MEASURED) | +|---|----------|----------------------|----------------|---------------------| +| 1 | `core` `types.rs:801` `from_canonical_bytes` | `saturating_mul` shape-vs-length check before `Vec::with_capacity(rows*cols)` | `canonical_decode_oversized_shape_is_bounded_not_allocated` | With guard removed: **panics `capacity overflow` at `types.rs:801`**; with guard: passes | +| 2 | `core` `types.rs` decoder | typed `CanonicalDecodeError`, never panics | `canonical_decode_never_panics_on_arbitrary_bytes` (fuzz sweep) | panic-free on arbitrary bytes | +| 3 | `cli` `calibrate.rs:276–291` | length check `buf.len() < 20 + n_pairs*2` before `Array2::zeros(n_antennas*n_subcarriers)` | `test_parse_csi_packet_oversized_claim_is_rejected_not_allocated` | 255×65535 claim in a 2 KB packet → `None` (no allocation) | +| 4 | `cli` `calibrate.rs` parser | `None`-returning on malformed input | `test_parse_csi_packet_never_panics_on_arbitrary_bytes` (fuzz sweep) | panic-free on arbitrary UDP bytes | + +### Dimensions confirmed clean (with evidence) + +1. **Panic-on-adversarial-input = 0** — `from_canonical_bytes` returns a typed + error for every malformed class; `parse_csi_packet` returns `None`. Both + fuzz-swept panic-free. +2. **NaN handling** — `Confidence::new` rejects NaN + (`!(0.0..=1.0).contains(&NaN)` ⇒ `Err`); `compute_bounding_box` / + `to_flat_array` are NaN-tolerant (f32 min/max ignore NaN). +3. **Empty-frame safety** — `amplitude_variance` / `mean_amplitude` are + panic-free on an empty `Array2` (ndarray 0.17 returns finite / `None`). +4. **Unbounded-memory DoS** — bounded in both deserialisers (findings 1 & 3). +5. **Path traversal** — `calibrate-serve` defends every client-supplied + `room_id`/`bank`/`baseline` via `sanitize_room_id` (`[A-Za-z0-9_-]`, 64-char + cap) with existing tests; bearer-auth gate + non-loopback-bind warning present. + `mat export` writes to an operator-supplied `PathBuf` (acceptable CLI behavior). +6. **Secrets** — `--token` is read from `CALIBRATE_TOKEN` env, never embedded. + +## Validation + +- `cargo test -p wifi-densepose-core` → **35 → 37** lib passed, 0 failed (+3 doctests) +- `cargo test -p wifi-densepose-cli --no-default-features` → **24 → 26** passed, 0 failed +- `cargo test --workspace --no-default-features` → **exit 0**, 0 failed +- `python archive/v1/data/proof/verify.py` → **VERDICT: PASS**, hash + `f8e76f21a0f9852b70b6d9dd5318239f6b20cbcb4cdd995863263cecdc446f7a` **unchanged** + (core/cli are off the signal proof path — confirms no pipeline alteration) + +## Consequences + +### Positive +- Two CSI deserialisers (the untrusted-input boundary of both the library root + and the network-facing CLI) now have their DoS guards pinned against + regression — a future refactor that drops a length check fails CI. +- The NaN-state-poisoning class is settled as downstream-local; reviewers no + longer need to suspect a shared-root defect, and the three prior local fixes + are confirmed complete. + +### Negative +- None. Test-only change; no behavior or API change. + +### Neutral +- The `core` portion is also noted in ADR-127 §9 (shared security-review log); + this ADR is the canonical record for the `wifi-densepose-cli` review. + +## Links +- ADR-127 — HOMECORE state machine (shared security-review log, §9) +- ADR-136 — pre-existing CSI deserialiser DoS acceptance criteria +- ADR-151 — per-room calibration (`calibrate`/`calibrate-serve` surfaces) diff --git a/v2/crates/wifi-densepose-cli/src/calibrate.rs b/v2/crates/wifi-densepose-cli/src/calibrate.rs index 7baf1948..3af27491 100644 --- a/v2/crates/wifi-densepose-cli/src/calibrate.rs +++ b/v2/crates/wifi-densepose-cli/src/calibrate.rs @@ -471,6 +471,54 @@ mod tests { assert!(ht.record(&f).is_err()); } + /// Security pin (review 2026-06, ADR-127): the UDP parser is the CLI's + /// widest attack surface — `calibrate` / `enroll` / `room-watch` bind it to + /// 0.0.0.0 by default, so any host on the LAN can send arbitrary bytes. A + /// header that *claims* a huge `n_antennas * n_subcarriers` must be rejected + /// by the length check BEFORE the `Array2::zeros` allocation, so a single + /// small datagram can never trigger a multi-MB allocation (unbounded-memory + /// DoS). The largest possible claim (255 × 65535 pairs ≈ 33 MB of IQ) inside + /// a RECV_BUF-sized (2048-byte) datagram parses to `None`, never OOMs. + #[test] + fn test_parse_csi_packet_oversized_claim_is_rejected_not_allocated() { + let mut buf = vec![0u8; RECV_BUF]; + buf[0..4].copy_from_slice(&0xC511_0001u32.to_le_bytes()); + buf[4] = 1; // node_id + buf[5] = 255; // n_antennas (max) + buf[6..8].copy_from_slice(&65535u16.to_le_bytes()); // n_subcarriers (max) + buf[8..12].copy_from_slice(&2432u32.to_le_bytes()); + // n_pairs = 255 * 65535 = 16_711_425 → needs ~33 MB of IQ bytes that a + // 2048-byte datagram cannot carry → length check fails → None. + assert!(parse_csi_packet(&buf, "ht20").is_none()); + } + + /// Security pin (review 2026-06): the parser must never panic on ANY byte + /// string — truncated headers, lying length fields, odd sizes. IQ-loop + /// indexing is guarded by the length check; this sweeps a spread of + /// adversarial inputs to lock in panic-on-adversarial-input = 0. + #[test] + fn test_parse_csi_packet_never_panics_on_arbitrary_bytes() { + let mut st = 0x1234_5678u64; + let mut next = move || { + st = st + .wrapping_mul(6_364_136_223_846_793_005) + .wrapping_add(1_442_695_040_888_963_407); + (st >> 33) as u8 + }; + for len in 0..600usize { + let buf: Vec = (0..len).map(|_| next()).collect(); + for tier in ["ht20", "he20", "garbage"] { + let _ = parse_csi_packet(&buf, tier); + } + } + // Valid magic, lying n_subcarriers, no payload → None (not a panic). + let mut buf = vec![0u8; 20]; + buf[0..4].copy_from_slice(&0xC511_0001u32.to_le_bytes()); + buf[5] = 3; + buf[6..8].copy_from_slice(&500u16.to_le_bytes()); + assert!(parse_csi_packet(&buf, "ht20").is_none()); + } + #[test] fn test_freq_to_channel_24ghz() { assert_eq!(freq_mhz_to_channel(2437), 6); diff --git a/v2/crates/wifi-densepose-core/src/types.rs b/v2/crates/wifi-densepose-core/src/types.rs index 9e557124..a2df7169 100644 --- a/v2/crates/wifi-densepose-core/src/types.rs +++ b/v2/crates/wifi-densepose-core/src/types.rs @@ -1636,6 +1636,67 @@ mod tests { } } + /// Security pin (review 2026-06, ADR-127) — `from_canonical_bytes` is a + /// deserialisation boundary for replayed/forwarded captures. A forged header + /// advertising an enormous `rows × cols` must be rejected by the + /// shape-vs-length check (`expect` uses saturating multiplies) BEFORE the + /// `Vec::with_capacity(rows * cols)` allocation — otherwise an attacker could + /// drive a multi-GB allocation from a few header bytes (unbounded-memory + /// DoS). The check guarantees `rows*cols*16 <= bytes.len()`, so the capacity + /// is bounded by the input the caller already holds. This must not OOM. + #[test] + fn canonical_decode_oversized_shape_is_bounded_not_allocated() { + use ndarray::Array2; + let meta = CsiMetadata::new(DeviceId::new("n"), FrequencyBand::Band2_4GHz, 1); + let data = Array2::from_shape_fn((1, 2), |(_, c)| Complex64::new(c as f64, 0.0)); + let mut bytes = CsiFrame::new(meta, data).to_canonical_bytes(); + + // The (rows, cols) u32 pair is the last 8 bytes before the payload. + // Overwrite with a maximal claim (u32::MAX × u32::MAX) and lop off the + // payload so the buffer is tiny but the header lies enormously. + let shape_off = bytes.len() - 8 - 2 * 16; // 2 samples × 16 bytes payload + bytes[shape_off..shape_off + 4].copy_from_slice(&u32::MAX.to_le_bytes()); + bytes[shape_off + 4..shape_off + 8].copy_from_slice(&u32::MAX.to_le_bytes()); + bytes.truncate(shape_off + 8); // drop the real payload + + // expect = MAX*MAX*16 (saturated) > found → PayloadMismatch, no alloc. + assert!(matches!( + CsiFrame::from_canonical_bytes(&bytes), + Err(CanonicalDecodeError::PayloadMismatch { .. }) + )); + } + + /// Security pin (review 2026-06) — the decoder must never panic on arbitrary + /// bytes: every malformed input is a typed `CanonicalDecodeError`, never an + /// unwinding panic (panic-on-adversarial-input = 0). Sweep truncations and a + /// deterministic fuzz spread. + #[test] + fn canonical_decode_never_panics_on_arbitrary_bytes() { + use ndarray::Array2; + let mut meta = CsiMetadata::new(DeviceId::new("node"), FrequencyBand::Band5GHz, 36); + meta.antenna_config.spacing_mm = Some(50.0); + let data = Array2::from_shape_fn((2, 8), |(r, c)| Complex64::new(r as f64, c as f64)); + let good = CsiFrame::new(meta, data).to_canonical_bytes(); + + // Every prefix of a valid encoding must decode without panicking. + for n in 0..good.len() { + let _ = CsiFrame::from_canonical_bytes(&good[..n]); + } + // Deterministic LCG fuzz over varied lengths. + let mut st = 0xDEAD_BEEFu64; + for len in 0..400usize { + let buf: Vec = (0..len) + .map(|_| { + st = st + .wrapping_mul(6_364_136_223_846_793_005) + .wrapping_add(1_442_695_040_888_963_407); + (st >> 33) as u8 + }) + .collect(); + let _ = CsiFrame::from_canonical_bytes(&buf); + } + } + /// AC8c (review finding 7) — `Some(Uuid::nil())` calibration is an /// encoding error: nil is the wire sentinel for `None`, so encoding it /// would alias two distinct frames to one byte string (and one witness).