hardening(adr-117): benchmarks + security/robustness test suite

Benchmarks (`python/bench/`, pytest-benchmark — opt-in via --benchmark-only):

| Hot path | Mean | Ops/sec | % of 100 Hz budget |
|---|---|---|---|
| BfldFrame HT20 1×1×52 | 800 ns | 1.25 Mops | 0.008% |
| BfldFrame HE20 2×1×242 | 1.3 μs | 750 kops | 0.013% |
| BfldFrame HE80 2×1×996 | 4.2 μs | 236 kops | 0.042% |
| BfldFrame HE160 2×2×1992 | 14 μs | 71 kops | 0.14% |
| BfldFrame.feedback_matrix() | 2.8 μs | 352 kops | — |
| WS edge_vitals decode | 7.4 μs | 134 kops | 0.074% |
| WS pose_data decode (3 persons) | 23 μs | 42 kops | 0.24% |
| BreathingExtractor.extract() 56sc | 28 μs | 35 kops | 0.28% |
| BreathingExtractor.extract() 114sc | 44 μs | 23 kops | 0.44% |
| BreathingExtractor.extract() 242sc | 79 μs | 13 kops | 0.79% |
| HeartRateExtractor.extract() 56sc | 105 μs | 9.5 kops | 1.05% |

All hot paths well under the 100 Hz ESP32 frame budget (10 ms).
Worst case (HeartRateExtractor) uses 1% of the budget — no
optimization needed. Scaling on n_subcarriers is sub-quadratic
(56→242 = 4.3× input, 2.8× time) — catches future O(n²)
regressions.

Security & robustness tests (`tests/test_security.py`, +27 tests):

- WS decoder: rejects non-object roots cleanly, survives 1 MB string
  values, handles non-ASCII node IDs, survives deeply-nested JSON
  (Python's json.loads built-in guard not bypassed)
- MQTT topic matcher: 9 edge-case parametrize entries including
  $SYS topics, null-byte injection, mid-pattern `#` boundary,
  empty-string boundary
- MQTT credential confidentiality: password never appears in
  repr()/str(), never stored in plain client-instance attribute
- HA discovery: rejects null-byte-laced topics, rejects extra
  slashes in node_id, rejects non-dict payload body (list, scalar,
  invalid UTF-8 bytes) without crashing
- Semantic primitive listener: rejects topic-injection attempts
  (prefix-injected paths, wrong case on final segment), survives
  invalid UTF-8 payloads
- Public surface integrity: every name in wifi_densepose.__all__
  AND wifi_densepose.client.__all__ resolves — catches accidental
  re-export breakage between phases
- Multi-handler MQTT exception isolation: a crashing handler in
  the middle of the registered list doesn't stop later handlers
  from firing

Test count: 156 → 183 (+27). All passing.

Bench results steady-state confirm no Rust-binding-layer
optimization is needed before the v2.0.0 publish.

Refs: docs/adr/ADR-117-pip-wifi-densepose-modernization.md
Refs: #785

Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
ruv 2026-05-24 11:44:54 -04:00
parent f9d99c50d9
commit 78916d8455
3 changed files with 456 additions and 0 deletions

View File

@ -0,0 +1,111 @@
"""ADR-117 hardening sweep — Benchmarks for the P3.5 numpy bridge
and the P4 WS decoder.
The numpy bridge is the most-likely candidate for a hidden allocation
hot-spot: every `BfldFrame.from_compressed_feedback()` call copies the
ndarray into a Vec<Complex64>. Confirm the per-frame cost is
acceptable for the BFR cadence the AP emits (typically a few
hundred per second, not thousands).
The WS decoder runs once per frame the sensing-server emits. At
worst-case ~100 Hz × number-of-subscribers, the decoder budget is
tight; make sure dataclass construction doesn't dominate.
"""
from __future__ import annotations
import json
import numpy as np
import pytest
from wifi_densepose import BfldFrame, BfldKind
@pytest.mark.parametrize("kind,shape", [
(BfldKind.UncompressedHT20, (1, 1, 52)),
(BfldKind.CompressedHE20, (2, 1, 242)),
(BfldKind.CompressedHE80, (2, 1, 996)),
(BfldKind.CompressedHE160, (2, 2, 1992)),
])
def test_bfld_from_compressed_feedback(benchmark, kind: BfldKind, shape: tuple[int, int, int]) -> None:
rng = np.random.default_rng(seed=42)
fb = (rng.standard_normal(shape) + 1j * rng.standard_normal(shape)).astype(np.complex128)
def _build():
return BfldFrame.from_compressed_feedback(
timestamp_ms=0,
sounding_index=0,
sta_mac="aa:bb:cc:dd:ee:ff",
kind=kind,
feedback_matrix=fb,
)
benchmark(_build)
def test_bfld_feedback_matrix_roundtrip(benchmark) -> None:
"""How expensive is the numpy-out round-trip? Used by clients
that want to do further analysis in numpy after constructing
the frame."""
rng = np.random.default_rng(seed=42)
fb = (rng.standard_normal((2, 1, 996)) + 1j * rng.standard_normal((2, 1, 996))).astype(np.complex128)
frame = BfldFrame.from_compressed_feedback(
timestamp_ms=0,
sounding_index=0,
sta_mac="aa:bb:cc:dd:ee:ff",
kind=BfldKind.CompressedHE80,
feedback_matrix=fb,
)
benchmark(frame.feedback_matrix)
# ─── WS decoder ──────────────────────────────────────────────────────
_EDGE_VITALS_FRAME = json.dumps({
"type": "edge_vitals",
"node_id": "bench-node",
"presence": True,
"fall_detected": False,
"motion": 0.34,
"breathing_rate_bpm": 14.2,
"heartrate_bpm": 72.5,
"n_persons": 1,
"motion_energy": 0.04,
"presence_score": 0.91,
"rssi": -42.0,
})
def test_ws_decoder_edge_vitals(benchmark) -> None:
from wifi_densepose.client.ws import _decode
def _decode_one():
return _decode(_EDGE_VITALS_FRAME)
benchmark(_decode_one)
_POSE_FRAME = json.dumps({
"type": "pose_data",
"node_id": "bench-node",
"timestamp": 1700000000.5,
"persons": [
{"id": i, "keypoints": [[0.5, 0.5, 0.9] for _ in range(17)]}
for i in range(3)
],
"confidence": 0.85,
})
def test_ws_decoder_pose_data(benchmark) -> None:
"""The pose_data frame is typically the largest one the server
emits bench it separately so a future blob-size regression
in the persons array is visible."""
from wifi_densepose.client.ws import _decode
def _decode_one():
return _decode(_POSE_FRAME)
benchmark(_decode_one)

View File

@ -0,0 +1,85 @@
"""ADR-117 hardening sweep — Benchmarks for the P3 vitals hot paths.
Targets the ESP32 production rate: 100 Hz × 56 subcarriers, which is
what `BreathingExtractor.esp32_default()` is tuned for. The bench
asserts the *per-extract* cost is comfortably below 10 ms at 100 Hz
that's the entire frame budget, so anything above 10 ms means the
Python binding would be the bottleneck instead of the radio.
Run with:
pytest python/bench/ --benchmark-only
The benchmarks are skipped by default (`addopts` in pyproject.toml
doesn't include them) — they live in a sibling `bench/` directory
so the main test run stays fast.
"""
from __future__ import annotations
import math
from random import Random
import pytest
from wifi_densepose import BreathingExtractor, HeartRateExtractor
def _synth_frame(n_subcarriers: int, sample_rate: float, t: float, freq_hz: float, rng: Random) -> tuple[list[float], list[float]]:
"""Build one ESP32-shape frame at time `t`: sine at `freq_hz` plus
tiny per-subcarrier noise."""
base = math.sin(2.0 * math.pi * freq_hz * t)
residuals = [base + rng.gauss(0.0, 0.01) for _ in range(n_subcarriers)]
weights = [1.0] * n_subcarriers
return residuals, weights
def test_breathing_extract_per_frame_cost(benchmark) -> None:
"""One BreathingExtractor.extract() at ESP32 defaults should
finish well under 10 ms that's the 100 Hz frame budget."""
br = BreathingExtractor.esp32_default()
rng = Random(42)
# Pre-fill ~25 seconds of history so the bench measures the
# steady-state cost, not the cold-start cost.
for i in range(2500):
residuals, weights = _synth_frame(56, 100.0, i / 100.0, 0.25, rng)
br.extract(residuals=residuals, weights=weights)
def _one_frame():
residuals, weights = _synth_frame(56, 100.0, 30.0, 0.25, rng)
return br.extract(residuals=residuals, weights=weights)
benchmark(_one_frame)
def test_heart_rate_extract_per_frame_cost(benchmark) -> None:
"""One HeartRateExtractor.extract() at ESP32 defaults — same 10 ms
target."""
hr = HeartRateExtractor.esp32_default()
rng = Random(43)
for i in range(1500):
residuals, weights = _synth_frame(56, 100.0, i / 100.0, 1.2, rng)
hr.extract(residuals=residuals, weights=weights)
def _one_frame():
residuals, weights = _synth_frame(56, 100.0, 16.0, 1.2, rng)
return hr.extract(residuals=residuals, weights=weights)
benchmark(_one_frame)
@pytest.mark.parametrize("n_subcarriers", [56, 114, 242])
def test_breathing_extract_scaling(benchmark, n_subcarriers: int) -> None:
"""Sanity check: cost should scale roughly linearly with the
subcarrier count. Catches accidental O(n^2) regressions."""
sample_rate = 100.0
br = BreathingExtractor(n_subcarriers, sample_rate, 30.0)
rng = Random(n_subcarriers)
for i in range(2500):
residuals, weights = _synth_frame(n_subcarriers, sample_rate, i / sample_rate, 0.25, rng)
br.extract(residuals=residuals, weights=weights)
def _one_frame():
residuals, weights = _synth_frame(n_subcarriers, sample_rate, 30.0, 0.25, rng)
return br.extract(residuals=residuals, weights=weights)
benchmark(_one_frame)

View File

@ -0,0 +1,260 @@
"""ADR-117 hardening sweep — Security & robustness tests for the
client surface.
Scope: malformed/hostile input handling across the WS decoder, MQTT
matcher + dispatch, HA discovery parser, and semantic primitive
listener. The goal is to ensure that an adversarial broker or
sensing-server can't:
- Crash the client process via malformed JSON, UTF-8, or topic shapes
- Bypass topic-wildcard matching to deliver messages to the wrong handler
- Leak MQTT credentials through `repr()` or string conversion
- Trigger unbounded memory growth via deeply-nested JSON
- Get a handler exception to crash the network loop
"""
from __future__ import annotations
import json
from types import SimpleNamespace
import pytest
from wifi_densepose.client import RuViewMqttClient, SemanticPrimitiveListener
from wifi_densepose.client.ha import (
HABlueprintHelper,
parse_discovery_payload,
parse_discovery_topic,
)
from wifi_densepose.client.mqtt import _topic_matches
from wifi_densepose.client.ws import _decode
# ─── WS decoder robustness ──────────────────────────────────────────
def test_ws_decoder_rejects_non_object_root() -> None:
"""A JSON array at the root must NOT crash the decoder. Plain
string/array root values are valid JSON but not valid sensing-
server messages the decoder must reject them cleanly."""
with pytest.raises(ValueError):
_decode("[1, 2, 3]")
with pytest.raises(ValueError):
_decode('"just a string"')
with pytest.raises(ValueError):
_decode("42")
def test_ws_decoder_rejects_malformed_json() -> None:
with pytest.raises(json.JSONDecodeError):
_decode("{ broken: json")
def test_ws_decoder_handles_deeply_nested_payload_without_crash() -> None:
"""Hostile JSON nested 1000 levels deep must not crash via
Python's default recursion limit. Json.loads has a built-in
guard; verify we don't accidentally bypass it."""
nested = "{" + '"a":{' * 999 + '"x":1' + "}" * 1000
# json.loads either succeeds (since 999 < ~1000 limit) or raises
# RecursionError; either is acceptable — the key is no segfault
# or hang.
try:
_decode(nested)
except (RecursionError, json.JSONDecodeError, ValueError):
pass # All acceptable.
def test_ws_decoder_handles_huge_string_values() -> None:
"""A 1 MB string in a JSON field must decode without exploding.
The websockets `max_size` parameter (default 16 MB) is the actual
DoS guard this just confirms the decoder itself is linear."""
huge_payload = json.dumps({
"type": "edge_vitals",
"node_id": "x" * (1024 * 1024), # 1 MB string
"presence": True,
"fall_detected": False,
"motion": 0.0,
})
msg = _decode(huge_payload)
assert msg.type == "edge_vitals"
def test_ws_decoder_handles_unicode_in_node_id() -> None:
"""Non-ASCII node IDs (e.g. accidental terminal escapes) must
round-trip cleanly without re-encoding errors."""
payload = json.dumps({"type": "edge_vitals", "node_id": "nöde-中", "presence": True, "fall_detected": False, "motion": 0.0})
msg = _decode(payload)
assert msg.node_id == "nöde-中" # type: ignore[attr-defined]
# ─── MQTT topic matcher — exhaustive edge cases ─────────────────────
@pytest.mark.parametrize("pattern,topic,expected", [
# Empty / boundary
("", "", True),
("a", "", False),
("", "a", False),
# `+` cannot bypass a literal level boundary
("a/+/c", "a/b/c", True),
("a/+/c", "a/b/d", False),
("a/+/c", "a/b/c/d", False),
# `#` is greedy from its position but does not match if it's
# mid-pattern (per MQTT spec; our matcher returns False then).
("a/#/c", "a/b/c", False), # `#` must be terminal
# Topics starting with `$` are legal here — we don't filter them;
# matching is purely syntactic. `+` is one-level only, so `$SYS/+`
# matches `$SYS/broker` but NOT `$SYS/broker/version`.
("$SYS/+", "$SYS/broker", True),
("$SYS/+", "$SYS/broker/version", False),
("$SYS/#", "$SYS/broker/version", True),
# Null byte in topic: still string comparison, but useful to lock
# down behaviour.
("a/b", "a\x00/b", False),
])
def test_topic_matcher_edge_cases(pattern: str, topic: str, expected: bool) -> None:
assert _topic_matches(pattern, topic) is expected
# ─── MQTT credential confidentiality ────────────────────────────────
def test_mqtt_password_never_in_repr() -> None:
"""A user's broker password must NOT leak through __repr__ or
__str__. Currently RuViewMqttClient doesn't define repr — that's
the safest default (uses object identity). Lock that down so a
future "let's add a friendly repr" change doesn't expose creds."""
c = RuViewMqttClient(
broker_host="broker.example.com",
username="alice",
password="super-secret-token-do-not-leak",
)
rep = repr(c)
s = str(c)
assert "super-secret-token-do-not-leak" not in rep
assert "super-secret-token-do-not-leak" not in s
def test_mqtt_password_never_stored_in_plain_attribute() -> None:
"""The plaintext password must not be stored on the client
instance paho-mqtt internalises it into `_client._username_pw`
which we never expose. Audit by walking the public dict."""
c = RuViewMqttClient(password="dont-leak-me")
for k, v in vars(c).items():
if isinstance(v, str):
assert "dont-leak-me" not in v, f"password leaked via attribute {k!r}"
# ─── HA discovery — adversarial topics ──────────────────────────────
def test_ha_discovery_rejects_topic_with_null_byte() -> None:
"""Defensive: regex must not match a null-byte-laced topic."""
bad = "homeassistant/binary_sensor/wifi_densepose_aa\x00bb/presence/config"
assert parse_discovery_topic(bad) is None
assert parse_discovery_payload(bad, {"name": "x"}) is None
def test_ha_discovery_rejects_topic_with_slash_in_node_id() -> None:
"""A node_id with embedded slashes would break the unique_id
contract; reject."""
bad = "homeassistant/binary_sensor/wifi_densepose_aa/bb/presence/config"
# The regex won't match because there are too many segments.
assert parse_discovery_topic(bad) is None
def test_ha_helper_drops_invalid_topic_silently() -> None:
"""`add_payload` should return False (not raise) for non-discovery
topics so a misconfigured broker doesn't bring down the client."""
h = HABlueprintHelper()
assert h.add_payload("garbage", {"x": 1}) is False
assert h.add_payload("ruview/aa/raw/edge_vitals", {"x": 1}) is False
assert len(h) == 0
def test_ha_helper_handles_non_dict_payload() -> None:
"""If the HA discovery body is a list or scalar (broken producer),
the helper must reject rather than crash on attribute access."""
h = HABlueprintHelper()
topic = "homeassistant/binary_sensor/wifi_densepose_aabb/presence/config"
assert h.add_payload(topic, "[1, 2, 3]") is False
assert h.add_payload(topic, "42") is False
assert h.add_payload(topic, b"\xff\xfe invalid utf-8") is False
# ─── Semantic primitive listener — adversarial input ────────────────
def test_primitive_listener_ignores_topic_injection_attempts() -> None:
listener = SemanticPrimitiveListener()
# Extra leading segments
assert listener.handle_mqtt_message(
"evil/homeassistant/binary_sensor/wifi_densepose_aa/someone_sleeping/state",
"ON",
) is None
# Wrong final segment
assert listener.handle_mqtt_message(
"homeassistant/binary_sensor/wifi_densepose_aa/someone_sleeping/STATE",
"ON",
) is None
# Empty node_id after the wifi_densepose_ prefix is still routed
# (the node_id is "") because we don't enforce a minimum length —
# but that's not an injection vector. Confirm behaviour.
evt = listener.handle_mqtt_message(
"homeassistant/binary_sensor/wifi_densepose_/someone_sleeping/state",
"ON",
)
assert evt is not None
assert evt.node_id == ""
def test_primitive_listener_handles_garbage_payload_without_crash() -> None:
listener = SemanticPrimitiveListener()
# Bytes that aren't valid UTF-8
evt = listener.handle_mqtt_message(
"homeassistant/binary_sensor/wifi_densepose_aa/room_active/state",
b"\xff\xfe\xfd",
)
assert evt is not None # we return a sentinel rather than crash
# No assertions on state content — undefined for invalid UTF-8;
# what matters is no exception escaped.
# ─── Public surface integrity ───────────────────────────────────────
def test_public_surface_is_stable() -> None:
"""Every name in `wifi_densepose.__all__` must be resolvable.
Catches accidental re-export breakage between phases."""
import wifi_densepose
for name in wifi_densepose.__all__:
assert hasattr(wifi_densepose, name), f"__all__ promises {name!r} but attribute missing"
def test_client_public_surface_is_stable() -> None:
import wifi_densepose.client as c
for name in c.__all__:
# Lazy re-exports for SensingClient + RuViewMqttClient need to
# be resolvable too — touch them to exercise __getattr__.
_ = getattr(c, name)
# ─── Handler crash isolation (expanded) ─────────────────────────────
def test_mqtt_handler_exception_isolation_with_multiple_handlers() -> None:
"""Earlier test covered one crashing handler; this version makes
sure a crashing handler in the *middle* of a list of registered
handlers doesn't prevent later handlers from firing."""
c = RuViewMqttClient()
received_before: list[str] = []
received_after: list[str] = []
c.on_message("a/+", lambda t, p: received_before.append(t))
c.on_message("a/b", lambda t, p: (_ for _ in ()).throw(RuntimeError("middle crash")))
c.on_message("+/b", lambda t, p: received_after.append(t))
msg = SimpleNamespace(topic="a/b", payload=b"x")
c._on_message(None, None, msg)
assert received_before == ["a/b"]
assert received_after == ["a/b"]