From fd8b9c30e74ae79995451a83ca5d55f1cd26fc35 Mon Sep 17 00:00:00 2001 From: ruv Date: Sun, 24 May 2026 10:36:06 -0400 Subject: [PATCH] =?UTF-8?q?docs(adr-117):=20seed=20branch=20=E2=80=94=20AD?= =?UTF-8?q?R-117=20pip-modernization=20spec=20+=20soul-signature=20researc?= =?UTF-8?q?h=20bundle?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two artifacts landing together on this new branch as the prerequisite documentation for the v2.0.0 Python wheel modernization work: 1. **docs/adr/ADR-117-pip-wifi-densepose-modernization.md** (644 lines) — Plan to bring the 2025-published `wifi-densepose` PyPI package (last release v1.1.0, 2025-06-07, 11.5 months out of sync) up to the current Rust v2/ workspace SOTA. Recommends PyO3 + maturin with abi3-py310 (one binary covers Python 3.10–3.13 per OS/arch), first-wheel scope = core + vitals + signal crates (~5 MB), v1.99.0 tombstone + 90-day un-yank window for v1.1.0, v2.0.0 hard break. Open questions catalogued; phases P1–P6+ laid out with concrete acceptance criteria. 2. **docs/research/soul/** (5 files, ~1,450 lines) — Soul Signature research spec: 7-channel electromagnetic biometric fingerprint (AETHER 128-dim + cardiac HR/HRV + cardiac waveform morphology + respiratory pattern + gait timing + skeletal proportions + subcarrier reflection profile), fused into one RVF graph file. Includes 60s scanning protocol, 5-layer security model, threat-model + mitigations, references to existing ADRs (014, 021, 024, 027, 030, 039, 079, 106, 108, 109, 110, 115). Marked "Research Specification (Pre-Implementation)". Explicit "what this is NOT" disclaimers preempt pseudoscience drift; every discriminative-power claim either cites a measurement or is marked "open research; baseline TBD". Branch off main at HEAD; ready for /loop 10m implementation iterations. 
Co-Authored-By: claude-flow --- ...DR-117-pip-wifi-densepose-modernization.md | 644 ++++++++++++++++++ docs/research/soul/README.md | 116 ++++ docs/research/soul/references.md | 138 ++++ docs/research/soul/scanning-process.md | 306 +++++++++ docs/research/soul/security.md | 367 ++++++++++ docs/research/soul/specification.md | 525 ++++++++++++++ 6 files changed, 2096 insertions(+) create mode 100644 docs/adr/ADR-117-pip-wifi-densepose-modernization.md create mode 100644 docs/research/soul/README.md create mode 100644 docs/research/soul/references.md create mode 100644 docs/research/soul/scanning-process.md create mode 100644 docs/research/soul/security.md create mode 100644 docs/research/soul/specification.md diff --git a/docs/adr/ADR-117-pip-wifi-densepose-modernization.md b/docs/adr/ADR-117-pip-wifi-densepose-modernization.md new file mode 100644 index 00000000..85cdfa3b --- /dev/null +++ b/docs/adr/ADR-117-pip-wifi-densepose-modernization.md @@ -0,0 +1,644 @@ +# ADR-117: pip `wifi-densepose` modernization via PyO3 + maturin bindings + +| Field | Value | +|-------|-------| +| **Status** | Proposed | +| **Date** | 2026-05-24 | +| **Deciders** | ruv | +| **Codename** | **PIP-PHOENIX** — rising from a pure-Python server to Rust-core Python bindings | +| **Relates to** | [ADR-021](ADR-021-esp32-vitals.md) (ESP32 vitals), [ADR-028](ADR-028-esp32-capability-audit.md) (capability audit / witness), [ADR-115](ADR-115-home-assistant-integration.md) (HA-DISCO + HA-MIND MQTT semantics), [ADR-116](ADR-116-cog-ha-matter-seed.md) (HA-COG Seed packaging) | +| **Tracking issue** | TBD — file under RuView issue tracker | + +--- + +## 1. Context + +### 1.1 What the pip package is today + +`wifi-densepose` v1.1.0 was published to PyPI on **2025-06-07** (two releases the same +day: 1.0.0 at 13:24 UTC, 1.1.0 at 17:02 UTC). Both wheels carry the tag +`py3-none-any` — no compiled extension, no platform-specific code. 
The package is a +**pure-Python server application** sourced entirely from `archive/v1/`. + +The package installs a 40-dependency stack including FastAPI, PyTorch, SQLAlchemy, +Redis, Celery, OpenCV, asyncpg, psycopg2, and Scapy (`archive/v1/setup.py:46–87`). +The declared entry points are: + +``` +wifi-densepose = src.cli:cli +wdp = src.cli:cli +``` + +(`archive/v1/setup.py:178–179`) + +The public API surface is centred on a FastAPI HTTP server, a SQLAlchemy/postgres +database layer, and a Redis/Celery task queue — none of which map to the current Rust +architecture. The `__init__.py` exports `app` (FastAPI), `CSIProcessor`, +`PhaseSanitizer`, `PoseEstimator`, `RouterInterface`, `ServiceOrchestrator`, +`HealthCheckService`, and `MetricsService` (`archive/v1/src/__init__.py:54–68`). + +### 1.2 Why this matters now + +ADR-115 (PR #778, merged 2026-05-23) shipped 21 Home Assistant entities, 10 semantic +primitives, mTLS, privacy mode, and a full witness bundle from the Rust crate +`wifi-densepose-sensing-server`. ADR-116 is packaging this as a Cognitum Seed cog. +Neither surface is reachable from `pip install wifi-densepose` — the pip package cannot +import a CsiFrame, decode an edge-vitals packet, call a DSP stage, verify a witness +bundle, or subscribe to the sensing server's MQTT or WebSocket endpoints. The ecosystem +split is now wide enough that the pip package actively misleads new users about what +the project does. + +Three concrete customer pain points: + +1. A Python user who `pip install wifi-densepose` expecting to consume live pose/vitals + data gets a FastAPI server that requires postgres + redis, not a library they can + script against. +2. Integrators writing HA automations or Node-RED flows in Python have no idiomatic + Python API for the v0.7 telemetry surface (ADR-115 entities, semantic primitives). +3. 
The ADR-028 witness chain (deterministic pipeline proof) is Python-based and + exercised via `archive/v1/data/proof/verify.py`, but it imports from the v1 stack — + it cannot witness the Rust pipeline that is now the production implementation. + +### 1.3 What this ADR is *not* + +- Not a removal of `archive/v1/` from the repository. The v1 codebase stays as a + research archive and its proof bundle stays in `archive/v1/data/proof/`. +- Not a port of the Rust crates to Python. The Rust workspace (`v2/`) is authoritative + and unmodified by this ADR. +- Not a replacement of the `wifi-densepose-sensing-server` Rust binary. The pip + package wraps or clients the binary; it does not reimplement it. +- Not an overlap with ADR-116 (Seed cog packaging). ADR-116 ships a Seed-installable + artifact; ADR-117 ships a Python developer library for scripting, automation, and + prototyping against the Rust stack. + +--- + +## 2. Current state — evidence + +| Artifact | Value | Source | +|---|---|---| +| Latest PyPI version | **1.1.0** | `pypi.org/pypi/wifi-densepose/json` | +| First release date | 2025-06-07T13:24:53Z | PyPI JSON metadata | +| Latest release date | 2025-06-07T17:02:40Z | PyPI JSON metadata | +| Months since last release | **~11.5 months** | as of 2026-05-24 | +| Wheel tag | `py3-none-any` | PyPI simple index | +| Hard dependencies | 40 (torch, fastapi, sqlalchemy, redis, celery, …) | `setup.py:46–87` | +| Entry point | `src.cli:cli` | `setup.py:178` | +| Python requires | `>=3.9` | `setup.py:108` | +| Classifiers Python versions | 3.9, 3.10, 3.11, 3.12 | PyPI JSON classifiers | +| Classifiers status | Beta (4) | PyPI JSON classifiers | +| Current Rust workspace version | **0.3.0** | `v2/Cargo.toml:version` | +| Rust crates in workspace | 20+ | `v2/Cargo.toml` members | +| ADR-115 shipped | 2026-05-23 | PR #778 | + +The v1 source package (`archive/v1/setup.py:112–215`) was clearly designed as an +all-in-one server application, not a reusable library. 
The `find_packages` call at +line 134 searches from `"."` (the archive root), meaning the wheel ships `src.*` as the +importable namespace. The proof bundle (`archive/v1/data/proof/verify.py:56–57`) imports +`src.hardware.csi_extractor.CSIData` and `src.core.csi_processor.CSIProcessor` — v1 pure +Python only. + +**PyPI org presence check:** a search for other `ruvnet`-published PyPI packages +(`ruvector`, `claude-flow`) returned no matches in the PyPI simple index as of this +writing. The `wifi-densepose` package is currently the only Python entry point for this +project's ecosystem. + +--- + +## 3. Gap analysis + +| Capability | Rust crate(s) | pip v1.1.0 status | Gap severity | +|---|---|---|---| +| `CsiFrame` / `CsiMetadata` core types | `wifi-densepose-core` (`types.rs`) | Not present — v1 uses `CSIData` Python class | **Critical** | +| HR/BR extraction from CSI buffer | `wifi-densepose-vitals` (4-stage pipeline: preprocessor → breathing → heartrate → anomaly) | Stub Python (`src/hardware/csi_extractor.py`) with no DSP | **Critical** | +| Phase sanitization / noise removal | `wifi-densepose-signal` (`phase_sanitizer`, `csi_processor`, `hampel`) | Python stubs in `src/core/phase_sanitizer.py` | **Critical** | +| Motion detection + presence scoring | `wifi-densepose-signal` (`motion.rs`, `MotionDetector`) | Not present | **Critical** | +| RuvSense multistatic sensing (13 modules) | `wifi-densepose-signal/src/ruvsense/` | Not present — ADR-029 post-dates v1 | **Critical** | +| 17-keypoint pose estimation | `wifi-densepose-nn`, `wifi-densepose-mat` | Stub `PoseEstimator` wrapping a `torch.nn.Module` that requires model weights | **High** | +| MQTT publisher (21 HA entities) | `wifi-densepose-sensing-server/src/mqtt/` | Not present — ADR-115 post-dates v1 | **High** | +| Semantic primitives (10 types) | `wifi-densepose-sensing-server/src/semantic/` | Not present | **High** | +| Matter bridge | `wifi-densepose-sensing-server/src/matter/` | Not present | **High** | +| 
WS/REST client for sensing-server | `wifi-densepose-sensing-server` (Axum) | v1 has a separate FastAPI server; no client | **High** | +| Witness bundle verification | ADR-028 / `scripts/generate-witness-bundle.sh` | `archive/v1/data/proof/verify.py` — proves v1 pipeline only | **High** | +| ESP32-C6 firmware telemetry (ADR-110) | `wifi-densepose-hardware` + `wifi-densepose-sensing-server` | Not present | **Medium** | +| Cross-viewpoint fusion (RuVector) | `wifi-densepose-ruvector/src/viewpoint/` | Not present | **Medium** | +| Semantic-primitive MQTT payload | `wifi-densepose-sensing-server/src/semantic/bus.rs` | Not present | **Medium** | +| PostgreSQL + Redis server mode | `archive/v1/` | Present (v1 only) | Low (not SOTA) | +| FastAPI HTTP REST server | `archive/v1/src/app.py` | Present (v1 only) | Low (not SOTA) | + +--- + +## 4. Decision + +Adopt **PyO3 + maturin Python extension bindings** as the primary modernization path, +shipping the pip package as a platform-native wheel (`manylinux`, `macosx`, `win-amd64`) +with compiled Rust extension modules, plus a pure-Python WS/MQTT client layer that talks +to a running `wifi-densepose-sensing-server` instance. + +This path is called **PIP-PHOENIX**. 
+ +### 4.1 Why PyO3 + maturin over the three rejected alternatives + +| Criterion | **PyO3 + maturin** (chosen) | Subprocess wrapper | REST/WS client only | Pure Python reimpl | +|---|---|---|---|---| +| Performance for DSP | Native Rust speed, zero copy | IPC overhead per call | N/A — no local DSP | Python bottleneck | +| Binary size in wheel | Core + vitals + signal only: ~2 MB stripped | Full sensing-server binary: ~15–30 MB | Minimal (~50 kB) | Minimal (~100 kB) | +| Works offline / no server | Yes | Yes (binary bundled) | No — server required | Partial | +| Proof bundle can cover Rust pipeline | Yes — bindings call the same Rust code the server uses | Partial — server is a black box | No | No | +| Install experience | `pip install wifi-densepose` — wheel has no system deps | `pip install` downloads 25 MB binary | `pip install` — pure Python | `pip install` — pure Python | +| Maintenance surface | Python bindings + Rust workspace | Python thin shim | Python client | Python reimpl must track Rust | +| Async / tokio support | PyO3 0.28 `pyo3-asyncio` or `pyo3-async-runtimes` for async export; sync entry points for the DSP hot path | N/A | Native asyncio on client | N/A | +| GIL concern | DSP-heavy calls release GIL via `py.allow_threads`; tokio runtime per module | N/A | None | N/A | +| Fits existing architecture | Core + vitals + signal already have clean public APIs (`lib.rs` re-exports) | Requires sensing-server to be running | Requires sensing-server | Forks the domain model | + +**Subprocess wrapper** is rejected because shipping a 25 MB pre-built server binary +inside every pip wheel is an unacceptably heavy install, and it makes offline scripting +impossible without starting the server. + +**REST/WS client only** is rejected because it provides zero DSP utility offline and +cannot close the witness gap — the proof bundle must exercise the same pipeline code. 
+ +**Pure Python reimplementation** is the root cause of the current drift and is +explicitly rejected. + +The chosen path starts small: **bind only the three crates with the highest Python +utility** (`wifi-densepose-core`, `wifi-densepose-vitals`, `wifi-densepose-signal`), +ship a `py3-none-any` pure-Python WS/MQTT client layer as a separate sub-module, and +grow from there. + +--- + +## 5. Detailed design + +### 5.1 Rust crates bound in v2.0 (first wheel) + +Three crates are in scope for the initial binding. They were chosen because they have +no heavy system dependencies (no libtorch, no ONNX runtime), have stable `pub` re-export +surfaces in `lib.rs`, and directly address the three most-requested missing capabilities. + +| Crate | Exported Python types / functions | Binding rationale | +|---|---|---| +| `wifi-densepose-core` | `CsiFrame`, `CsiMetadata`, `Keypoint`, `KeypointType`, `PersonPose`, `PoseEstimate`, `Confidence`, `BoundingBox` | Foundation types shared by all other crates; without these users can't even describe a frame | +| `wifi-densepose-vitals` | `CsiVitalPreprocessor`, `BreathingExtractor`, `HeartRateExtractor`, `VitalAnomalyDetector`, `VitalSignStore`, `VitalReading`, `VitalEstimate`, `AnomalyAlert` | The most-asked-for surface: HR/BR from a CSI buffer in 4 lines of Python | +| `wifi-densepose-signal` | `CsiProcessor`, `CsiProcessorConfig`, `PhaseSanitizer`, `MotionDetector`, `MotionScore`, `FeatureExtractor`, `HardwareNormalizer` | DSP pipeline that produces the features vitals and pose estimation consume | + +Crates **deferred to P6+**: `wifi-densepose-nn` (requires libtorch or candle — wheel +size risk), `wifi-densepose-mat` (depends on nn), `wifi-densepose-ruvector` (RuVector +GNN types — high value but adds ruvector-gnn 2.0.5 link dependency), +`wifi-densepose-hardware` (ESP32 HAL — not Python-scripting friendly). 
+ +### 5.2 New workspace member: `python/` + +A new crate `python/` is added as a workspace member at `v2/crates/wifi-densepose-py/`. +It is a `cdylib` that re-exports the three bound crates behind a single maturin module +named `wifi_densepose._core`. + +```toml +# v2/crates/wifi-densepose-py/Cargo.toml (sketch) +[package] +name = "wifi-densepose-py" +version.workspace = true +edition.workspace = true + +[lib] +name = "_core" +crate-type = ["cdylib"] + +[dependencies] +pyo3 = { version = "0.28", features = ["extension-module", "abi3-py310"] } +wifi-densepose-core = { path = "../wifi-densepose-core", features = ["serde"] } +wifi-densepose-vitals = { path = "../wifi-densepose-vitals" } +wifi-densepose-signal = { path = "../wifi-densepose-signal" } +``` + +The `abi3-py310` feature locks the stable ABI to CPython 3.10+, so one wheel binary +works across 3.10, 3.11, 3.12, and 3.13 without recompilation. + +PyO3 bindings pattern (example for `CsiFrame`): + +```rust +// v2/crates/wifi-densepose-py/src/core_types.rs +use pyo3::prelude::*; +use wifi_densepose_core::CsiFrame as RustCsiFrame; + +#[pyclass(name = "CsiFrame")] +#[derive(Clone)] +pub struct PyCsiFrame { + inner: RustCsiFrame, +} + +#[pymethods] +impl PyCsiFrame { + #[new] + fn new(amplitudes: Vec<f32>, phases: Vec<f32>, n_subcarriers: usize, + sample_index: u64, sample_rate_hz: f32) -> Self { + Self { inner: RustCsiFrame { amplitudes, phases, n_subcarriers, + sample_index, sample_rate_hz } } + } + + #[getter] fn amplitudes(&self) -> Vec<f32> { self.inner.amplitudes.clone() } + #[getter] fn phases(&self) -> Vec<f32> { self.inner.phases.clone() } + #[getter] fn n_subcarriers(&self) -> usize { self.inner.n_subcarriers } +} +``` + +DSP calls that execute >1 ms release the GIL: + +```rust +#[pymethods] +impl PyCsiProcessor { + fn process<'py>(&mut self, py: Python<'py>, frame: &PyCsiFrame) + -> PyResult<Option<PyProcessedSignal>> + { + py.allow_threads(|| self.inner.process(&frame.inner)) + .map(|opt| opt.map(PyProcessedSignal::from)) + .map_err(|e| 
PyRuntimeError::new_err(e.to_string())) + } +} +``` + +### 5.3 pip package layout + +``` +wifi-densepose/ ← PyPI package name (unchanged) + wifi_densepose/ ← importable namespace + __init__.py ← re-exports core types + version + _core.pyd / _core.so ← compiled PyO3 extension (maturin build output) + vitals.py ← thin Python wrapper + docstrings over _core vitals types + signal.py ← thin Python wrapper over _core signal types + client/ + __init__.py + ws.py ← asyncio WebSocket client for sensing-server /ws/sensing + mqtt.py ← paho-mqtt wrapper for ruview/<node_id>/raw/* topics + ha.py ← helpers for HA-DISCO payloads (read-only, mirrors ADR-115 §3.2) + witness/ + __init__.py + verify.py ← Python-callable witness verifier (re-creates ADR-028 proof + over the Rust pipeline via PyO3 bindings, not archive/v1/) + compat/ + v1.py ← import shim that raises MigrationError (see §7.2) + py.typed ← PEP 561 marker +``` + +The import path intentionally maps to Rust crate names: + +```python +from wifi_densepose import CsiFrame # core types +from wifi_densepose.vitals import BreathingExtractor, HeartRateExtractor +from wifi_densepose.signal import CsiProcessor, MotionDetector +from wifi_densepose.client.ws import SensingClient +from wifi_densepose.witness import verify_bundle +``` + +### 5.4 PyPI distribution — wheel matrix + +Published as `wifi-densepose==2.0.0` using **cibuildwheel** driven by GitHub Actions. 
+ +| Platform | Arch | CPython | Tag (stable ABI) | +|---|---|---|---| +| `manylinux_2_28` | x86_64 | 3.10+ | `cp310-abi3-manylinux_2_28_x86_64` | +| `manylinux_2_28` | aarch64 | 3.10+ | `cp310-abi3-manylinux_2_28_aarch64` | +| `macosx_11_0` | x86_64 | 3.10+ | `cp310-abi3-macosx_11_0_x86_64` | +| `macosx_11_0` | arm64 | 3.10+ | `cp310-abi3-macosx_11_0_arm64` | +| `win` | amd64 | 3.10+ | `cp310-abi3-win_amd64` | +| sdist | — | — | source fallback | + +The `abi3-py310` flag means **one binary per OS/arch** covers all supported Python +versions — 5 wheels total plus an sdist, compared to the 20-wheel matrix that would be +needed without stable ABI. + +```yaml +# .github/workflows/pip-release.yml (sketch) +- uses: pypa/cibuildwheel@v2 + with: + package-dir: v2/crates/wifi-densepose-py + output-dir: dist + env: + CIBW_BUILD: "cp310-*" + CIBW_ARCHS_LINUX: "x86_64 aarch64" + CIBW_ARCHS_MACOS: "x86_64 arm64" + CIBW_ARCHS_WINDOWS: "AMD64" + CIBW_BEFORE_BUILD: "pip install maturin" + CIBW_BUILD_FRONTEND: "build[uv]" +``` + +### 5.5 CLI parity + +The pip wheel installs a `wifi-densepose` console script. In v2 this script is a thin +Python shim that: + +1. Checks whether `wifi-densepose-sensing-server` binary is on `PATH` (installed + separately via a platform-specific binary distribution or `cargo install`). +2. If found: proxies `wifi-densepose serve`, `wifi-densepose stream`, etc. to the Rust + binary via `subprocess.run`. +3. If not found: falls back to the PyO3 module for offline DSP commands + (`wifi-densepose vitals --file recording.jsonl`). + +This is explicitly **not** a reimplementation of the CLI — the Rust binary +(`wifi-densepose-cli/src/main.rs`, currently exposes `mat` and `version` subcommands) +is the authoritative CLI. The pip shim is a discovery/convenience layer. 
+ +### 5.6 WS/MQTT client layer + +`wifi_densepose.client.ws.SensingClient` is a pure-Python asyncio client wrapping the +sensing-server WebSocket at `/ws/sensing`: + +```python +async with SensingClient("ws://localhost:8765/ws/sensing") as client: + async for msg in client.stream(): + if msg.type == "edge_vitals": + print(msg.breathing_rate_bpm, msg.heartrate_bpm) +``` + +`wifi_densepose.client.mqtt.RuViewMqttClient` wraps paho-mqtt and subscribes to +`ruview/<node_id>/raw/+` as defined in ADR-115 §3.2. + +Both clients are **pure Python** (no PyO3) and are optional dependencies (`pip install +wifi-densepose[client]`). They depend on `websockets>=12` and `paho-mqtt>=2` respectively. + +### 5.7 Witness chain (re-rooted to the Rust pipeline) + +`wifi_densepose.witness.verify_bundle(path)` replaces the v1 proof verification with a +new chain that exercises the Rust pipeline via PyO3: + +```python +from wifi_densepose.witness import verify_bundle + +result = verify_bundle("dist/witness-bundle-ADR028-*/") +assert result.verdict == "PASS", result.detail +``` + +Internally it: +1. Loads the 1,000-frame reference JSON from the bundle. +2. Feeds each frame through `PyCsiProcessor` (PyO3 binding of the Rust `CsiProcessor`). +3. Hashes the output using the same SHA-256 scheme as `archive/v1/data/proof/verify.py`. +4. Compares against the published hash in `expected_features_v2.sha256`. + +The v1 proof (`archive/v1/data/proof/verify.py`) is **preserved unchanged** — it +continues to prove the v1 pipeline. The new `wifi_densepose/witness/verify.py` proves the v2/Rust pipeline. +Both can coexist; the ADR-028 witness bundle ships with both. + +--- + +## 6. Migration path (phased) + +``` +P1 ──► P2 ──► P3 ──► P4 ──► P5 ──► P6+ +scaffold core vitals+ client publish deferred + types signal layer v2.0.0 +``` + +### P1 — Scaffold (1 week) + +- [ ] Add `v2/crates/wifi-densepose-py/` as workspace member. 
+- [ ] `Cargo.toml`: `crate-type = ["cdylib"]`, pyo3 0.28 + `abi3-py310`, no + workspace deps yet (empty module compiles and imports). +- [ ] `pyproject.toml` at repo root `python/` with `[build-system] requires = + ["maturin>=1.8"]` and `[tool.maturin] features = ["pyo3/extension-module"]`. +- [ ] CI job: `maturin develop` on ubuntu-latest in a Python 3.12 venv; import + `wifi_densepose._core` succeeds. +- [ ] Publish `wifi-densepose==1.99.0` to PyPI with a migration notice in the + module body (see §7.2 — no new features, just the tombstone release). + +### P2 — Core type bindings (1 week) + +- [ ] Bind `CsiFrame`, `CsiMetadata`, `Confidence`, `Keypoint`, `KeypointType`, + `BoundingBox`, `PoseEstimate`, `PersonPose` from `wifi-densepose-core`. +- [ ] All types: `__repr__`, `__eq__`, `__hash__` where meaningful; serde JSON + round-trip via `pyo3-serde` or manual `to_dict()` / `from_dict()`. +- [ ] Add `py.typed` + stub `.pyi` file generated by `pyo3-stub-gen`. +- [ ] Unit tests: `tests/test_core.py` — construct each type, round-trip JSON. + +### P3 — Vitals + signal DSP bindings (2 weeks) + +- [ ] Bind the full 4-stage vitals pipeline: + `CsiVitalPreprocessor`, `BreathingExtractor`, `HeartRateExtractor`, + `VitalAnomalyDetector`, `VitalSignStore`, `VitalReading`, `VitalEstimate`, + `AnomalyAlert`. +- [ ] Bind signal DSP entry points: `CsiProcessor`, `CsiProcessorConfig`, + `PhaseSanitizer`, `MotionDetector`, `HardwareNormalizer`. +- [ ] GIL release (`py.allow_threads`) on all calls >0.5 ms (measured in bench). +- [ ] Integration test: feed 1,000 frames from `archive/v1/data/proof/sample_csi_data.json` + through the PyO3 vitals pipeline; assert output is deterministic across runs. +- [ ] Re-implement `witness/verify.py` using P3 bindings; compare SHA-256 against the + v1 expected hash. **Note:** the hash will differ because the Rust and Python + processors are not identical — generate and publish a new `expected_features_v2.sha256`. 
+ +### P4 — WS/MQTT client layer (1 week) + +- [ ] Implement `wifi_densepose.client.ws.SensingClient` (asyncio, `websockets>=12`). +- [ ] Implement `wifi_densepose.client.mqtt.RuViewMqttClient` (paho-mqtt 2.x). +- [ ] Add `wifi_densepose.client.ha` helpers that parse ADR-115 MQTT discovery payloads + into Python dataclasses. +- [ ] Integration test: spin up `sensing-server` in Docker with `--mock-frames`; + assert `SensingClient` receives `edge_vitals` messages. + +### P5 — First cibuildwheel publish as v2.0.0 (1 week) + +- [ ] `.github/workflows/pip-release.yml` — cibuildwheel matrix (5 wheels + sdist). +- [ ] `requires-python = ">=3.10"` (stable ABI base). +- [ ] Populate `pyproject.toml` with minimal `dependencies`: `pyo3` is a build dep, + not a runtime dep. Runtime extras: `[client]` adds `websockets>=12,paho-mqtt>=2`. +- [ ] `pip install wifi-densepose==2.0.0` and smoke-test on each CI platform. +- [ ] PyPI publish via Trusted Publisher (OIDC, no API token in secrets). +- [ ] Announce: `wifi-densepose==1.99.0` tombstone already on PyPI; `v2.0.0` replaces + it in search results. + +### P6+ — Deferred + +- [ ] `wifi-densepose-nn` bindings (libtorch / candle wheel size TBD — see Open + Questions §11.2). +- [ ] `wifi-densepose-ruvector` bindings (RuVector attention types). +- [ ] MQTT/Matter integration helpers (`wifi_densepose.client.matter`). +- [ ] Deprecation notice on `wifi-densepose==1.x` releases (PyPI yank — see §7.3). +- [ ] `wifi-densepose-sensing-server` binary distribution via pip extra + (`pip install wifi-densepose[server]` fetches pre-built binary for the platform). +- [ ] HACS Python integration built on top of the pip client layer (follow-on to + ADR-115 §6.A). + +--- + +## 7. Compatibility and deprecation + +### 7.1 Version bump strategy + +`wifi-densepose==2.0.0` is a **hard major-version break**. The 1.x import namespace +`src.*` is incompatible with the 2.x namespace `wifi_densepose.*`. There is no shim +that can bridge them transparently. 
+ +### 7.2 Tombstone release: v1.99.0 + +Before publishing v2.0.0, publish `wifi-densepose==1.99.0` as a pure-Python sdist/wheel +whose sole content is: + +```python +# wifi_densepose/__init__.py (v1.99.0) +raise ImportError( + "wifi-densepose 1.x has been superseded by v2.0.0 which wraps " + "the Rust-based stack. Run:\n\n" + " pip install wifi-densepose==2.0.0\n\n" + "Migration guide: https://github.com/ruvnet/RuView/blob/main/docs/pip-migration.md\n" + "Legacy v1 source: archive/v1/ in the repository" +) +``` + +This ensures any project pinned to `wifi-densepose>=1,<2` that upgrades to 1.99.0 gets a +clear error rather than a silent broken import. + +### 7.3 PyPI yank strategy + +After v2.0.0 is stable (90-day observation window): + +- Yank `wifi-densepose==1.0.0` — never had a separate stable release period; was + superseded 4 hours after publication. +- Leave `wifi-densepose==1.1.0` un-yanked but deprecated in the description. +- Keep `wifi-densepose==1.99.0` (already published in P1) as the canonical 1.x landing page (raise error). + +Yanked versions remain installable when pinned exactly (`pip install wifi-densepose==1.0.0`, per PEP 592), +so users with reproducible builds pinned to exact versions are not broken silently. + +### 7.4 Semver + +| Version | Content | +|---|---| +| 1.0.0 – 1.1.0 | Legacy Python server (archive/v1/) | +| **1.99.0** | Tombstone: ImportError migration notice | +| **2.0.0** | PyO3 Rust bindings + WS/MQTT client | +| 2.x.y | Additive bindings + client improvements | +| 3.0.0 | If/when nn bindings added (libtorch wheel size may force a separate package) | + +--- + +## 8. Alternatives considered and rejected + +### Alt-A: Subprocess wrapper + +Package the pre-built `wifi-densepose-sensing-server` Rust binary inside the pip wheel. +Python calls it via `subprocess`. 
**Rejected** because: the binary is 15–30 MB stripped; +the install footprint is prohibitive; offline DSP scripting still requires the server to +be running; the witness chain cannot exercise Rust code through a black-box binary. + +### Alt-B: REST/WS client only + +Ship a pure-Python package that is purely a client to a running `sensing-server` +instance. **Rejected** because: it provides zero offline utility; it cannot host the +witness chain over the Rust pipeline; it solves the "Python access to telemetry" problem +but not the "Python DSP / prototyping" problem that academic and embedded users need. + +### Alt-C: Pure Python reimplementation + +Rewrite the DSP pipeline in pure Python/NumPy to reach parity with the Rust +implementation. **Rejected explicitly** — this is the root cause of the current 11-month +drift and the pattern this ADR is designed to exit. Any Python reimplementation will +immediately begin drifting again as the Rust stack evolves. + +--- + +## 9. Risks + +| Risk | Likelihood | Severity | Mitigation | +|---|---|---|---| +| **Build matrix complexity** — 5 target triples × cibuildwheel setup; CI time; QEMU for aarch64 cross-compile | High | Medium | Use `abi3-py310` (5 wheels not 20); QEMU aarch64 emulation available in GitHub Actions; maturin handles auditwheel automatically | +| **Binary size** — future nn/ONNX bindings may push wheel past 50 MB | Medium | High | Keep nn bindings in a separate `wifi-densepose-nn` PyPI package; keep core+vitals+signal wheel lean (~2 MB stripped) | +| **GIL / async issues** — PyO3 wrapping tokio crates requires careful runtime management; `py.allow_threads` must be used around all blocking Rust calls | High | High | Restrict initial bindings to synchronous Rust APIs (vitals, signal, core are all sync); async sensing-server client stays in pure-Python `client/ws.py` | +| **Maintainer overhead** — two languages, two build systems, one PyPI package | Medium | Medium | maturin unifies the build; CI handles 
publishing; start with 3 bound crates only | +| **1.x user breakage** — users pinned to `wifi-densepose>=1,<2` will get the tombstone | Low | Medium | 1.99.0 tombstone gives a clear error; maintain 1.1.0 on PyPI un-yanked for 90 days post-v2 | +| **Windows Rust toolchain in CI** — linking PyO3 on Windows requires MSVC or mingw; extra CI complexity | Medium | Medium | GitHub Actions `windows-latest` has MSVC; maturin + cibuildwheel handle this natively | +| **Stable ABI limitations** — `abi3` precludes some advanced PyO3 features (e.g. `Buffer` protocol) | Low | Low | Core/vitals/signal types are scalar/Vec — no need for buffer protocol in P2–P3 | +| **PyPI name ownership** — we own `wifi-densepose` on PyPI (confirmed via rUv author field) | Low | Low | Confirm with `pypi.org/user/ruvnet` before publishing | + +--- + +## 10. Acceptance criteria + +The following checks must all pass before ADR-117 is considered Accepted: + +- [ ] `pip install wifi-densepose==2.0.0` succeeds on Python 3.10, 3.11, 3.12, 3.13 + on linux/x86_64, macos/arm64, and windows/amd64 in a clean venv with no extra build tools. +- [ ] `python -c "import wifi_densepose; print(wifi_densepose.__version__)"` prints `2.0.0`. +- [ ] `python -c "from wifi_densepose import CsiFrame; f = CsiFrame([1.0]*56, [0.0]*56, 56, 0, 100.0); print(f)"` produces a non-error repr. +- [ ] The 4-stage vitals pipeline processes 1,000 frames in under 500 ms on a + reference machine (CPython 3.12, linux x86_64, no GPU). +- [ ] `wifi_densepose.witness.verify_bundle(path)` returns `verdict="PASS"` for a + freshly generated witness bundle from `scripts/generate-witness-bundle.sh`. +- [ ] `wifi_densepose.client.ws.SensingClient` receives at least one `edge_vitals` + message from a `sensing-server --mock-frames` instance within 5 seconds. +- [ ] After `pip install wifi-densepose==1.99.0`, `import wifi_densepose` raises `ImportError` with the migration URL. 
+- [ ] The compiled `_core` extension has no unresolved dynamic library dependencies + beyond libc/msvcrt (verified by `auditwheel show` on Linux, `delocate-listdeps` on macOS). +- [ ] Type stubs (`wifi_densepose/*.pyi`) are present; `mypy --strict` passes on the + example code in `examples/vitals_from_buffer.py`. +- [ ] Total wheel size for core+vitals+signal: `≤ 5 MB` per platform. + +--- + +## 11. Open questions + +1. **Stable ABI base version**: `abi3-py310` drops support for Python 3.9, which v1.1.0 + declared. Is Python 3.9 EOL-enough (EOL 2025-10-05) to drop cleanly? *Tentative: yes, + drop 3.9. Use abi3-py310.* + +2. **Package name for nn bindings**: if `wifi-densepose-nn` bindings require a 30 MB + libtorch wheel, should they live at `wifi-densepose-nn` (separate PyPI package) or + as an optional heavy extra of `wifi-densepose[nn]`? *Tentative: separate package to + avoid polluting the lean wheel.* + +3. **Witness hash continuity**: the Rust pipeline will produce a different SHA-256 than + the v1 Python pipeline for the same input frames. The new `expected_features_v2.sha256` + must be generated and committed before v2.0.0 ships. Who generates it, and how is + the generation process itself witnessed? *Tentative: generate in CI, commit hash to + `archive/v1/data/proof/`, include in ADR-028 matrix.* + +4. **`ruv-neural` crate**: `v2/crates/ruv-neural/` exists in the workspace. Is it a + candidate for early Python bindings (useful for training-loop scripting), or should + it wait for the nn/train tier? *Tentative: defer — it depends on training backends.* + +5. **Tokio runtime**: `wifi-densepose-sensing-server` is tokio-based, but the three + crates bound in P2–P3 (`core`, `vitals`, `signal`) are synchronous. Are there any + hidden tokio dependencies that would force a runtime into the extension module? + *Tentative: inspect each crate's Cargo.toml for tokio deps before P1 scaffold.* + +6. 
**`pyo3-stub-gen` vs manual stubs**: automated stub generation from PyO3 has rough + edges for generics and newtype patterns. Should we hand-write `.pyi` stubs for the + first release? *Tentative: use `pyo3-stub-gen` for scaffolding, hand-tune for public + API.* + +7. **`wifi_densepose` vs `wifi-densepose` namespace**: the pip package name uses a dash + (`wifi-densepose`) but Python imports use underscores (`wifi_densepose`). The v1 + package shipped under `src.*`, not `wifi_densepose.*`. Is there any tooling that + hardcodes the `src` namespace? *Tentative: the `src.*` namespace was specific to + `archive/v1/` and is cleanly dropped.* + +8. **cibuildwheel version**: the current stable is cibuildwheel v2.x. Does the + project's existing GitHub Actions config need updates for maturin builds vs + the current `cargo build` / `build.py` patterns? *Tentative: yes, add a separate + `pip-release.yml` workflow; do not modify existing Rust CI.* + +9. **RuVector bindings timeline**: the `wifi-densepose-ruvector` crate (`v2/crates/`) + depends on `ruvector-gnn = "2.0.5"`. Does ruvector-gnn ship as a pre-built static + lib or require linking at build time? This directly affects the P6+ wheel size. + *Tentative: investigate ruvector-gnn link strategy before committing to a timeline.* + +10. **`wifi_densepose.client.ha` conflict with ADR-115/116**: the `ha.py` helper module + should not duplicate the ADR-115 MQTT discovery logic in Python. Should it be read-only + (parse HA discovery JSON → Python dataclasses) or also write (publish discovery JSON)? + *Tentative: read-only for v2.0. Write path deferred to the HACS integration follow-on + (ADR-115 §6.A).* + +--- + +## 12. 
References + +- **PyPI package (current)**: https://pypi.org/project/wifi-densepose/ — v1.1.0, released 2025-06-07 +- **PyPI JSON metadata**: https://pypi.org/pypi/wifi-densepose/json +- **Local source**: `archive/v1/setup.py`, `archive/v1/src/__init__.py`, `archive/v1/data/proof/verify.py` +- **Rust workspace**: `v2/Cargo.toml`, `v2/crates/wifi-densepose-core/src/lib.rs`, + `v2/crates/wifi-densepose-vitals/src/lib.rs`, `v2/crates/wifi-densepose-signal/src/lib.rs`, + `v2/crates/wifi-densepose-sensing-server/src/lib.rs` +- **PyO3 docs**: https://pyo3.rs/ — v0.28.3 stable, Rust ≥1.83 required +- **maturin docs**: https://maturin.rs/ — supports Python 3.8+ on Linux/macOS/Windows/FreeBSD +- **cibuildwheel docs**: https://cibuildwheel.pypa.io/ +- **ADR-021**: ESP32 vitals — defines the HR/BR extraction pipeline this ADR exposes in Python +- **ADR-028**: ESP32 capability audit — defines the witness bundle format `witness/verify.py` must re-verify +- **ADR-115**: HA-DISCO + HA-MIND + HA-FABRIC — defines the MQTT topic structure the `client/mqtt.py` helper consumes +- **ADR-116**: HA-COG cog packaging — parallel effort; ADR-117 pip library is the developer-facing Python surface; ADR-116 is the Seed-installable artifact diff --git a/docs/research/soul/README.md b/docs/research/soul/README.md new file mode 100644 index 00000000..a9b99293 --- /dev/null +++ b/docs/research/soul/README.md @@ -0,0 +1,116 @@ +# Soul Signature — Research Specification + +**Status:** Research Specification (Pre-Implementation) +**Date:** 2026-05-24 +**Maintainer:** ruv + +--- + +## What Is a Soul Signature + +A Soul Signature is a fused multi-modal biometric identity vector derived entirely +from passive electromagnetic measurement of a person inside a room equipped with +WiFi-DensePose / RuView sensing nodes. No wearable, no camera, no explicit +scan-time consent moment is required for recognition once a person has enrolled. 
+ +The word "soul" is deliberate product framing for a scientifically defensible concept: +the same relationship a fingerprint bears to identity in forensic science, or FaceID +to phone authentication, but extended to a new sensing dimension — passive RF at +distance, through walls, at room scale. Seven orthogonal electromagnetic observables, +fused into a single content-addressed RVF graph file, constitute the signature. + +The claim is not mystical. Every channel is grounded in published physics and prior +WiFi sensing literature. Every assertion about discriminative power either cites a +peer-reviewed result or is explicitly marked "open research; baseline TBD." + +--- + +## What a Soul Signature Is NOT + +- It is NOT a replacement for fingerprint scanners, iris scanners, or FaceID on + accuracy-per-attempt measures. Current RF biometrics are less mature than those + modalities. See `security.md` for the honest error-rate picture. +- It is NOT a single number, hash, or deterministic bit string. It is a + probabilistic match against a stored graph with a calibrated false-accept rate. +- It is NOT medically diagnostic. It detects biophysical proxies, not conditions. + "Gait asymmetry increased 18% over 14 days" is the output, never "Parkinson's." +- It is NOT equivalent to explicit-consent biometrics in regulated contexts. GDPR + and HIPAA modes are defined and mandatory for healthcare deployments. +- It is NOT currently deployable as a legal evidence instrument. +- It is NOT snake oil, energy healing, or anything outside measurable electrophysics. 
+ +--- + +## Document Map + +| File | Contents | +|------|----------| +| `specification.md` | Typed RVF graph schema; all node types, edge types, serialization format; aggregator vs stored profile distinction | +| `scanning-process.md` | Structured 60-second enrollment protocol; hardware requirements; quality gates; fast-scan and continuous modes; re-scan cadence | +| `security.md` | Full threat model; five adversaries; mitigations; cryptographic primitive choices; GDPR/HIPAA mode; open research items | +| `references.md` | All cited ADRs, papers, datasets, standards | + +--- + +## Conceptual Graph (ASCII) + +The following depicts one example soul signature as a graph stored in a single +RVF container. Each box is an RVF node (a SEG_EMBED or SEG_META segment). Each +arrow is a typed edge stored in the graph manifest. + +``` + +-----------------------+ + | AETHER_Embedding | 128-dim f32, L2-normalized (ADR-024) + | contrastive CSI | HNSW-searchable via ruvector-core + | backbone embedding | + +----------+------------+ + | derived_from + v + +-----------+-----------+ +------------------------+ + | FieldModel_Residual +---fuses--+ Subcarrier_Reflection | + | ADR-030 perturbation | | per-angle multipath | + | eigenmode projection | | amplitude + phase | + +----------+------------+ +------------------------+ + | correlates_with + v + +----------+------------+ +------------------------+ + | Cardiac_HR_Profile +--links---+ Cardiac_Waveform_ | + | baseline_bpm, HRV_LF | | Morphology (wavelet | + | HRV_HF, rhythm_class | | coefficients) | + +----------+------------+ +------------------------+ + | temporally_colocated + v + +----------+------------+ + | Respiratory_Pattern | + | baseline_bpm, depth, | + | apnea_index, HRV_RSA | + +----------+------------+ + | temporally_colocated + v + +----------+------------+ +------------------------+ + | Gait_Timing +--links---+ Skeletal_Proportions | + | cadence, stride_var, | | torso/limb ratios | + | double_support_pct, | | from 
ADR-079 keypoints | + | asymmetry_index | +------------------------+ + +----------+------------+ + | attested_by + v + +----------+------------+ + | WitnessChain | Ed25519 over (content_hash || + | ADR-110 attestation | timestamp || device_id) per ADR-110 + +-----------------------+ +``` + +File naming convention: `signature-<id>.rvf` + +--- + +## Implementation Status + +This is a **research specification**. None of the soul-signature-specific graph +container logic is implemented yet. The constituent ADRs (AETHER, MERIDIAN, +RuvSense field model, ADR-039 vitals, ADR-110 witness chain) provide the substrate. +The soul signature is the composition layer above them. + +A future implementation ADR should reference this document and assign acceptance +tests derived from the quality gates defined in `scanning-process.md`. diff --git a/docs/research/soul/references.md b/docs/research/soul/references.md new file mode 100644 index 00000000..c2bb947c --- /dev/null +++ b/docs/research/soul/references.md @@ -0,0 +1,138 @@ +# Soul Signature — References + +**Status:** Research Specification (Pre-Implementation) +**Date:** 2026-05-24 +**Author:** ruv + +--- + +## 1. Internal Architecture Decision Records + +All ADRs are located at `docs/adr/ADR-XXX-*.md` in this repository. 
+ +| ADR | Title | Relevance to soul signature | +|---|---|---| +| ADR-003 | RVF Cognitive Containers for CSI Data | RVF container format used by soul signature | +| ADR-004 | HNSW Vector Search for Signal Fingerprinting | HNSW index for person_track embedding search | +| ADR-005 | SONA Self-Learning Pose Estimation | LoRA adaptation, EWC regularization, environment profiles | +| ADR-007 | Post-Quantum Cryptography Secure Sensing | PQC cryptographic context; foundation for ADR-108/109 | +| ADR-010 | Witness Chains Audit Trail Integrity | Witness chain design; Ed25519 over frame bundles | +| ADR-014 | SOTA Signal Processing Algorithms | RuvSense pipeline: conjugate multiplication, Hampel filter, spectrogram, BVP | +| ADR-021 | Vital Sign Detection via rvdna Pipeline | Cardiac HR / respiratory extraction; bandpass filters; ADR-039 vitals packet | +| ADR-023 | Trained DensePose Model with RuVector Pipeline | CsiToPoseTransformer backbone; MPJPE baseline 91.7 mm | +| ADR-024 | Project AETHER — Contrastive CSI Embedding Model | Primary soul signature identity channel; 128-dim L2-normalized embedding; HNSW person_track index (>80% mAP target at 5 subjects) | +| ADR-027 | Project MERIDIAN — Cross-Environment Domain Generalization | Environment-disentangled embeddings; HardwareNormalizer; multi-room portability | +| ADR-029 | RuvSense Multistatic Sensing Mode | Multi-node mesh; 20 Hz DensePose; <30 mm jitter; person separation | +| ADR-030 | RuvSense Persistent Field Model | Field normal modes; SVD eigenstructure; perturbation extraction; longitudinal drift; adversarial detection; cross-room continuity | +| ADR-039 | ESP32-S3 Edge Intelligence Pipeline | Vitals packet wire format (magic `0xC511_0002`); HR/BR on-device extraction | +| ADR-075 | MinCut Person Separation | ruvector-mincut for multi-person track assignment | +| ADR-079 | Camera Ground-Truth Training | Paired camera + CSI training; skeletal proportions accuracy | +| ADR-082 | Pose Tracker Confirmed Output 
Filter | Pose tracker output confidence filtering | +| ADR-100 | Cog Packaging Specification | Ed25519 firmware signing; supply chain integrity | +| ADR-105 | Federated CSI Training | Federated AETHER fine-tuning; secure aggregation | +| ADR-106 | DP-SGD and Primitive Isolation | Differential privacy at training; biometric primitive isolation; (ε, δ)-DP budget | +| ADR-107 | Cross-Installation Federation | Cross-installation secure aggregation; DH key exchange | +| ADR-108 | Kyber Post-Quantum Key Exchange | Kyber-768 (NIST FIPS 203); hybrid X25519 + Kyber during migration | +| ADR-109 | Dilithium PQC Signatures | Dilithium-3 (NIST FIPS 204); hybrid Ed25519 + Dilithium; cog signing | +| ADR-110 | ESP32-C6 Firmware Extension | Wi-Fi 6 HE-LTF CSI (242 subcarriers); 802.15.4 time-sync; TWT; Ed25519 witness chain per-frame | +| ADR-113 | Multistatic Placement Strategy | Node placement geometry; coverage analysis | +| ADR-115 | Home Assistant Integration (HA-DISCO + HA-MIND) | Privacy mode; MQTT auto-discovery; semantic primitives layer under which soul signature operates | + +--- + +## 2. AETHER and Contrastive Embedding Foundations + +- Chen, T., Kornblith, S., Norouzi, M., & Hinton, G. (2020). **A Simple Framework for Contrastive Learning of Visual Representations** (SimCLR). *ICML 2020*. arXiv:2002.05709. +- Chen, T., Kornblith, S., Swersky, K., Norouzi, M., & Hinton, G. (2020). **Big Self-Supervised Models are Strong Semi-Supervised Learners** (SimCLR v2). *NeurIPS 2020*. arXiv:2006.10029. +- Bardes, A., Ponce, J., & LeCun, Y. (2022). **VICReg: Variance-Invariance-Covariance Regularization for Self-Supervised Learning**. *ICLR 2022*. arXiv:2105.04906. +- Grill, J.-B., et al. (2020). **Bootstrap Your Own Latent: A New Approach to Self-Supervised Learning** (BYOL). *NeurIPS 2020*. arXiv:2006.07733. +- Wang, T. & Isola, P. (2020). **Understanding Contrastive Representation Learning through Alignment and Uniformity on the Hypersphere**. *ICML 2020*. arXiv:2005.10242. 
+ +--- + +## 3. WiFi CSI Biometric Identification (Prior Art) + +- **IdentiFi** (2025): Self-supervised WiFi-based identity recognition in multi-user smart environments. Contrastive pretraining in the signal domain produces identity-discriminative embeddings without spatial labels. *PMC:12115556*. +- **WhoFi** (2025): Transformer-based WiFi CSI encoding for person re-identification. 95.5% accuracy on NTU-Fi (18 subjects). Validates transformer backbones for CSI re-ID. arXiv:2507.12869. +- **Wi-PER81** (2025): Benchmark dataset of 162K wireless packets for WiFi-based person re-identification using Siamese networks. *Nature Scientific Data*, 2025. doi:10.1038/s41597-025-05804-0. +- **CAPC** (Context-Aware Predictive Coding, 2024): CPC + Barlow Twins for WiFi sensing. 24.7% accuracy improvement on unseen environments. arXiv:2410.01825. +- **SSL for WiFi HAR Survey** (2025): Comprehensive evaluation of SimCLR, VICReg, Barlow Twins, SimSiam on WiFi CSI. arXiv:2506.12052. + +--- + +## 4. WiFi Sensing SOTA (Pose, Vitals, Gait) + +- Geng, J., Huang, D., & De la Torre, F. (2022). **DensePose From WiFi**. *CMU*. arXiv:2301.00250. +- Adib, F., Kabelac, Z., Katabi, D., & Miller, R.C. (2014). **3D Tracking via Body Radio Reflections** (WiTrack). *NSDI 2014*. +- Zheng, Y., Zhang, Y., Qian, K., Zhang, G., Liu, Y., Wu, C., & Yang, Z. (2019). **Widar 3.0: Zero-Effort Cross-Domain Gesture Recognition with Wi-Fi**. *MobiSys 2019*. +- Zhao, M., Li, T., Abu Alsheikh, M., Tian, Y., Zhao, H., Torralba, A., & Katabi, D. (2018). **Through-Wall Human Pose Estimation Using Radio Signals**. *CVPR 2018*. +- Zhao, M., Adib, F., & Katabi, D. (2016). **Emotion Recognition Using Wireless Signals** (EQ-Radio). *MobiCom 2016*. (HRV from WiFi; cardiac biometric baseline) +- **PerceptAlign** (Chen et al., 2026): Geometry-conditioned cross-layout WiFi pose estimation. >60% cross-domain error reduction. Dataset: 21 subjects, 5 scenes, 18 actions. arXiv:2601.12252. 
+- **Person-in-WiFi 3D** (Yan et al., 2024): Multi-person 3D pose from WiFi. 91.7 mm MPJPE (single-person). *CVPR 2024*. +- **DGSense** (Zhou et al., 2025): Domain-invariant features for WiFi/mmWave/acoustic sensing. arXiv:2502.08155. +- **X-Fi** (Chen & Yang, 2025): Modality-invariant foundation model for human sensing. 24.8% MPJPE improvement on MM-Fi. *ICLR 2025*. arXiv:2410.10167. +- **AM-FM** (2026): First WiFi foundation model, pretrained on 9.2M CSI samples, 20 device types, 439 days. arXiv:2602.11200. +- Ma, Y., Zhou, G., Wang, S., Zhao, H., & Jung, W. (2018). **SignFi: Sign Language Recognition Using WiFi**. *ACM IMWUT*. arXiv:1806.04583. + +--- + +## 5. Training Datasets Referenced + +- **MM-Fi** (2022): Multi-Modal Non-Intrusive 4D Human Dataset — WiFi CSI, mmWave, LiDAR, RGB-D. 40 subjects, 27 actions, 5 environments, 320K samples. 56-subcarrier CSI, 17 COCO keypoints. [github.com/ybhbingo/MMFi_dataset] +- **Wi-Pose** (2022): WiFi-based 3D pose estimation dataset. Used in ADR-015. +- **NTU-Fi** (2022): 56 activities, WiFi CSI, 75 Hz sampling. Used for WhoFi evaluation. + +--- + +## 6. Differential Privacy + +- Abadi, M., Chu, A., Goodfellow, I., McMahan, H.B., Mironov, I., Talwar, K., & Zhang, L. (2016). **Deep Learning with Differential Privacy**. *CCS 2016*. [Moments Accountant; DP-SGD formulation used in ADR-106] +- Mironov, I. (2017). **Rényi Differential Privacy**. *CSF 2017*. [Alternative DP accounting; referenced in ADR-106 as future enhancement] +- Shokri, R., Stronati, M., Song, C., & Shmatikov, V. (2017). **Membership Inference Attacks Against Machine Learning Models**. *IEEE S&P 2017*. [Motivation for DP-SGD in ADR-106] + +--- + +## 7. Cryptographic Standards + +- **RFC 8032** (2017): Edwards-Curve Digital Signature Algorithm (EdDSA). [Ed25519; used in ADR-110 witness chain] +- **RFC 8439** (2018): ChaCha20 and Poly1305 for IETF Protocols. 
[At-rest encryption primitive specified in security.md §5] +- **RFC 9106** (2021): Argon2 Memory-Hard Function. [KDF for soul signature at-rest key derivation] +- **NIST FIPS 203** (2024): Module-Lattice-Based Key-Encapsulation Mechanism Standard (ML-KEM / Kyber). [ADR-108; post-quantum key exchange] +- **NIST FIPS 204** (2024): Module-Lattice-Based Digital Signature Standard (ML-DSA / Dilithium). [ADR-109; post-quantum signatures] +- **NIST SP 800-132 Draft** (2024): Recommendation for Password-Based Key Derivation. [Argon2id parameter guidance] + +--- + +## 8. Biometric Standards (for Standards Awareness) + +The soul signature is not currently certified to any of these standards but the +specification is designed with awareness of the relevant frameworks. + +- **ISO/IEC 19794-1:2011**: Biometric data interchange formats — Part 1: Framework. + [Top-level; soul signature's node/edge schema follows the typed-attribute-record + philosophy of this standard] +- **ISO/IEC 19794-2:2011**: Biometric data interchange formats — Part 2: Finger + minutiae data. [Structural analog for how the soul signature encodes per-channel + discriminative features] +- **ISO/IEC 19794-4:2011**: Biometric data interchange formats — Part 4: Finger image data. + [Image-container analog; soul signature extends the concept to vector-valued + multi-channel templates] +- **ISO/IEC 29794-1:2016**: Biometric sample quality — Part 1: Framework. + [Quality scoring framework; soul signature's per-node `confidence` field + is conceptually analogous to ISO 29794 quality scores] +- **ISO/IEC 30107-3:2023**: Biometric presentation attack detection — Part 3: + Testing and reporting. [Presentation attack (anti-spoofing) framework; + the adversarial.rs module is the soul signature's PAD implementation] + +--- + +## 9. Reading List for RF Biometrics Newcomers + +Ordered from most accessible to most technical. + +1. Adib, F. (2017). **Using Radio Reflections to See the World**. MIT PhD thesis. 
[Most accessible introduction to using RF for human sensing; covers WiVi, WiTrack, EQ-Radio] +2. Ma, Y., et al. (2019). **WiFi Sensing with Channel State Information: A Survey**. *ACM Computing Surveys*. doi:10.1145/3310194. [Comprehensive survey of CSI-based sensing approaches through 2019] +3. Wang, X., et al. (2023). **A Survey on WiFi Sensing: From Signal to Action**. *IEEE Internet of Things Journal*. [Updated survey through 2023; covers contrastive learning approaches] +4. Chen, T., et al. (2020). **A Simple Framework for Contrastive Learning** (SimCLR). arXiv:2002.05709. [Best starting point for understanding the contrastive learning approach used in AETHER] +5. Geng, J., et al. (2022). **DensePose From WiFi**. arXiv:2301.00250. [Direct ancestor of this codebase; describes the cross-modal CSI → DensePose mapping] +6. Abadi, M., et al. (2016). **Deep Learning with Differential Privacy**. CCS 2016. [Essential reading before any deployment collecting biometric data at training time] diff --git a/docs/research/soul/scanning-process.md b/docs/research/soul/scanning-process.md new file mode 100644 index 00000000..a1ebd3bc --- /dev/null +++ b/docs/research/soul/scanning-process.md @@ -0,0 +1,306 @@ +# Soul Signature — Scanning Process + +**Status:** Research Specification (Pre-Implementation) +**Date:** 2026-05-24 +**Author:** ruv + +--- + +## 1. 
Hardware Prerequisites + +### 1.1 Full Protocol (N ≥ 3 Nodes) + +| Component | Minimum | Recommended | Notes | +|---|---|---|---| +| Sensing nodes | 3 × ESP32-S3 (ADR-028) | 5+ nodes | Multi-node triangulation reduces angle-dependent blind spots; ADR-029 multistatic mesh | +| Compute appliance | Cognitum Seed (Pi 5 + Hailo) | Same | Runs the field model, AETHER inference, vitals pipeline | +| Network link | 2.4 GHz or 5 GHz AP | Dedicated sensing AP | Shared AP with user traffic degrades CSI frame rate | +| Firmware version | ADR-110 v0.7.0+ | Same | Ed25519 witness chain required for attestation | +| Clock sync | 802.15.4 time-sync (ESP32-C6) or NTP fallback | 802.15.4 preferred | ±100 µs alignment per ADR-110; NTP gives ±5 ms | + +### 1.2 Degraded Mode (1 Node) + +A single-node enrollment produces an incomplete signature: +- Skeletal proportions: degraded (single-angle view) +- Subcarrier reflection profile: single orientation only (3-orientation protocol collapses to 1) +- AETHER embedding: usable but lower confidence +- Cardiac / respiratory: unaffected (single-node sufficient) +- Gait timing: usable if node placement allows bidirectional walk + +Single-node signatures MUST be tagged `degraded_mode: true` in the manifest. The +match score uses only the channels that met minimum confidence thresholds. The +soul signature is technically valid but should be re-enrolled with multi-node +hardware when possible. + +### 1.3 ESP32-C6 Uplift (Wi-Fi 6 HE-LTF) + +When at least one ESP32-C6 node is present (ADR-110), the subcarrier count +expands from 52 (HT-LTF, S3) to up to 242 (HE-LTF, C6). The MERIDIAN +HardwareNormalizer (ADR-027) maps all nodes to a canonical 56-subcarrier +representation for the AETHER backbone. The full 242-subcarrier profile is +preserved in the SubcarrierReflectionProfile node for higher-fidelity matching +when available. The C6's 802.15.4 time-sync (±100 µs) also improves multistatic +coherence relative to NTP-only S3 meshes. + +--- + +## 2. 
Structured 60-Second Enrollment Protocol + +The enrollment protocol produces exactly one `.rvf` soul signature file. The +protocol is structured into five phases with exact timing. A human-readable +prompt sequence should be delivered to the subject via audio or display. + +### Phase 0 — Empty-Room Field Recalibration (T+0 to T+10) + +Before the subject enters the sensing zone, the room must be empty and the +ADR-030 field model must be current. + +``` +T+0s : System checks field model age. Maximum age: 4 hours. + If stale or absent → run field recalibration: + Collect 1,200 CSI frames at 20 Hz (60 seconds of empty room) + Compute per-link Welford mean and covariance + Run SVD on covariance matrix → top-K=8 eigenmode vectors + Store in field_model.rs::FieldNormalMode + +T+0–10s: Quiet sampling of empty-room field state. No subject present. + Operator prompt: "Please ensure the room is empty." + System: verifies presence score < 0.1 (ADR-039 Tier 2 presence detection). + Failure: if presence score ≥ 0.1, abort and report FAIL_ROOM_NOT_EMPTY. +``` + +This phase is skipped (not aborted) if the field model was updated within the +last 4 hours AND the current empty-room sampling confirms presence score < 0.05. + +### Phase 1 — Deep Breathing Baseline (T+10 to T+25) + +Subject enters the sensing zone and performs five deep breathing cycles. + +``` +T+10s : Subject enters scan zone. System detects presence. + Operator prompt: "Please stand still and breathe slowly and deeply." + +T+10–25s: Subject stands at zone center, facing node cluster. + Five complete breath cycles, each ≥ 4 seconds. + System collects: + - ADR-021 BreathingExtractor: baseline_bpm, depth_amplitude, + inspiration_expiration_ratio, HRV_RSA + - ADR-021 HeartRateExtractor: initial HR, HRV_SDNN (partial) + - AETHER embedding: accumulates over 300 CSI frames (20 Hz × 15s) + Quality gate: BreathingExtractor VitalCoherenceGate must emit + PERMIT for ≥ 10 of the 15 seconds. 
Failure → FAIL_POOR_BREATHING_SIGNAL. +``` + +### Phase 2 — Seated Rest (T+25 to T+35) + +Subject sits to minimize motion and allow cardiac signal isolation. + +``` +T+25s : Operator prompt: "Please sit down and rest quietly." + +T+25–35s: Subject seated, minimal movement. + System collects: + - HeartRateExtractor: HR baseline, HRV_SDNN, HRV_RMSSD, + LF/HF ratio, sinus rhythm classification + - Cardiac_Waveform_Morphology: 64-coefficient wavelet decomposition + of bandpass-filtered cardiac phase signal (0.8–2.0 Hz) + Quality gate: HR confidence ≥ 0.6 for ≥ 7 of 10 seconds. + Failure → FAIL_POOR_CARDIAC_SIGNAL (soft failure: cardiac nodes + marked low-confidence; signature proceeds without them if AETHER + and gait nodes pass their own thresholds). +``` + +### Phase 3 — Gait Walk (T+35 to T+50) + +Subject walks a 2-meter line twice in each direction. + +``` +T+35s : Operator prompt: "Please walk a straight line of 2 meters back and + forth twice at your natural pace." + +T+35–50s: Subject walks: A→B, B→A, A→B, B→A (four transits, ≥ 8 strides total). + System collects (via pose_tracker.rs, ADR-029 Sect 2.7): + - GaitTimingNode: cadence, stride_period_variance, + double_support_pct, asymmetry_index, step_width_m + - SkeletalProportionsNode: torso/limb ratios from 17-keypoint + trajectory accumulated over ≥ 8 strides + - AETHER embedding: continues accumulating (300 more frames) + Quality gate: ≥ 8 strides detected with confidence ≥ 0.7 per stride. + Failure → FAIL_INSUFFICIENT_GAIT_DATA. + Note: the ruvector-mincut DynamicPersonMatcher must confirm only one + person is tracked. If two tracks are active → FAIL_MULTIPLE_SUBJECTS. +``` + +### Phase 4 — Standing Orientation Scan (T+50 to T+60) + +Subject stands at three orientations to capture the subcarrier reflection profile. + +``` +T+50s : Operator prompt: "Please stand facing the wall. I will ask you to + rotate in place twice." + +T+50–53s: Orientation 0° (subject faces primary node cluster). 
+ System collects: SubcarrierReflectionProfile at 0° + (ADR-030 field-subtracted, 56 subcarriers, amplitude + phase). + +T+53s : Operator prompt: "Please turn 90 degrees to your right." + +T+53–56s: Orientation 90°. + System collects: SubcarrierReflectionProfile at 90°. + +T+56s : Operator prompt: "Please turn 90 degrees to your right again." + +T+56–60s: Orientation 180°. + System collects: SubcarrierReflectionProfile at 180°. + Body_Field_Coupling: computed from AETHER attention map weighted + by ADR-030 top-K=8 eigenvectors (final computation at T=60s). + +T+60s : Enrollment window closes. + AETHER embedding finalized: mean pool over all ~1,000 frames accumulated while the subject is present (T+10 to T+60 at 20 Hz). + All node confidence values computed. +``` + +--- + +## 3. Quality Gates + +The enrollment FAILS and emits a structured error code if any of the following +conditions are met. Failed enrollments do not produce a stored `.rvf` file. + +| Gate | Condition for FAIL | Error code | +|---|---|---| +| Room occupied | Presence score ≥ 0.1 at Phase 0 end | `FAIL_ROOM_NOT_EMPTY` | +| Multiple subjects | ≥ 2 active pose tracks during Phases 1–4 | `FAIL_MULTIPLE_SUBJECTS` | +| Intermittent presence | Subject exits sensing zone for > 3 consecutive seconds | `FAIL_SUBJECT_LEFT_ZONE` | +| AETHER confidence low | Final embedding confidence < 0.6 (HNSW search confidence) | `FAIL_AETHER_LOW_CONFIDENCE` | +| Breathing signal absent | VitalCoherenceGate PERMIT rate < 67% during Phase 1 | `FAIL_POOR_BREATHING_SIGNAL` | +| Gait data insufficient | Fewer than 8 strides detected with confidence ≥ 0.7 | `FAIL_INSUFFICIENT_GAIT_DATA` | +| Field model dirty | Field model age > 4 hours and recalibration refused | `FAIL_STALE_FIELD_MODEL` | +| Adversarial detection | RuvSense adversarial.rs flags physically impossible signal | `FAIL_ADVERSARIAL_SIGNAL` | +| Node count below minimum | Fewer than 2 nodes online during Phases 3–4 | `WARN_DEGRADED_MODE` (not a hard fail; produces degraded signature) | + +Soft failures (cardiac signal 
only) do not abort the enrollment; they mark those +nodes as low-confidence and reduce the match weight for those channels at +recognition time. + +--- + +## 4. Fast Scan (10-Second Degraded Identification) + +A fast scan produces a partial query embedding, not a stored profile. It is used +for recognition of already-enrolled subjects, not for new enrollment. + +``` +T+0s : System checks whether field model is current (age < 4 hours). + If stale: recognition accuracy degraded; warn operator. + +T+0–10s: Subject stands still at zone center, natural breathing. + System collects: AETHER embedding (200 frames, 10s at 20 Hz). + Cardiac HR: partial (confidence typically < 0.5). + Gait: not available. + Subcarrier reflection: 1 orientation only. + +T+10s : Query issued against all stored profiles in HNSW index. + Match score computed using available channels only. + Cardiac, gait, and skeletal proportions excluded from denominator + (availability factor = 0 for absent channels). +``` + +Fast scan is acceptable for: +- Returning resident recognition (already enrolled, low-friction use case) +- Home automation triggers (occupancy attribution per ADR-115 HA-MIND) + +Fast scan is NOT acceptable for: +- Initial enrollment +- High-assurance access control +- Healthcare identification + +--- + +## 5. Continuous Mode — Implicit Signature Refinement + +In continuous operating mode, the system incrementally updates the online +aggregator for enrolled persons as they go about their normal activities. The +stored profile is re-published from the aggregator every 90 days (or on the +re-scan cadence, whichever comes first). This means a deployed system becomes +more accurate over time, not less. + +Convergence property: the Welford online statistics in the aggregator are +numerically stable and converge to the true population mean/variance as +observation count increases. 
The AETHER embedding accumulated over thousands +of natural-activity windows is more representative than a single 60-second +enrollment. The stored profile is replaced (not amended) on each re-publish; the +old profile is archived (not deleted) per the forward-secrecy requirements in +`security.md`. + +The continuous mode raises a consent concern: a person is effectively being +re-enrolled continuously without explicit action. This is addressed in +`security.md §4` (Consent Architecture). + +--- + +## 6. Multi-Room Enrollment + +When a person moves across multiple sensing zones (e.g., living room and bedroom +each with a Cognitum Seed node cluster), the cross-room signature works as follows: + +1. Full 60-second enrollment is performed in the primary room. This produces the + initial stored profile with `environment_normalized: false` in the manifest. + +2. When the MERIDIAN domain generalization layer (ADR-027) is active, the + HardwareNormalizer maps the enrollment embedding to the environment-invariant + subspace. The stored profile is updated to `environment_normalized: true`. + +3. In subsequent rooms, a fast scan (10s) is sufficient to attribute identity. The + MERIDIAN-normalized AETHER embedding handles the room shift. + +4. For healthcare deployments requiring room-by-room re-enrollment for regulatory + reasons, a per-room enrollment protocol runs in each room and the signatures + are linked by the opaque `person_id` field (never by raw PII). + +--- + +## 7. 
Re-Scan Cadence + +| Deployment context | Re-scan interval | Rationale | +|---|---|---| +| Healthy adult (residential) | 90 days | Anatomy stable; continuous mode refines continuously | +| Child (growing skeleton) | 30 days | Skeletal proportions change; gait timing changes | +| Healthcare / clinical | Per clinical event | Post-surgery, post-illness, post-significant weight change | +| Post-exercise monitoring | 7 days during active programs | Body composition changes affect RF backscatter | +| Any | On drift alert from longitudinal.rs (ADR-030 Tier 4) | System-initiated; shown to user as "calibration recommended" | + +The `longitudinal.rs` module monitors five drift metrics (GaitSymmetry, +StabilityIndex, BreathingRegularity, MicroTremor, ActivityLevel) using Welford +statistics over daily observations. When any metric exceeds 2-sigma deviation +sustained for 3 consecutive days, a `DriftAlert` is emitted. The system +displays this as "signature drift detected — re-scan recommended," not as a +health diagnosis. + +--- + +## 8. Output Artifact + +On successful completion, the enrollment pipeline produces: + +1. `signature-<id>.rvf` — the binary soul signature container. Content-addressed. + Encrypted with the person's key (see `security.md §5`) before writing to disk. + +2. `signature-<id>.json` — the JSON-LD sidecar for human inspection and audit. + Does not contain raw vector data. Safe to log. + +3. A row in the local HNSW index (`ruvector-core::VectorIndex`, `person_track` + subindex per ADR-024 §2.4) linking the person_id to the AETHER embedding. + This index is used for O(log n) recognition queries. + +4. An Ed25519 witness entry per ADR-110, signing + `(rvf_sha256 || timestamp_ns || enrolled_by_device_id)`. Stored in the + RVF SEG_WITNESS segment AND in the node's local audit log. + +The enrollment process does NOT: +- Transmit raw CSI or raw biometrics to any external server. 
+- Publish the soul signature to MQTT or Matter unless explicitly configured with + `--privacy-mode research` (see `security.md §6`). +- Store PII (name, email, account linkage) in the `.rvf` file. The `person_id` + field is an opaque u64. PII linkage, if any, lives in the application layer + and is governed by separate access control. diff --git a/docs/research/soul/security.md b/docs/research/soul/security.md new file mode 100644 index 00000000..eb2f95b1 --- /dev/null +++ b/docs/research/soul/security.md @@ -0,0 +1,367 @@ +# Soul Signature — Security, Privacy, and Threat Model + +**Status:** Research Specification (Pre-Implementation) +**Date:** 2026-05-24 +**Author:** ruv + +--- + +## 1. Scope + +This document defines the threat model, mitigations, cryptographic primitive +choices, privacy architecture, and open security research items for the Soul +Signature system. It is intended to be reviewed by a security engineer or +privacy counsel before any production deployment. + +The soul signature is a passive biometric system. The security bar is: +**attacker cost to achieve a false accept must exceed the value of the +protected resource for the relevant threat model**. The soul signature does +not claim to be unbreakable. It claims to be hard enough. + +--- + +## 2. What We Explicitly Do NOT Claim + +- Not equal to fingerprint scanners on FBI-tier datasets in EER terms. RF + biometrics are a younger discipline. No independent benchmark with the soul + signature's specific multi-channel fusion exists yet. +- Not legal evidence. Passive RF biometric identification has no established + legal precedent in any jurisdiction. +- Not a replacement for explicit consent in regulated contexts (healthcare, + employment, border control). +- Not unbreakable under a nation-state adversary with full physical access to + the sensing infrastructure. +- Not validated at scale beyond the constituent ADR baselines. 
The AETHER + channel (ADR-024) targets >80% mAP at 5 subjects; at 100+ subjects the + false-accept rate is open research. + +--- + +## 3. Threat Model + +### 3.1 Attacker: Passive Eavesdropper on the WiFi Medium + +**Capability:** An attacker near the WiFi sensing zone can observe CSI of any +person who passes through. With enough CSI, the attacker could construct an +unauthorized soul signature enrollment of an unconsenting bystander. + +**Impact:** Unauthorized enrollment → unauthorized recognition → attribution of +presence to a person who did not consent. + +**Mitigation:** +- Ambient CSI capture does NOT trigger enrollment. Enrollment requires the + explicit 60-second structured protocol. Ambient bystander CSI produces + `unauthenticated` pose tracks tagged as `person_id: NULL`. +- Unauthenticated RVF nodes are pruned from the HNSW index after 24 hours. +- The enrollment protocol requires presence confirmation from at least two + sensing nodes simultaneously, making drive-by enrollment geometrically + harder to achieve without physical proximity. + +**Residual risk:** An attacker who can be physically present in the scanning +zone for 60 seconds, under the observation of the scanning protocol, can cause +enrollment of a fake person. This requires physical co-location and is +equivalent to the threat model for any in-person biometric registration. + +### 3.2 Attacker: Active Replay + +**Capability:** An attacker records a CSI stream from a legitimate enrollment +or recognition event and replays it to a sensing node to impersonate the +enrolled person. + +**Impact:** False positive recognition; unauthorized access or presence attribution. + +**Mitigation:** +- Each enrollment is bound to the room's ADR-030 field model eigenstate at + enrollment time. The `environment_id` field in every vector node is a + SHA-256 of the field model's eigenmode matrix. 
A replay in a different room + produces a different `environment_id` and a dramatically different + Subcarrier_Reflection_Profile — the cross-validation between these two + signed fields fails. +- The Ed25519 witness chain (ADR-110) includes a monotonic timestamp + (`timestamp_ns`). A replay of an old signature is detected by the timestamp + freshness check at recognition time (configurable; default: reject any + signature older than 7 days for high-assurance contexts). +- The ADR-030 field model continuously updates. Even if the replay is in the + same room, the field model's eigenstate changes as furniture is moved or + temperature shifts the propagation medium; cross-validation degrades over + time. + +**Residual risk:** Replay within the same room within a short time window +(< 4 hours, before the field model rotates) by an attacker who has recorded the +original CSI with high fidelity remains a plausible attack vector. This is not +defended against by the current architecture. It requires a future ADR for +challenge-response liveness detection. + +### 3.3 Attacker: Phased-Array Vest / RF Body Emulator + +**Capability:** An attacker wears a device capable of emitting RF signals that +mimic another person's backscatter profile, allowing them to be recognized as +the enrolled person. + +**Impact:** The strongest impersonation attack; if successful, bypasses all +electromagnetic biometric channels simultaneously. + +**Mitigation:** +- The RuvSense `adversarial.rs` module (ADR-030 Tier 7) enforces four + physics-based consistency checks: + 1. Multi-link consistency: a real body perturbs all mesh links passing + through its location. A vest emitting signals affects only the targeted + link(s). Detection: at least 4 links must show correlated perturbation. + 2. Field model constraints: the perturbation must lie within the span of + the room's eigenmode structure. Artificially injected signals produce + perturbations inconsistent with room geometry. + 3. 
Temporal continuity: real movement is smooth in embedding space; injected + signals can produce discontinuities flagged by the embedding velocity + monitor. + 4. Energy conservation: total perturbation energy across all links must be + consistent with the number and geometry of bodies present. +- The adversarial detector fires `FAIL_ADVERSARIAL_SIGNAL` before the soul + signature match is considered. + +**Residual risk:** A sophisticated attacker with a calibrated phased-array +system who also knows the room's eigenmode structure and the enrolled person's +exact multi-link backscatter pattern could in principle construct a convincing +emulation. This is a high-capability, high-cost attack. Practical countermeasure: +require multi-node confirmation (ADR-029 multistatic) which raises the +geometric complexity of the emulation exponentially with node count. + +### 3.4 Attacker: Insider with Broker Access + +**Capability:** A privileged operator or compromised service with read access +to the stored `.rvf` files and the HNSW person_track index. + +**Impact:** Exfiltration of biometric signatures; linkage of person_id to PII +if linkage tables also accessible; replay or cross-site re-enrollment. + +**Mitigation:** +- At-rest encryption: all `.rvf` files are encrypted with ChaCha20-Poly1305 + using a key derived via Argon2id from a user-provided passphrase (or a FIDO2 + hardware token binding). The Cognitum Seed appliance NEVER stores the + decryption key; it is re-derived from the passphrase on each access. +- The opaque `person_id` (u64) in the `.rvf` file is not PII. PII linkage, if + any, requires access to a separate application-layer database not stored on + the sensing appliance. +- The HNSW index stores only the 128-dim AETHER embedding, not raw CSI or full + soul signatures. Exfiltration of the index exposes the embedding but not the + full biometric record. 
+- Differential privacy (ADR-106 DP-SGD) applies at training time when AETHER + is fine-tuned on enrolled-person data, preventing membership inference attacks + that could recover training samples from model weights. + +**Residual risk:** If the passphrase is weak or the FIDO2 token is compromised, +the at-rest encryption fails. Key management is a deployment responsibility. + +### 3.5 Attacker: Manufacturer / Firmware Supply Chain + +**Capability:** A malicious firmware update to the ESP32 node or Cognitum Seed +appliance could silently exfiltrate soul signatures or CSI streams. + +**Impact:** Large-scale passive surveillance; biometric data exfiltration across +all installed appliances. + +**Mitigation:** +- All firmware releases are signed with Ed25519 (ADR-100 cog packaging) and + verified by the appliance before installation. A Dilithium-3 post-quantum + co-signature is added in the transition window (ADR-109). +- The Ed25519 witness chain (ADR-110) signs each CSI frame bundle at the + sensor level. A firmware change that alters the witness chain is detectable + by downstream audit. +- Network egress from the Cognitum Seed in `--privacy-mode` is blocked for + raw CSI and soul signatures by default. Only MQTT auto-discovery messages + (ADR-115) and OTA metadata are permitted outbound. +- Open-source firmware. The ESP32 firmware and Cognitum Seed Rust crates are + open source (this repository). Independent audit is possible. + +**Residual risk:** A zero-day exploit in the ESP-IDF WiFi stack or the Rust +codebase could bypass these controls. This is mitigated by regular security +audits (run `npx @claude-flow/cli@latest security scan` per CLAUDE.md) but not +eliminated. + +--- + +## 4. 
Consent Architecture + +### 4.1 The Enrollment-vs-Recognition Distinction + +The soul signature system enforces a hard distinction: + +| Action | Consent required | Mechanism | +|---|---|---| +| Enrollment | Explicit, active | 60-second protocol with operator confirmation; produces signed `.rvf` | +| Recognition of enrolled person | Implicit (enrollment = consent for recognition) | Continuous mode; HNSW match | +| Ambient sensing of unenrolled person | No — but data is transient and pruned | Unauthenticated tracks; 24h TTL | +| Updating stored profile from continuous mode | Implicit (set at enrollment time) | Aggregator auto-refresh; configurable | + +The system operator is responsible for obtaining appropriate consent from +persons before performing enrollment. The technical system enforces that +enrollment cannot happen accidentally or from drive-by sensing. + +### 4.2 Bystander Protection + +Persons who pass through a sensing zone without being enrolled are sensed but +not persistently identified. Their data flow: +1. Pose tracker produces a track tagged `person_id: NULL`. +2. AETHER embedding is computed for motion detection and occupancy counting + (ADR-115 HA-MIND). +3. The embedding is written to the `temporal_baseline` HNSW index with a 24-hour + TTL and `authenticated: false`. +4. After 24 hours, the entry is automatically pruned by the `EmbeddingIndex::prune()` + method (ADR-024 §2.4). +5. No `.rvf` file is created. No persistent record exists. + +This architecture satisfies the GDPR principle of data minimization (Article 5(1)(c)) +for bystander data: the retention period is bounded, the data is not linked to +an identity, and the storage is proportionate to the functional purpose +(occupancy counting). + +### 4.3 GDPR / HIPAA Mode + +When `--privacy-mode enabled` (from ADR-115 HA-MIND §privacy): + +1. Soul signatures are computed and stored locally only. They are NEVER + published to MQTT topics, Matter clusters, or any external endpoint. +2. 
The local REST API for accessing soul signatures requires a valid bearer + token (ADR-028 bearer_auth.rs). No unauthenticated endpoint exposes + biometric data. +3. The JSON-LD sidecar is written to the local encrypted store only. It is not + included in MQTT auto-discovery payloads. +4. The longitudinal drift metrics (ADR-030 Tier 4) are published to MQTT in + aggregated form only (e.g., `drift_detected: true`, never raw metric values + that could be used for medical inference). +5. A data deletion endpoint must be implemented: `DELETE /api/v1/persons/{id}` + removes the `.rvf` file, the HNSW index entry, the JSON-LD sidecar, and all + longitudinal Welford statistics for that person_id. + +--- + +## 5. Cryptographic Primitives + +All primitives are chosen from NIST-approved or widely-audited standards. + +| Purpose | Primitive | Rationale | +|---|---|---| +| Content integrity (per-segment) | CRC32 (IEEE 802.3) | Already implemented in `rvf_container.rs:line 70`. Corruption detection, not security. | +| Content addressing | SHA-256 | File name derivation; pre-image resistance prevents name collisions | +| Ed25519 signatures | Ed25519 (RFC 8032) | ADR-110 witness chain; 64-byte signatures; 128-bit security | +| At-rest encryption | ChaCha20-Poly1305 (RFC 8439) | AEAD; software-friendly; no timing-attack surface like AES-CBC; 256-bit key | +| Key derivation from passphrase | Argon2id (RFC 9106) | Memory-hard KDF; resistant to GPU/ASIC brute-force; recommended by NIST SP 800-132 draft (2024) | +| DP-SGD noise | Gaussian N(0, σ²C²I) per ADR-106 | (ε, δ)-DP per Abadi et al. 
2016 Moments Accountant | +| Post-quantum key exchange (future) | Kyber-768 (NIST FIPS 203, 2024) | ADR-108; ~AES-192 security; NIST CNSA 2.0 recommended | +| Post-quantum signatures (future) | Dilithium-3 (NIST FIPS 204, 2024) | ADR-109; hybrid mode with Ed25519 during transition window | + +### 5.1 Argon2id Parameters + +Default parameters for soul signature key derivation: + +``` +m_cost = 65536 (64 MB memory) +t_cost = 3 (3 iterations) +p_cost = 4 (4 parallel lanes) +output_len = 32 bytes (256-bit key for ChaCha20-Poly1305) +salt = 16 random bytes stored alongside encrypted blob (NOT the person_id) +``` + +These parameters provide ~100ms KDF time on a Pi 5, which is acceptable for +enrollment (one-time) and recognition (HNSW match precedes decryption, so +decryption is only triggered after a candidate match). + +### 5.2 Forward Secrecy + +Old soul signature files are NOT keys for new ones. Compromise of a 90-day-old +`.rvf` file does not unlock the current profile. The key is derived from the +user's passphrase each time, not derived from the previous file. + +Archived files (kept for audit purposes) are re-encrypted on passphrase rotation +if the operator elects to do so via the `soul-signature re-encrypt --all` CLI +command (not yet implemented; specified here for future ADR). + +--- + +## 6. 
Privacy Mode Integration (ADR-115) + +The `--privacy-mode` flag defined in ADR-115 HA-MIND §9 is extended to cover +soul signature data: + +| Privacy mode | MQTT publish | REST API | Local storage | HNSW index | +|---|---|---|---|---| +| `disabled` (default for home users) | Aggregated presence/count only | Authenticated bearer required | Encrypted at rest | Local only | +| `enabled` | Nothing biometric | Authenticated bearer required | Encrypted at rest | Local only | +| `research` (explicit opt-in) | Full soul signature nodes (anonymized person_id) | Open (for research deployments only) | Encrypted at rest | Exportable | + +The `research` mode requires a separate `--research-consent-token` flag and is +intended for academic data collection under IRB approval. It must never be the +default. + +--- + +## 7. Open Research and Outstanding Security Work + +The following items are known security gaps or open research questions. Each +warrants a future ADR before production deployment at scale. + +**7.1 Challenge-Response Liveness Detection** +Replay attacks within a short time window (see §3.2 residual risk) are not +defended against. A future mechanism should issue a random challenge (e.g., +"please raise your left hand") and verify the CSI response matches the challenge +before accepting a recognition. This eliminates replay as a practical attack +vector. Future ADR: ADR-120 (proposed). + +**7.2 False-Accept Rate at Scale (N > 20 subjects)** +The AETHER baseline (ADR-024) is tested at 5 subjects (>80% mAP). For household +deployments this is sufficient. For building-scale deployments (50-500 subjects), +the FAR is open research. Independent benchmarking on a dataset of 20+ subjects +with the full 7-channel fusion is required before building-scale deployment can +be recommended. Publication target: co-locate with ADR-027 MERIDIAN evaluation. 
+ +**7.3 Side-Channel Leakage from Encrypted RVF Files** +The file size of an encrypted `.rvf` blob is observable by an attacker with +filesystem access. File size is a function of the number of nodes present, which +reveals whether the cardiac channel was captured (high-SNR enrollment vs +low-SNR enrollment). This is a minor information leak. Mitigation: pad all +`.rvf` files to a fixed 64 KB boundary. Future ADR: append to ADR-106. + +**7.4 Membership Inference in Continuous Mode** +In continuous mode, the AETHER model is fine-tuned on the enrolled person's +data over months. An adversary with access to the model weights before and after +a re-train cycle could infer that a specific enrollment occurred, even without +the soul signature file, via membership inference (Shokri et al. 2017). +ADR-106 DP-SGD mitigates this for federation round deltas but not for local +single-device fine-tuning. Extension of DP-SGD to the local continuous-mode +update is required. Future ADR: extend ADR-106. + +**7.5 Physical Access to Sensing Nodes** +An attacker with physical access to an ESP32 node can extract the firmware and +attempt to reverse the Ed25519 signing key (if the key is stored in ESP32 +NVS without protection). ADR-110 uses NVS for key storage. A future ADR should +mandate secure element storage (e.g., ATECC608A co-processor on the Cognitum +Seed) for the signing key. Future ADR: ADR-121 (proposed). + +**7.6 Federated Learning Linkability** +When AETHER is retrained via federated learning (ADR-105), the LoRA weight +deltas carry information about enrolled persons. ADR-106 applies DP-SGD to +these deltas, but the post-quantum migration path (ADR-108 Kyber-768) is not +yet integrated with the federation protocol. Until ADR-108 Phase 2 ships, the +federation link is classically encrypted and vulnerable to harvest-now-decrypt-later +attacks by quantum-capable adversaries. Assessed risk: low until 2027. + +--- + +## 8. 
Summary Security Properties Table + +| Property | Status | Evidence | +|---|---|---| +| At-rest encryption | Specified (ChaCha20-Poly1305 + Argon2id) | This document §5 | +| Ed25519 attestation | Implemented | ADR-110 witness chain | +| Replay resistance (cross-room) | Implemented | ADR-030 field model environment_id binding | +| Replay resistance (same-room, short window) | Open gap | §7.1 | +| Anti-spoofing (single-link injection) | Implemented | adversarial.rs multi-link consistency | +| Anti-spoofing (phased-array vest) | Partial | adversarial.rs + energy conservation; residual risk documented | +| Bystander protection | Specified | 24h TTL on unauthenticated tracks; §4.2 | +| DP-SGD training privacy | Implemented (federation) | ADR-106 | +| DP-SGD training privacy (local continuous mode) | Open gap | §7.4 | +| GDPR data deletion | Specified | §4.3 `DELETE /api/v1/persons/{id}` | +| Post-quantum migration path | Specified (Kyber-768, Dilithium-3) | ADR-108, ADR-109 | +| Firmware supply chain integrity | Implemented (Ed25519 cog signing) | ADR-100, ADR-109 hybrid | +| False-accept rate at scale | Open research | §7.2 | +| Liveness detection | Open gap | §7.1 | +| Secure element key storage | Open gap | §7.5 | diff --git a/docs/research/soul/specification.md b/docs/research/soul/specification.md new file mode 100644 index 00000000..e452f6e3 --- /dev/null +++ b/docs/research/soul/specification.md @@ -0,0 +1,525 @@ +# Soul Signature — Technical Specification + +**Status:** Research Specification (Pre-Implementation) +**Date:** 2026-05-24 +**Author:** ruv + +--- + +## 1. Overview + +A Soul Signature is a typed, content-addressed RVF graph encoding seven +electromagnetic observables extracted from a person in a WiFi-DensePose sensing +zone. The graph is stored as a single `.rvf` binary blob using the existing RVF +container format (`v2/crates/wifi-densepose-sensing-server/src/rvf_container.rs`) +extended with two new segment types defined below. 
A human-readable JSON sidecar +accompanies the blob for inspection and provenance. + +The signature is probabilistic, not deterministic. Matching computes a weighted +cosine similarity across graph dimensions, producing a score in [0, 1] with a +calibrated false-accept rate (FAR). The FAR at a given threshold is an open +research question; the AETHER person re-identification baseline (ADR-024 §2.8: +>80% mAP at 5 subjects) is the lower bound for the primary embedding channel. + +--- + +## 2. Design Principles + +### 2.1 Per-Individual + +The signature encodes features that are structurally unique to one person at the +sensing resolution of commodity WiFi hardware. Discriminative dimensions include: +cardiac timing (R-R interval structure), respiratory mechanics (tidal depth, +inspiration-to-expiration ratio), skeletal proportions (limb ratios from 17-keypoint +pose, ADR-079), gait cadence variability, and the RF backscatter profile shaped by +body mass distribution and geometry. + +### 2.2 Passive at Recognition Time + +No explicit action from the subject is required at recognition time after +enrollment. Recognition fires whenever an enrolled person is detected in a sensing +zone. Enrollment itself requires a 60-second structured protocol (see +`scanning-process.md`). This is a deliberate asymmetry: passive recognition + +active enrollment — which is the same model used by FaceID (passive unlock after +initial face setup). + +The passivity of post-enrollment recognition is a privacy concern addressed in full +in `security.md` §4. + +### 2.3 Multi-Modal + +Seven orthogonal channels contribute. Orthogonality matters: if one channel +degrades (e.g., cardiac is masked by motion), the remaining six carry the match. +No single channel is necessary for a positive identification above threshold; +the fused score is a weighted aggregate. + +### 2.4 Persistent Across Time + +The stored signature is valid over weeks to months for adults with stable anatomy +and health. 
Re-scan cadence is prescribed in `scanning-process.md`. The +`longitudinal.rs` module (ADR-030 Tier 4) provides the drift detection that +flags when a re-scan is necessary. + +### 2.5 Defensible False-Accept Rate + +The security model is not "unbreakable." It is "attacker cost exceeds value of +attack for the threat model in §security." See `security.md` §3. + +--- + +## 3. Signature as a Typed RVF Graph + +### 3.1 Container Format + +The soul signature reuses the RVF binary container defined in +`v2/crates/wifi-densepose-sensing-server/src/rvf_container.rs` (lines 1–660). +Existing segment types used: + +| Segment type | Const | Purpose in soul signature | +|---|---|---| +| `SEG_MANIFEST` | `0x05` | Graph metadata: schema version, enroll timestamp, device ID, person_id (opaque u64) | +| `SEG_VEC` | `0x01` | AETHER 128-dim embedding weights (backbone + projection head) | +| `SEG_META` | `0x07` | JSON overlay: all non-vector node attributes | +| `SEG_WITNESS` | `0x0A` | Ed25519 signature over `(content_hash_sha256 || timestamp_ns || enrolled_by_device_id)` | +| `SEG_EMBED` | `0x0C` | AETHER embedding config + projection head weights (ADR-024 Phase 7) | +| `SEG_LORA` | `0x0D` | Per-environment LoRA deltas for environment-adapted query | + +Two new segment types are proposed for the soul signature extension: + +| Segment type | Const | Purpose | +|---|---|---| +| `SEG_SOUL_GRAPH` | `0x10` | JSON-serialized graph: node list + edge list + attribute schemas | +| `SEG_SOUL_INDEX` | `0x11` | Per-node HNSW index serialization for fast graph-level query | + +The `SegmentHeader` structure is unchanged. Each segment is 64-byte aligned +(field `alignment_pad` at offset `0x3C`). CRC32 content hash at offset `0x28` +covers the payload, providing corruption detection (not tamper resistance; integrity against an attacker comes from the SEG_WITNESS Ed25519 signature) per the existing implementation +at `rvf_container.rs:line 70`. + +### 3.2 Node Types + +Each node is a typed struct. Serialized into SEG_META as a JSON object with a +`node_type` discriminator string. 
Vector fields (f32 arrays) are co-located in +a SEG_VEC segment indexed by the node's `vec_segment_id` field. + +#### Node: AETHER_Embedding + +Primary identity anchor. The contrastive CSI embedding from ADR-024. + +```rust +pub struct AetherEmbeddingNode { + pub node_type: &'static str, // "AETHER_Embedding" + pub vec_segment_id: u64, // references SEG_VEC containing 128 f32s + pub embedding_dim: usize, // 128 + pub backbone: String, // "csi-to-pose-transformer" + pub pretrain_method: String, // "simclr+vicreg" + pub alignment_score: f32, // Lowman alignment metric at enrollment time + pub uniformity_score: f32, // Hypersphere uniformity at enrollment time + pub enrollment_frames: u32, // Number of CSI windows averaged into this node + pub environment_id: String, // SHA-256 of field model eigenstate at enrollment + pub confidence: f32, // HNSW search confidence against person_track index +} +``` + +Stored size: 128 × 4 = 512 bytes in SEG_VEC; JSON metadata ~200 bytes in SEG_META. +Per ADR-024 §2.8, the person re-identification target is >80% mAP at 5 subjects. +At 10+ subjects the accuracy is open research; baseline TBD. + +#### Node: Cardiac_HR_Profile + +Extracted from the ADR-039 vitals pipeline (magic `0xC511_0002`, fields offset 6-11: +breathing_rate at `u16 LE` BPM×100, heart_rate at `u32 LE` BPM×10000). +For the soul signature, cardiac extraction uses the ADR-021 bandpass pipeline +(0.8–2.0 Hz) over a minimum 30-second rest window. 
+ +```rust +pub struct CardiacHRProfileNode { + pub node_type: &'static str, // "Cardiac_HR_Profile" + pub baseline_bpm: f32, // mean HR over enrollment window (40–180 BPM range) + pub hrv_sdnn_ms: f32, // SDNN: std dev of R-R intervals (ms) + pub hrv_rmssd_ms: f32, // RMSSD: root mean square successive differences + pub hrv_lf_power: f32, // LF band power (0.04–0.15 Hz), normalized + pub hrv_hf_power: f32, // HF band power (0.15–0.4 Hz), normalized + pub hrv_lf_hf_ratio: f32, // LF/HF ratio (autonomic balance marker) + pub sinus_rhythm_class: u8, // 0=regular, 1=irregular, 2=indeterminate + pub confidence: f32, // from ADR-021 VitalCoherenceGate PERMIT fraction + pub window_seconds: u32, // duration of the measurement window +} +``` + +WiFi CSI-based HRV extraction is an active research area. The SDNN and RMSSD values +are discriminative at group level (Zhao et al. 2017, Widar 3.0 2019) but per-person +uniqueness has not been independently validated at scale. Status: open research. + +#### Node: Cardiac_Waveform_Morphology + +Wavelet decomposition of the bandpass-filtered cardiac phase signal. Captures the +shape of the cardiac waveform, not just its rate. More discriminative than HR alone +but requires higher SNR and longer measurement window. + +```rust +pub struct CardiacWaveformMorphologyNode { + pub node_type: &'static str, // "Cardiac_Waveform_Morphology" + pub vec_segment_id: u64, // references SEG_VEC: 64 f32 wavelet coefficients + pub wavelet_family: String, // "db4" (Daubechies 4, standard for cardiac) + pub decomposition_levels: u8, // 4 levels + pub snr_db: f32, // measured SNR at enrollment; low-SNR nodes down-weighted + pub confidence: f32, +} +``` + +Wavelet coefficient dimension: 64 floats = 256 bytes in SEG_VEC. Waveform +morphology from CSI is highly environment-dependent; the ADR-030 field model +subtraction must run before this measurement is taken to isolate body perturbation +from room standing-wave artifacts. 
+ +#### Node: Respiratory_Pattern + +Extracted by the ADR-021 BreathingExtractor (0.1–0.5 Hz bandpass) plus the +ADR-030 persistence layer that accumulates statistics over the enrollment window. + +```rust +pub struct RespiratoryPatternNode { + pub node_type: &'static str, // "Respiratory_Pattern" + pub baseline_bpm: f32, // mean RR (normal adult: 12–20 BPM) + pub depth_amplitude_normalized: f32, // tidal depth proxy from CSI variance + pub inspiration_expiration_ratio: f32, // I:E ratio (1:1.5 to 1:3 typical) + pub hrv_rsa_power: f32, // respiratory sinus arrhythmia spectral power + pub apnea_index: f32, // events per hour of significant pauses + pub waveform_regularity: f32, // coefficient of variation of breath intervals + pub confidence: f32, + pub window_seconds: u32, +} +``` + +Note: the `apnea_index` field is a biophysical proxy signal (pause events in +the signal), not a clinical AHI score. It is provided for signature +discriminability, not diagnostic use. + +#### Node: Gait_Timing + +Extracted from the 17-keypoint Kalman pose tracker (`pose_tracker.rs`, ADR-029 +Sect 2.7) during the gait phase of the enrollment protocol. The tracker uses +ruvector-mincut for person separation and AETHER re-ID for identity continuity. + +```rust +pub struct GaitTimingNode { + pub node_type: &'static str, // "Gait_Timing" + pub cadence_steps_per_min: f32, // steps per minute + pub stride_period_variance: f32, // coefficient of variation of stride period + pub double_support_pct: f32, // fraction of gait cycle in double support + pub asymmetry_index: f32, // |left_stride - right_stride| / mean_stride + pub step_width_m: f32, // lateral distance between foot strikes (proxy) + pub velocity_variance: f32, // gait speed variability + pub confidence: f32, + pub stride_count: u32, // number of strides captured during enrollment +} +``` + +Gait biometrics from WiFi CSI are documented in WiGait (Adib et al., SIGCOMM +2015) and WiDraw (Wang et al., MobiCom 2014). 
Discrimination across 10+ subjects +in the same household is an open research question for the WiFi-only modality. + +#### Node: Skeletal_Proportions + +Derived from the ADR-079 camera + CSI paired keypoint pipeline when available, +or from CSI-only pose estimation (ADR-023 CsiToPoseTransformer) in camera-free +deployments. Encodes body geometry as ratios (not absolute values) for scale +invariance. + +```rust +pub struct SkeletalProportionsNode { + pub node_type: &'static str, // "Skeletal_Proportions" + pub torso_to_leg_ratio: f32, // torso height / leg length + pub shoulder_to_hip_ratio: f32, // shoulder width / hip width + pub upper_to_lower_arm_ratio: f32, // upper arm / forearm + pub upper_to_lower_leg_ratio: f32, // thigh / shin + pub head_to_torso_ratio: f32, // head height / torso height + pub arm_span_to_height_ratio: f32, // Vitruvian ratio (close to 1.0 for most adults) + pub confidence: f32, + pub keypoint_source: String, // "camera_paired" | "csi_only" | "fused" +} +``` + +CSI-only skeletal proportion estimation has ~15–25% error on individual ratio +values (open research; baseline from ADR-023 MPJPE ~91.7 mm at best, per +Person-in-WiFi 3D, CVPR 2024). Camera-paired values (ADR-079) are substantially +more accurate. The node degrades gracefully when only CSI is available. + +#### Node: Subcarrier_Reflection_Profile + +The per-subcarrier amplitude attenuation and phase shift profile measured when +the subject stands still at three orientations (0°, 90°, 180° rotation). This +encodes the body's RF backscatter cross-section shape, which is determined by +body mass distribution, limb geometry, and clothing/material factors. 
+ +```rust +pub struct SubcarrierReflectionProfileNode { + pub node_type: &'static str, // "Subcarrier_Reflection_Profile" + pub vec_segment_id: u64, // SEG_VEC: 56 × 3 × 2 = 336 f32s + // (56 subcarriers × 3 orientations × + // [amplitude_attenuation, phase_shift]) + pub n_subcarriers: u8, // 56 (HT-LTF) or up to 242 (HE-LTF, ADR-110 C6) + pub n_orientations: u8, // 3 + pub frequency_mhz: u32, // center frequency at measurement time + pub environment_id: String, // references field model used for subtraction + pub confidence: f32, +} +``` + +This node directly exploits the ADR-030 field model: the empty-room baseline +eigenstate is subtracted before computing the reflection profile, isolating the +person's contribution. Without ADR-030 field subtraction, the profile is too +environment-coupled to be transferable across rooms. With MERIDIAN (ADR-027), +the hardware-normalizer layer maps ESP32-S3 (52 subcarriers HT-LTF) and +ESP32-C6 (242 subcarriers HE-LTF per ADR-110) into a canonical 56-subcarrier +representation before this measurement. + +Stored: 336 × 4 = 1,344 bytes in SEG_VEC. + +#### Node: Body_Field_Coupling + +The AETHER attention map cells weighted by the ADR-030 room eigenmode structure. +Encodes how strongly the person's body couples to each dominant electromagnetic +mode of the room. This is the most physics-grounded node: it captures the +person's interaction with the actual electromagnetic geometry of the space. + +```rust +pub struct BodyFieldCouplingNode { + pub node_type: &'static str, // "Body_Field_Coupling" + pub vec_segment_id: u64, // SEG_VEC: n_eigenmodes × n_keypoints f32s + pub n_eigenmodes: u8, // top-K SVD modes from field_model.rs (default K=8) + pub n_keypoints: u8, // 17 (COCO) + pub eigenmode_energy_fractions: Vec<f32>, // fraction of total variance per mode + pub environment_id: String, // must match SubcarrierReflectionProfile env + pub confidence: f32, +} +``` + +This node is only valid when the same room's field model is available. 
For +cross-room recognition, MERIDIAN's environment-disentangled embedding (ADR-027) +is used instead. The BodyFieldCoupling node provides additional discriminative +power in single-room deployments and degrades to optional in multi-room contexts. + +--- + +### 3.3 Edge Types + +Edges are stored in the SEG_SOUL_GRAPH JSON array. Each edge has a typed +relationship that constrains how the nodes may be used in matching. + +| Edge type | Source node(s) | Target node(s) | Semantics | +|---|---|---|---| +| `derived_from` | FieldModel_Residual (implicit) | AetherEmbedding | The embedding was computed after field model subtraction | +| `correlates_with` | Cardiac_HR_Profile | Respiratory_Pattern | Cardiorespiratory coupling at measurement time; correlation coefficient stored as edge weight | +| `temporally_colocated` | Any pair | Any pair | Both nodes were measured in the same time window; ensures consistency | +| `temporally_after` | Post-gait node | Pre-gait node | Nodes acquired sequentially during enrollment protocol | +| `requires_field_model` | SubcarrierReflectionProfile | BodyFieldCoupling | Matching this node requires the same room's ADR-030 field model | +| `fuses` | AetherEmbedding | SubcarrierReflectionProfile | MERIDIAN-normalized fusion: both mapped to environment-invariant space | +| `attested_by` | Any leaf node | WitnessChain | Ed25519 witness covers this node's content hash | +| `derived_by_keypoint_tracker` | GaitTiming | SkeletalProportions | Both extracted from the same pose_tracker.rs output | +| `environment_normalized` | Any node with `environment_id` | MERIDIAN manifest | MERIDIAN (ADR-027) was applied; signature is cross-room capable | + +--- + +### 3.4 The Aggregator vs. the Stored Profile + +Two distinct graph instances exist in the runtime: + +**Online Aggregator** — a mutable, in-memory graph that accumulates measurements +across multiple sensing windows. 
Nodes are incrementally updated with Welford +online statistics (`field_model.rs::WelfordStats`). Confidence fields grow toward +1.0 as more frames accumulate. The aggregator never writes to disk during +normal operation. + +**Stored Profile** — an immutable, content-addressed `.rvf` file on disk. It is +generated from the aggregator at the end of the enrollment protocol, when all node +confidence fields exceed their minimum thresholds. The stored profile is the +canonical soul signature. + +``` +Online Aggregator (RAM) Stored Profile (disk / secure enclave) ++----------------------+ +---------------------------+ +| AETHER_Embedding | enrollment | signature-<sha256>.rvf | +| accumulated over | completion | SEG_MANIFEST | +| 60-second protocol +-------------> | SEG_VEC (embedding + refl)| +| Confidence: 0.0→1.0 | when all | SEG_META (all node attrs) | +| | gates pass | SEG_EMBED (AETHER config) | +| Cardiac_HR_Profile | | SEG_WITNESS (Ed25519) | +| accumulated 30s rest | | SEG_SOUL_GRAPH (graph) | ++----------------------+ +---------------------------+ +``` + +The aggregator pattern ensures that a partial scan (e.g., subject leaves after +20 seconds) never produces a stored profile — the quality gates prevent premature +commitment (see `scanning-process.md §5`). + +--- + +### 3.5 Serialization + +**Binary container:** RVF blob, per `rvf_container.rs`. All numeric data is +little-endian, f32 IEEE 754. Segment alignment: 64 bytes. CRC32 (IEEE 802.3 +polynomial) over each segment payload. + +**Content addressing:** The file name is: +``` +signature-<sha256>.rvf +``` +SHA-256 is computed over the complete concatenated RVF byte stream after +`RvfBuilder::build()`. This is a different hash from the per-segment CRC32; +the CRC32 provides corruption detection within segments, while the SHA-256 provides +content-based addressing and enables deduplication. + +**JSON-LD sidecar:** An optional `signature-<sha256>.json` file with the same +base name.
Structure: + +```json +{ + "@context": "https://ruv.net/soul-signature/v1", + "schema_version": "0.1.0", + "person_id": "", + "enrolled_at": "2026-05-24T00:00:00Z", + "enrolled_by_device_id": "", + "rvf_sha256": "", + "nodes": [ + { "node_type": "AETHER_Embedding", "confidence": 0.92, ... }, + { "node_type": "Cardiac_HR_Profile", "confidence": 0.85, ... }, + ... + ], + "edges": [...], + "witness": { + "algorithm": "Ed25519", + "public_key": "", + "signature": "", + "signed_fields": ["rvf_sha256", "enrolled_at", "enrolled_by_device_id"] + } +} +``` + +The JSON-LD sidecar is human-readable and intended for audit and provenance. +It does not contain raw biometric vectors; those stay in the RVF blob. + +**ISO/IEC 19794-4 alignment:** The soul signature's graph-based vector template +is conceptually analogous to the ISO/IEC 19794-4 finger image data format +and ISO/IEC 19794-2 minutiae data. The node/edge schema is not binary-compatible +with ISO 19794, but the design intent (typed attribute records, quality scores, +creator provenance) follows the same standard's principles. Future work may +include a conformance layer if regulatory certification is sought. + +--- + +### 3.6 Matching Algorithm + +Given a stored profile `P` and a query embedding `Q` derived from a live sensing +window, the match score is computed as a weighted sum of per-channel cosine +similarities: + +``` +match_score = sum_i ( w_i * availability(P.channel_i, Q.channel_i) * cosine_sim(P.channel_i, Q.channel_i) ) + / sum_i ( w_i * availability(P.channel_i, Q.channel_i) ) +``` + +Where `availability` is 1.0 if both nodes are present and 0.0 if either is absent +(graceful degradation when a channel cannot be measured in the query window). +An absent channel therefore contributes to neither sum; if no channel is +available in both `P` and `Q`, the denominator is zero and no score is defined.
+ +Default weights (open research; these are design intent, not validated): + +| Channel | Weight | Rationale | +|---|---|---| +| AETHER_Embedding | 0.35 | Primary identity anchor; best-studied channel | +| Subcarrier_Reflection_Profile | 0.20 | Body geometry; angle-stable | +| Cardiac_HR_Profile | 0.15 | Physiologically stable in healthy adults | +| Gait_Timing | 0.15 | Well-studied biometric; discriminative | +| Respiratory_Pattern | 0.10 | More variable than cardiac | +| Skeletal_Proportions | 0.05 | Proxy for body shape; CSI-only is noisy | +| Body_Field_Coupling | 0.10 (single-room, field model available) / 0.00 (cross-room) | Valid only when room field model available | +| Cardiac_Waveform_Morphology | 0.05 (supplementary) | High SNR requirement | + +The threshold for a positive match is a deployment-specific parameter with a +documented FAR/FRR trade-off. The AETHER channel alone has a target of >80% mAP at 5 +subjects (ADR-024 §2.8). The fused multi-channel score is expected to +exceed this; the exact improvement is open research, baseline TBD. + +--- + +### 3.7 Rust Type Sketch + +The following sketch shows how the soul signature types would integrate with +the existing codebase. This is a design sketch, not implemented code. + +```rust +// In a future module: v2/crates/wifi-densepose-sensing-server/src/soul_signature.rs + +pub const SEG_SOUL_GRAPH: u8 = 0x10; +pub const SEG_SOUL_INDEX: u8 = 0x11; + +/// Complete soul signature as a graph container. +pub struct SoulSignature { + /// Content-addressed identifier: SHA-256 of the RVF blob bytes. + pub content_hash: [u8; 32], + /// Opaque person identifier (never PII directly). + pub person_id: u64, + /// Unix timestamp of enrollment completion (nanoseconds). + pub enrolled_at_ns: u64, + /// Device that performed enrollment. + pub enrolled_by_device_id: String, + /// All graph nodes, typed. + pub nodes: SoulNodes, + /// All graph edges. + pub edges: Vec<SoulEdge>, + /// Ed25519 witness chain (per ADR-110).
+ pub witness: WitnessChain, +} + +pub struct SoulNodes { + pub aether_embedding: Option, + pub cardiac_hr: Option, + pub cardiac_waveform: Option, + pub respiratory: Option, + pub gait_timing: Option, + pub skeletal_proportions: Option, + pub subcarrier_reflection: Option, + pub body_field_coupling: Option, +} + +pub struct SoulEdge { + pub edge_type: SoulEdgeType, + pub source_node_type: String, + pub target_node_type: String, + pub weight: f32, // edge attribute (e.g., correlation coefficient) +} + +pub enum SoulEdgeType { + DerivedFrom, + CorrelatesWith, + TemporallyColocated, + TemporallyAfter, + RequiresFieldModel, + Fuses, + AttestedBy, + DerivedByKeypointTracker, + EnvironmentNormalized, +} + +impl SoulSignature { + /// Serialize to an RVF binary blob. + pub fn to_rvf(&self) -> Vec; + /// Deserialize from an RVF binary blob. + pub fn from_rvf(data: &[u8]) -> Result; + /// Compute the weighted match score against a query. + pub fn match_score(&self, query: &SoulQuery, weights: &MatchWeights) -> f32; + /// Check whether all required nodes meet minimum confidence thresholds. + pub fn is_complete(&self, policy: &CompletenessPolicy) -> bool; +} +``` + +--- + +### 3.8 What the Signature Is NOT + +- Not a fingerprint of the room (that is the ADR-030 field model, a separate object). +- Not a waveform recording (the enrolled vectors are statistics and embeddings, not raw CSI). +- Not invertible to the original CSI stream (the AETHER projection head's information bottleneck prevents reconstruction; see ADR-024 §4 Negative consequences). +- Not a single scalar. Reducing to one number for threshold comparison is a deployment decision; the underlying object is a 7-channel graph. +- Not equal to a stored pose. The AETHER embedding captures body dynamics over many windows, not a single body pose at one instant.