From 67fec45e61d35d3e82408d2cd3ff68e26261796f Mon Sep 17 00:00:00 2001 From: rUv Date: Tue, 19 May 2026 18:08:43 -0400 Subject: [PATCH] =?UTF-8?q?feat(edge-registry):=20ADR-102=20=E2=80=94=20su?= =?UTF-8?q?rface=20Cognitum=20cog=20catalog=20via=20/api/v1/edge/registry?= =?UTF-8?q?=20(#648)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(edge-registry): ADR-102 — surface Cognitum cog catalog via /api/v1/edge/registry Adds a new sensing-server endpoint that fetches and caches the canonical Cognitum app registry at https://storage.googleapis.com/cognitum-apps/app-registry.json (105 cogs across 11 categories as of v2.1.0). RuView previously had no live awareness of the catalog — the README's capability table was hand- curated and went stale as Cognitum shipped new cogs (the registry was last updated 6 days ago). ADR: * docs/adr/ADR-102-edge-module-registry.md — full design, response shape, configuration flags, failure modes, and a 12-row security review covering SSRF, response inflation, ?refresh abuse, stale-serve semantics, TLS, cache poisoning, JSON-panic resistance, etc. Code: * v2/.../edge_registry.rs — EdgeRegistry struct + UreqFetcher + MockFetcher trait + 7 unit tests. RwLock<Option<CachedEntry>> with stale-on-error fallback. MAX_PAYLOAD_BYTES=8 MiB, 10s wire timeout. * v2/.../main.rs — constructs Option<Arc<EdgeRegistry>> at startup, registers GET /api/v1/edge/registry handler, wires Extension layer. Handler runs the blocking ureq fetch via tokio::task::spawn_blocking so the async runtime stays free. * v2/.../cli.rs / main.rs Args — three new flags (per user request to "allow the registry to be disabled or changed"): --edge-registry-url (env RUVIEW_EDGE_REGISTRY_URL) --edge-registry-ttl-secs (env RUVIEW_EDGE_REGISTRY_TTL_SECS) --no-edge-registry (env RUVIEW_NO_EDGE_REGISTRY) When --no-edge-registry is set or the URL is empty, the endpoint returns 404. Cargo.toml: adds ureq (rustls), sha2, thiserror as direct deps. 
README: * New collapsed "🧩 Edge Module Catalog" section with the full 105-cog table generated from the registry, grouped by category with practical one-line descriptions (e.g. "Spots irregular heartbeats and abnormal heart rhythms", "Detects walking problems and scores fall risk"). Links to https://seed.cognitum.one/store and the local appliance /cogs page. Sits between the HF model section and How It Works. Tests (7/7 pass): first_call_hits_upstream_and_caches ttl_expiry_triggers_refetch force_refresh_bypasses_fresh_cache stale_serve_on_upstream_failure_after_cached_success no_cache_no_upstream_returns_error upstream_invalid_json_is_treated_as_error upstream_sha256_is_deterministic Security highlights (full review in ADR-102 §"Security review"): - The registry is metadata-only; per-cog binary signatures (ADR-100) remain the trust root for installs. A compromised registry can mislead a human reader but cannot ship malicious binaries. - 8 MiB cap + 10s timeout + Option<Arc<EdgeRegistry>> via Extension layer means the endpoint can't be used to exhaust memory or pin tokio threads. - Stale-on-error responses carry an explicit `stale: true` field so upstream outages are visible to consumers rather than silently masked. - Endpoint sits behind the existing RUVIEW_API_TOKEN bearer gate when set, otherwise unauthenticated (registry contents are public anyway). 
* chore: refresh Cargo.lock for ureq/sha2/thiserror deps added by ADR-102 --- README.md | 172 ++++++++ docs/adr/ADR-102-edge-module-registry.md | 171 ++++++++ v2/Cargo.lock | 5 + .../wifi-densepose-sensing-server/Cargo.toml | 9 + .../src/edge_registry.rs | 379 ++++++++++++++++++ .../wifi-densepose-sensing-server/src/lib.rs | 1 + .../wifi-densepose-sensing-server/src/main.rs | 121 ++++++ 7 files changed, 858 insertions(+) create mode 100644 docs/adr/ADR-102-edge-module-registry.md create mode 100644 v2/crates/wifi-densepose-sensing-server/src/edge_registry.rs diff --git a/README.md b/README.md index 82694e6d..4e4fa5ac 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,178 @@ huggingface-cli download ruvnet/wifi-densepose-pretrained --local-dir models/wif The separate **17-keypoint pose-estimation model** is not in this release — pipeline is implemented but keypoint weights are still pending. Tracked in [#509](https://github.com/ruvnet/RuView/issues/509); see [ADR-079](docs/adr/ADR-079-camera-supervised-pose-finetune.md) phases P7–P9. +## 🧩 Edge Module Catalog + +
+🧩 105 edge modules ready to install on a Cognitum appliance — live catalog from app-registry.json v2.1.0 (updated 2026-05-13). Browse + install at seed.cognitum.one/store or your local appliance http://<appliance>:9000/cogs. + +Each module is a small signed binary (~400 KB) that runs alongside the WiFi-DensePose sensing stack on a Cognitum-V0 appliance. The catalog updates over the air — your appliance fetches it via GET /api/v1/edge/registry ([ADR-102](docs/adr/ADR-102-edge-module-registry.md)) and verifies each binary against an Ed25519 signature ([ADR-100](docs/adr/ADR-100-cog-packaging-specification.md)) before install. + +### 🫀 Health — 14 modules + +| ID | What it does | Size | Difficulty | +|----|--------------|-----:|:----------:| +| `air-quality-index` | Track indoor air quality with CO2 and particle sensors | 8 KB | Easy | +| `baby-cry` | Sustained mid-band energy detector for nursery / infant monitoring. Audio-only, no camera. | 451 KB | Easy | +| `breathing-sync` | Detects when two people breathe in sync | 10 KB | Hard | +| `cardiac-arrhythmia` | Spots irregular heartbeats and abnormal heart rhythms | 8 KB | Hard | +| `cough-detect` | Acoustic transient + spectral cough detector with 30s cluster aggregation. Early-warning signal for respiratory illness. | 451 KB | Easy | +| `dream-stage` | Tracks your sleep stages — light, deep, and dreaming | 14 KB | Hard | +| `fall-detect` | Two-stage impact + stillness fall detector over ambient feature stream (ESP32 motion / mic). Optional ruview-mode for CSI-based pose reinforcement. 
| 402 KB | Easy | +| `gait-analysis` | Detects walking problems and scores fall risk | 12 KB | Hard | +| `health-monitor` | Contactless heart rate, breathing, sleep, and fall alerts | 30 KB | Med | +| `respiratory-distress` | Alerts when breathing becomes labored or dangerously fast | 10 KB | Hard | +| `seizure-detect` | Recognizes seizures and sends immediate alerts | 10 KB | Hard | +| `sleep-apnea` | Detects when someone stops breathing during sleep | 4 KB | Easy | +| `snore-monitor` | Periodic low-band energy tracker for sleep-quality / apnea-risk trending. Companion to sleep-apnea cog. | 451 KB | Easy | +| `vital-trend` | Tracks breathing and heart rate trends over weeks | 6 KB | Med | + +### 🔒 Security — 14 modules + +| ID | What it does | Size | Difficulty | +|----|--------------|-----:|:----------:| +| `audit-logger` | Record every action for compliance — tamper-proof log | 8 KB | Easy | +| `behavioral-profiler` | Learns normal behavior and flags anything unusual | 12 KB | Hard | +| `fleet-auth` | Manage device certificates and access across all seeds | 12 KB | Med | +| `glass-break` | Two-phase bang + shatter acoustic detector. Distinguishes glass break from ordinary impulse noise. | 451 KB | Easy | +| `gunshot-detect` | Saturating peak + exponential decay acoustic detector with optional ruview CSI motion-drop reinforcement. 
| 451 KB | Easy | +| `intrusion` | Alerts when an unauthorized person enters a room | 6 KB | Med | +| `intrusion-detect-ml` | Detect network attacks using machine learning | 14 KB | Hard | +| `loitering` | Alerts when someone lingers too long in one spot | 3 KB | Easy | +| `network-firewall` | Block unauthorized network access per cog | 6 KB | Easy | +| `panic-motion` | Detects sudden panicked or erratic movement | 6 KB | Med | +| `perimeter-breach` | Guards multiple zones and shows entry direction | 10 KB | Med | +| `prompt-shield` | Blocks signal replay and injection attacks on the seed | 10 KB | Med | +| `tailgating` | Catches when someone sneaks in behind a badge holder | 6 KB | Med | +| `weapon-detect` | Detects concealed metal objects on a person | 8 KB | Hard | + +### 🏢 Building — 11 modules + +| ID | What it does | Size | Difficulty | +|----|--------------|-----:|:----------:| +| `beehive-monitor` | Acoustic hive state classifier. Detects healthy / chaotic / queenless / swarming / robbing via hum-band energy + chaos + piping autocorr. | 451 KB | Easy | +| `elevator-count` | Counts how many people are in an elevator | 8 KB | Med | +| `energy-audit` | Learns your schedule and cuts wasted energy | 6 KB | Med | +| `frost-warning` | Predicts frost 6 hours ahead via temperature trend + dewpoint-depression gate. Field/orchard agriculture. | 451 KB | Easy | +| `hvac-presence` | Turns heating and cooling on when you arrive | 3 KB | Easy | +| `lighting-zones` | Turns lights on and off as people move between rooms | 4 KB | Easy | +| `meeting-room` | Shows if a meeting room is free or occupied | 5 KB | Easy | +| `occupancy-zones` | Counts people in each room through walls | 8 KB | Med | +| `predictive-maintenance` | Vibration harmonic analyzer for rotating equipment. Tracks F1 / 2×F1 / high-order / sideband energy to score degradation severity. | 451 KB | Easy | +| `smoke-fire` | Multi-signal smoke and fire detector. 
Fuses acoustic crackle, thermal drift proxy, and optional ruview CSI plume signature. Not a UL-listed replacement for code-required smoke alarms. | 451 KB | Easy | +| `water-leak` | Persistent low-amplitude hiss + periodic drip acoustic detector with multi-minute persistence gate. Two-stage likely → confirmed. | 451 KB | Easy | + +### 🛍️ Retail — 7 modules + +| ID | What it does | Size | Difficulty | +|----|--------------|-----:|:----------:| +| `customer-flow` | Counts foot traffic in and out of each entrance | 8 KB | Med | +| `dwell-heatmap` | Shows where customers spend the most time | 6 KB | Med | +| `package-detect` | Sustained CSI-shift detector for porch / loading bay package arrivals and departures. Requires ESP32 CSI ruview input. | 451 KB | Easy | +| `parking-occupancy` | Per-zone parking occupancy via ESP32 CSI subcarrier-amplitude shift. Tracks utilization and churn-per-hour. Requires ruview. | 451 KB | Easy | +| `queue-length` | Estimates line length and wait time | 6 KB | Med | +| `shelf-engagement` | Detects when customers interact with products | 6 KB | Med | +| `table-turnover` | Tracks which restaurant tables are free or occupied | 4 KB | Easy | + +### 🏭 Industrial — 7 modules + +| ID | What it does | Size | Difficulty | +|----|--------------|-----:|:----------:| +| `clean-room` | Enforces max headcount in controlled environments | 4 KB | Easy | +| `confined-space` | Monitors workers in tight spaces for safety | 5 KB | Med | +| `forklift-proximity` | Warns if a forklift gets too close to workers | 10 KB | Hard | +| `livestock-monitor` | Monitors animals for distress, escape, or illness | 6 KB | Med | +| `ppe-compliance` | Cog-composition layer: alerts when ruview-densepose detects presence in a restricted zone without an accompanying PPE-camera-cog confirmation vector. | 387 KB | Easy | +| `slip-fall-zone` | Pre-fall risk detector. Fires when motion-variance drop, splash audio, and optional cautious-gait CSI all signal elevated slip risk. 
| 451 KB | Easy | +| `structural-vibration` | Detects dangerous vibrations in buildings or machines | 8 KB | Hard | + +### 🔬 Research — 12 modules + +| ID | What it does | Size | Difficulty | +|----|--------------|-----:|:----------:| +| `emotion-detect` | Reads stress and calm from body language and breathing | 10 KB | Hard | +| `energy-harvester` | Optimize solar and battery for off-grid seed deployment | 6 KB | Med | +| `gesture-language` | Recognizes sign language gestures in real time | 12 KB | Hard | +| `ghost-hunter` | Finds unexplained environmental anomalies — for fun | 10 KB | Hard | +| `happiness-score` | Estimates well-being from movement and mood signals | 8 KB | Med | +| `hyperbolic-space` | Maps data into curved space for tree-like structures | 12 KB | Hard | +| `music-conductor` | Reads a conductor's gestures for tempo and dynamics | 12 KB | Hard | +| `plant-growth` | Tracks plant growth rate and day/night cycles | 8 KB | Med | +| `rain-detect` | Detects when rain starts, stops, and how heavy it is | 6 KB | Med | +| `ruview-densepose` | Full body pose tracking from WiFi — no cameras needed | 50 KB | Hard | +| `sound-classifier` | Identify sounds like glass break, alarm, or baby cry | 16 KB | Hard | +| `time-crystal` | Experiments with repeating time-pattern symmetry | 12 KB | Hard | + +### 🤖 Ai — 15 modules + +| ID | What it does | Size | Difficulty | +|----|--------------|-----:|:----------:| +| `anomaly-attractor` | Learns what's normal and catches anything weird | 10 KB | Hard | +| `cognitive-pipeline` | FastGRNN anomaly gate + SmolLM2 sparse-LLM inference for on-device Pi Zero 2W cognitive events | 320 KB | Hard | +| `dtw-gesture-learn` | Teach custom hand gestures by showing examples | 14 KB | Med | +| `ewc-lifelong` | Learns new things without forgetting old lessons | 8 KB | Hard | +| `federated-learning` | Train AI across seeds without sharing raw data | 18 KB | Hard | +| `goap-autonomy` | Plans and executes goals on its own | 14 KB | Hard | 
+| `meta-adapt` | Automatically tunes itself for best performance | 10 KB | Hard | +| `micro-hnsw` | Fast on-device fingerprinting and classification | 12 KB | Med | +| `neural-trader` | Spot market patterns and trends from live data | 20 KB | Hard | +| `pagerank-influence` | Finds the most influential person in a group | 12 KB | Med | +| `pattern-sequence` | Detects daily routines and repeated habits | 10 KB | Med | +| `rag-local` | Search your documents using AI — runs on the seed | 14 KB | Med | +| `spiking-tracker` | Brain-inspired tracker that runs on tiny hardware | 16 KB | Hard | +| `temporal-logic` | Enforces safety rules on live event streams | 12 KB | Hard | +| `time-series-forecast` | Predict sensor trends using historical patterns | 12 KB | Med | + +### 🐝 Swarm — 11 modules + +| ID | What it does | Size | Difficulty | +|----|--------------|-----:|:----------:| +| `swarm-backup-restore` | Auto-backup data to other seeds — one-click restore | 8 KB | Easy | +| `swarm-cluster-monitor` | Live dashboard of every seed's health and status | 6 KB | Easy | +| `swarm-consensus` | Seeds vote before making critical changes together | 16 KB | Hard | +| `swarm-delta-sync` | Auto-sync data between seeds — only sends changes | 8 KB | Med | +| `swarm-deploy` | Install or remove cogs on all seeds at once | 10 KB | Med | +| `swarm-distributed-store` | Spread data across seeds and search them all at once | 14 KB | Hard | +| `swarm-edge-orchestrator` | Manage all ESP32 sensor nodes from one place | 14 KB | Hard | +| `swarm-load-balancer` | Spread queries across seeds so no single one overloads | 10 KB | Med | +| `swarm-mesh-manager` | Find, connect, and monitor all seeds on your network | 12 KB | Easy | +| `swarm-mqtt-bridge` | Share events between seeds over MQTT messaging | 6 KB | Easy | +| `swarm-witness-federation` | Share tamper-proof audit trails across seeds | 12 KB | Hard | + +### 📡 Signal — 6 modules + +| ID | What it does | Size | Difficulty | 
+|----|--------------|-----:|:----------:| +| `coherence-gate` | Filters out noisy signals and keeps clean ones | 8 KB | Med | +| `flash-attention` | Focuses sensing on specific areas for better accuracy | 12 KB | Med | +| `optimal-transport` | Measures motion using shape-aware signal comparison | 12 KB | Hard | +| `person-matching` | Tells apart multiple people in the same room | 18 KB | Hard | +| `sparse-recovery` | Recovers missing signal data from partial readings | 16 KB | Hard | +| `temporal-compress` | Shrinks old data to save memory without losing meaning | 14 KB | Med | + +### 🌐 Network — 1 module + +| ID | What it does | Size | Difficulty | +|----|--------------|-----:|:----------:| +| `tailscale` | Reach the seed from anywhere via a private WireGuard mesh (Tailscale). Userspace mode — no root. | 700 KB | Med | + +### 🛠️ Developer — 7 modules + +| ID | What it does | Size | Difficulty | +|----|--------------|-----:|:----------:| +| `adversarial` | Detects tampered or spoofed sensor signals | 4 KB | Easy | +| `coherence` | Monitors signal quality across multiple channels | 4 KB | Easy | +| `gesture` | Core gesture recognition building block for cogs | 6 KB | Med | +| `interference-search` | Searches many possibilities at once for fast answers | 14 KB | Hard | +| `psycho-symbolic` | Reasons over knowledge graphs with multiple styles | 16 KB | Hard | +| `quantum-coherence` | Quantum-inspired model for advanced signal states | 16 KB | Hard | +| `self-healing-mesh` | Keeps sensor mesh running even when nodes drop out | 14 KB | Hard | + +> ℹ️ Build your own cog: see [ADR-100](docs/adr/ADR-100-cog-packaging-specification.md) for the packaging spec. The first cog this repo ships into the catalog lives in [v2/crates/cog-pose-estimation/](v2/crates/cog-pose-estimation/) (17-keypoint WiFi pose, [ADR-101](docs/adr/ADR-101-pose-estimation-cog.md)). + +
+ + ## 🔬 How It Works WiFi routers flood every room with radio waves. When a person moves — or even breathes — those waves scatter differently. WiFi DensePose reads that scattering pattern and reconstructs what happened: diff --git a/docs/adr/ADR-102-edge-module-registry.md b/docs/adr/ADR-102-edge-module-registry.md new file mode 100644 index 00000000..f8022de5 --- /dev/null +++ b/docs/adr/ADR-102-edge-module-registry.md @@ -0,0 +1,171 @@ +# ADR-102: Edge Module Registry Integration + +- **Status:** Accepted +- **Date:** 2026-05-19 +- **Deciders:** ruv + +## Context + +The Cognitum app ecosystem publishes a canonical app store catalog at: + +``` +https://storage.googleapis.com/cognitum-apps/app-registry.json +``` + +As of v2.1.0 (2026-05-13) the registry advertises **105 cogs across 11 categories** (health, security, building, retail, industrial, research, ai, swarm, signal, network, developer). Each entry carries `id`, `name`, `category`, `version`, `description`, `size_kb`, `difficulty`, `sha256`, `binary_size`, and a `config[]` schema describing the runtime parameters the appliance offers when installing the cog. + +RuView today has no live awareness of this catalog. The `README.md` capability table is hand-curated; the UI surfaces only the capabilities the dashboard's HTML knows about; nothing in `wifi-densepose-sensing-server` references the registry. Result: when Cognitum ships a new cog (the registry was last updated 6 days ago — a fast cadence), RuView stays unaware until someone manually edits the README. Customers running the RuView dashboard against a real appliance see a 10-capability bag in the UI while the appliance is actually capable of installing 105 cogs. + +Today's `cog-pose-estimation@0.0.1` release (PRs #642 / #643, ADR-100, ADR-101) is the first cog this repo ships to that registry. We need the discovery side to match. 
+ +## Decision + +`wifi-densepose-sensing-server` will fetch `app-registry.json` on demand, cache it in process memory with a TTL, and serve it back through a new endpoint: + +``` +GET /api/v1/edge/registry +GET /api/v1/edge/registry?refresh=1 (force-bypass cache, log if abused) +``` + +The registry is **passively surfaced**, not modified. RuView is a presentation layer for the canonical Cognitum catalog; it never re-signs entries or re-hosts binaries. + +### Module + +`v2/crates/wifi-densepose-sensing-server/src/edge_registry.rs` — small, ~380 lines including its unit tests. + +```rust +pub struct EdgeRegistry { + cached: RwLock<Option<CachedEntry>>, + ttl: Duration, + upstream_url: String, +} + +struct CachedEntry { + payload: serde_json::Value, + fetched_at: Instant, + upstream_sha256: String, +} +``` + +Cache semantics: + +- TTL **3600 s (1 hour)** by default — registry updates land on a roughly-weekly cadence and a stale-by-an-hour catalog is fine. +- `?refresh=1` bypasses the cache but writes a debug log so accidental abuse is visible. +- On upstream fetch failure when the cache is non-empty, **serve the stale cached copy** with a `stale: true` marker in the response and a 200 status (preserve UI), not a 5xx. +- On upstream fetch failure when the cache is empty, return 503 with the upstream error in the body. + +### Response shape + +```jsonc +{ + "fetched_at": 1779200000, // server-side fetch timestamp + "ttl_seconds": 3600, + "stale": false, // true when serving past TTL because upstream is down + "upstream_url": "https://storage.googleapis.com/cognitum-apps/app-registry.json", + "upstream_sha256": "<hex-digest>", + "registry": { /* full canonical JSON as returned upstream */ } +} +``` + +The `registry` field is the upstream JSON inlined verbatim so consumers don't need to make a second hop. `upstream_sha256` lets a paranoid consumer compare against a pinned hash. + +### Trust / verification + +- Bucket is public-read with object versioning enabled (per ADR-100 §"GCS misconfiguration risks"). 
+- The cog-level `binary_sha256` + `binary_signature` (ADR-100) are the trust roots for *installs*. The registry itself is not signed today. +- We deliberately **do not** add a signature requirement to the registry JSON in this ADR — that would block the integration on a parallel infrastructure project. A future ADR can layer signature checks on top once the publisher pipeline emits them. + +### UI surfacing + +New page `ui/edge-modules.html` renders the registry into category sections with cog cards. Each card links out to the Cognitum V0 appliance's `/cogs` page (`http://cognitum-v0:9000/cogs#<cog-id>`) for the install action — RuView itself never installs. + +The existing dashboard's "Capabilities" section continues to show RuView-native sensing capabilities (presence, breathing, pose, etc. — the things RuView itself runs); the new edge-modules page shows the broader Cognitum cog catalog. The two are distinct surfaces and shouldn't be merged. + +### Failure modes + +| Scenario | Behaviour | +|---|---| +| Upstream returns 200 with valid JSON | Cache it, return it. | +| Upstream returns 200 with invalid JSON | Treat as failure; serve stale if available else 503. Log the upstream sha + the parse error. | +| Upstream returns 4xx / 5xx | Same as JSON-invalid: serve stale if available else 503. | +| TLS / DNS / timeout error | Same. | +| Upstream is permanently moved | Operator updates the `upstream_url` config (CLI flag added). No code change required to migrate registries. | + +### Configuration + +- `--edge-registry-url <URL>` — override the default (default: `https://storage.googleapis.com/cognitum-apps/app-registry.json`) +- `--edge-registry-ttl-secs <SECS>` — override the cache TTL (default: 3600) +- `--no-edge-registry` — disable the endpoint entirely (returns 404). For air-gapped deployments. + +## Consequences + +### Positive + +- One source of truth for the cog catalog across RuView + Cognitum dashboards. 
+- Zero ongoing maintenance: when Cognitum publishes registry v2.2.0, RuView sees it within an hour without a release. +- The endpoint is also useful for non-UI consumers (CI checks, fleet automation, third-party integrations). +- Lets us deprecate the hand-curated README capability table in favour of generated content (separate PR). + +### Negative + +- Adds an outbound HTTP dependency to the sensing-server. Air-gapped deployments must use `--no-edge-registry`. +- Stale-but-served behaviour can mask upstream outages from operators. Mitigation: include `stale: true` + `fetched_at` in the response so the UI can render a "registry possibly out of date" badge. + +### Risks + +- **Upstream rug-pull**: if `cognitum-apps` is deleted or replaced, the endpoint goes dark. The `--edge-registry-url` flag lets operators repoint without a code change. Long-term, RuView could mirror the registry into its own GCS bucket if the relationship requires it. +- **Cache poisoning**: the upstream is public-read; an attacker who breaches Cognitum's GCS write could push a bad registry. The cog-level signatures (ADR-100) limit the blast radius — bad registry entries can't install bad binaries, only show wrong metadata. Acceptable until registry-level signing lands. + +## Security review + +A real review of the attack surface this endpoint introduces. + +### Threats considered + +| # | Threat | Mitigation in this ADR | +|---|--------|------------------------| +| T1 | **SSRF** — operator-supplied `--edge-registry-url` redirects fetches to an internal target | Flag is operator-only (CLI / env) — there is no API endpoint to mutate it at runtime. Operators are already trusted (they control the binary). | +| T2 | **Outbound dependency reveals deployment** — a passive observer of the egress sees the appliance phoning home to GCS | Documented in the docstring + the runtime startup log. Operators wanting offline deployments use `--no-edge-registry`. 
| +| T3 | **Malicious upstream registry** — Cognitum's GCS bucket is breached and a poisoned `app-registry.json` is served | Two layers absorb this: (a) the registry's role is **discovery only** — installs verify the per-cog `binary_sha256` + `binary_signature` (ADR-100); a wrong description string can mislead a human, but a wrong binary still has to pass Ed25519 against `COGNITUM_OWNER_SIGNING_KEY`. (b) The endpoint exposes `upstream_sha256` so a paranoid operator can pin the expected registry hash externally and alert on drift. | +| T4 | **Response inflation** — upstream returns a multi-GB payload to exhaust memory | `MAX_PAYLOAD_BYTES = 8 MiB` cap (current registry is ~50–200 KB). Exceeding cap returns an error without buffering past the cap. | +| T5 | **Slow upstream blocking server threads** — Slowloris-style stall on the fetch | 10-second wire timeout via `ureq::AgentBuilder`. Per-handler fetch runs inside `tokio::task::spawn_blocking` so a stalled fetch never blocks the async runtime. | +| T6 | **Denial via `?refresh=1` abuse** — unauthenticated callers force-bypass the cache repeatedly | Cache lives in process; `?refresh=1` triggers a single upstream fetch behind a synchronous code path. A flood of refresh requests is rate-limited by the upstream's own throttling (GCS) and locally serialised by Rust's `RwLock`. Refresh requests are logged at `debug` so abuse is visible. **Follow-up:** add per-IP rate-limit middleware if seen abused (separate PR; tracked in #574-style follow-up). | +| T7 | **JSON deserialisation panics** — malformed registry triggers a Rust panic | Payload is parsed as `serde_json::Value` (opaque untyped tree) — never coerced into a strongly-typed struct that could panic. Failure is propagated as `FetcherError::Network` which the handler maps to 503. | +| T8 | **Stale-on-error masks outages from operators** | Response carries `stale: true` + `fetched_at` (unix timestamp). 
UI rendering MUST surface this badge — encoded as an explicit field, not an implicit silence. | +| T9 | **TLS downgrade / MITM on the fetch** | `ureq` is built with the `tls` feature (rustls) by default. No `--insecure` flag exists. If the upstream uses LetsEncrypt the cert chain is system-trusted; certificate pinning is out of scope (would block the bucket from rotating certs). | +| T10 | **Unauthenticated access exposes ‘what cogs exist’** | The registry is canonical-public information (already public-read on GCS via anonymous HTTP GET). Surfacing it on a local LAN HTTP API does not increase its disclosure. The endpoint stays under the project's existing `RUVIEW_API_TOKEN` Bearer auth — when set, the registry is gated like other `/api/v1/*` routes. | +| T11 | **Configuration injection via env var** — `RUVIEW_EDGE_REGISTRY_URL` set to a malicious URL by an attacker who controls the process environment | If an attacker controls the env, they own the process; this is not a new threat surface. Documented in the CLI help. | +| T12 | **Cache mutation across threads / poisoning** | The cache is `RwLock<Option<CachedEntry>>`. Writes go through `cached.write()` once per fetch. Snapshot reads `clone()` the `CachedEntry` (a deep copy of the `serde_json::Value` tree — acceptable for a ~50–200 KB registry) so concurrent readers don't share mutable state. Tests cover the multi-call path; no `unsafe` is used. | + +### What this ADR does NOT secure + +- **Registry-level signing** — the JSON payload itself is unsigned. If/when Cognitum's publisher pipeline emits a registry sig (e.g. detached `.json.sig`), a follow-up ADR will require it. Today the per-cog binary signature (ADR-100) is the actual trust root for installs; the registry is metadata. +- **Per-client rate-limiting on `?refresh=1`** — relies on the upstream's own throttling. If we see abuse we'll add a token-bucket middleware; not needed for v0.0.1. 
+ +### Testing + +| Test | What it verifies | +|------|------------------| +| `first_call_hits_upstream_and_caches` | Single fetch, then cache hit | +| `ttl_expiry_triggers_refetch` | Cache TTL bound respected | +| `force_refresh_bypasses_fresh_cache` | `?refresh=1` semantics | +| `stale_serve_on_upstream_failure_after_cached_success` | T8 explicit (`stale: true` returned) | +| `no_cache_no_upstream_returns_error` | T3/T5 — error propagated cleanly when nothing to fall back on | +| `upstream_invalid_json_is_treated_as_error` | T7 — malformed payload doesn't panic | +| `upstream_sha256_is_deterministic` | T3 — hash field is reliable for external pinning | + +All 7 tests in `src/edge_registry.rs::tests` pass. + +## Migration + +1. Land this ADR + the implementing PR. +2. UI: ship `ui/edge-modules.html` and link from `index.html`. +3. After two clean releases of the endpoint, remove the hand-curated "Capabilities" table from `README.md` and replace with a small "see the appliance for the full catalog" pointer. +4. Future ADR: registry signing once Cognitum's publisher pipeline emits a sig. + +## See also + +- ADR-100: Cognitum Cog Packaging Specification (binary trust model). +- ADR-101: Pose Estimation Cog (the first repo-shipped cog visible in the registry). +- v0-appliance ADR-220: Cog management surface (where this registry is the input to install actions). +- `docs/benchmarks/pose-estimation-cog.md`: the per-cog benchmark format this ADR's response shape complements. 
diff --git a/v2/Cargo.lock b/v2/Cargo.lock index bc0f5e85..10aa0980 100644 --- a/v2/Cargo.lock +++ b/v2/Cargo.lock @@ -8441,6 +8441,8 @@ dependencies = [ "once_cell", "rustls 0.23.37", "rustls-pki-types", + "serde", + "serde_json", "url", "webpki-roots 0.26.11", ] @@ -9158,12 +9160,15 @@ dependencies = [ "ruvector-mincut", "serde", "serde_json", + "sha2", "tempfile", + "thiserror 1.0.69", "tokio", "tower 0.4.13", "tower-http", "tracing", "tracing-subscriber", + "ureq 2.12.1", "wifi-densepose-signal", "wifi-densepose-wifiscan", ] diff --git a/v2/crates/wifi-densepose-sensing-server/Cargo.toml b/v2/crates/wifi-densepose-sensing-server/Cargo.toml index 991c2bf3..0f21baf9 100644 --- a/v2/crates/wifi-densepose-sensing-server/Cargo.toml +++ b/v2/crates/wifi-densepose-sensing-server/Cargo.toml @@ -56,6 +56,15 @@ wifi-densepose-signal = { version = "0.3.0", path = "../wifi-densepose-signal", midstreamer-temporal-compare = "0.2" # DTW / LCS / Edit-Distance pattern matching midstreamer-attractor = "0.2" # Lyapunov + regime classification +# ADR-102: Edge Module Registry — fetch the canonical Cognitum cog catalog +# at `https://storage.googleapis.com/cognitum-apps/app-registry.json`, +# cache with TTL, surface via /api/v1/edge/registry. ureq is the smallest +# blocking HTTP client we can use without dragging a tokio HTTP stack in; +# rustls is enabled implicitly via the `tls` default feature. +ureq = { version = "2", default-features = false, features = ["tls", "json"] } +sha2 = "0.10" +thiserror = "1" + [dev-dependencies] tempfile = "3.10" # `tower::ServiceExt::oneshot` for in-process Router tests (bearer_auth). diff --git a/v2/crates/wifi-densepose-sensing-server/src/edge_registry.rs b/v2/crates/wifi-densepose-sensing-server/src/edge_registry.rs new file mode 100644 index 00000000..62d1b3e6 --- /dev/null +++ b/v2/crates/wifi-densepose-sensing-server/src/edge_registry.rs @@ -0,0 +1,379 @@ +//! Edge Module Registry — surfaces the canonical Cognitum cog catalog at +//! 
`https://storage.googleapis.com/cognitum-apps/app-registry.json` through +//! the sensing-server's HTTP surface. See ADR-102 for the design and trust +//! model; see ADR-100 for the underlying cog binary trust model. +//! +//! On-demand fetch + in-process TTL cache. Stale-while-error semantics: if +//! the upstream is unreachable but we have a cached copy, return the cached +//! copy with `stale: true` rather than 503. + +use std::io::Read; +use std::sync::RwLock; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; + +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use sha2::{Digest, Sha256}; + +/// Canonical upstream registry URL. Overridable via CLI for air-gapped or +/// mirror deployments. +pub const DEFAULT_UPSTREAM_URL: &str = + "https://storage.googleapis.com/cognitum-apps/app-registry.json"; + +/// Default cache TTL — the registry updates on a roughly-weekly cadence; +/// one hour of staleness is fine. +pub const DEFAULT_TTL_SECS: u64 = 3600; + +/// Wire request timeout. The registry is ~50–200 KB; on a healthy network +/// it lands in well under a second. +pub const DEFAULT_FETCH_TIMEOUT_SECS: u64 = 10; + +/// Response shape served by `GET /api/v1/edge/registry`. Documented in +/// ADR-102 §"Response shape". +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RegistryResponse { + pub fetched_at: u64, + pub ttl_seconds: u64, + pub stale: bool, + pub upstream_url: String, + pub upstream_sha256: String, + pub registry: Value, +} + +/// Internal cache entry. +#[derive(Debug, Clone)] +struct CachedEntry { + payload: Value, + fetched_at_instant: Instant, + fetched_at_unix: u64, + upstream_sha256: String, +} + +/// On-demand registry fetcher + cache. Cheap to construct; one instance is +/// shared across all incoming HTTP requests via `Arc`. 
+pub struct EdgeRegistry { + cached: RwLock<Option<CachedEntry>>, + ttl: Duration, + upstream_url: String, + fetcher: Box<dyn Fetcher>, +} + +/// Pluggable fetcher abstraction — concrete impl is `UreqFetcher`; tests +/// can swap in `MockFetcher` to drive the cache logic without network. +pub trait Fetcher: Send + Sync { + fn fetch(&self, url: &str) -> Result<Vec<u8>, FetcherError>; +} + +#[derive(Debug, thiserror::Error)] +pub enum FetcherError { + #[error("network error: {0}")] + Network(String), + #[error("http {status}: {body}")] + Http { status: u16, body: String }, + #[error("response too large: {0} bytes")] + TooLarge(usize), +} + +/// Cap on the response size to avoid pathological upstream responses +/// chewing through memory. 8 MiB is generous — the v2.1.0 registry is well +/// under 200 KB. +pub const MAX_PAYLOAD_BYTES: usize = 8 * 1024 * 1024; + +/// Live `ureq`-backed fetcher. +pub struct UreqFetcher { + timeout: Duration, +} + +impl UreqFetcher { + pub fn new(timeout: Duration) -> Self { + Self { timeout } + } +} + +impl Default for UreqFetcher { + fn default() -> Self { + Self::new(Duration::from_secs(DEFAULT_FETCH_TIMEOUT_SECS)) + } +} + +impl Fetcher for UreqFetcher { + fn fetch(&self, url: &str) -> Result<Vec<u8>, FetcherError> { + let agent = ureq::AgentBuilder::new() + .timeout(self.timeout) + .build(); + let resp = agent + .get(url) + .call() + .map_err(|e| match e { + ureq::Error::Status(status, r) => FetcherError::Http { + status, + body: r.into_string().unwrap_or_default(), + }, + ureq::Error::Transport(t) => FetcherError::Network(t.to_string()), + })?; + let mut reader = resp.into_reader().take((MAX_PAYLOAD_BYTES + 1) as u64); + let mut buf = Vec::with_capacity(64 * 1024); + reader + .read_to_end(&mut buf) + .map_err(|e| FetcherError::Network(e.to_string()))?; + if buf.len() > MAX_PAYLOAD_BYTES { + return Err(FetcherError::TooLarge(buf.len())); + } + Ok(buf) + } +} + +impl EdgeRegistry { + pub fn new(upstream_url: impl Into<String>, ttl: Duration) -> Self { + Self::with_fetcher(upstream_url, ttl,
Box::new(UreqFetcher::default())) + } + + pub fn with_fetcher( + upstream_url: impl Into<String>, + ttl: Duration, + fetcher: Box<dyn Fetcher>, + ) -> Self { + Self { + cached: RwLock::new(None), + ttl, + upstream_url: upstream_url.into(), + fetcher, + } + } + + /// Return a `RegistryResponse`. Uses the cache if fresh; otherwise + /// re-fetches from upstream. On upstream failure with a non-empty + /// cache, returns the stale copy. + pub fn get(&self, force_refresh: bool) -> Result<RegistryResponse, FetcherError> { + if !force_refresh { + if let Some(entry) = self.fresh_cache_snapshot() { + return Ok(self.response_from(&entry, false)); + } + } + + // Either no cache, expired, or forced refresh — try upstream. + match self.fetch_and_cache() { + Ok(entry) => Ok(self.response_from(&entry, false)), + Err(e) => { + // Upstream failed — serve stale if available. + if let Some(entry) = self.any_cache_snapshot() { + Ok(self.response_from(&entry, true)) + } else { + Err(e) + } + } + } + } + + fn fresh_cache_snapshot(&self) -> Option<CachedEntry> { + let guard = self.cached.read().ok()?; + let entry = guard.as_ref()?; + if entry.fetched_at_instant.elapsed() < self.ttl { + Some(entry.clone()) + } else { + None + } + } + + fn any_cache_snapshot(&self) -> Option<CachedEntry> { + let guard = self.cached.read().ok()?; + guard.clone() + } + + fn fetch_and_cache(&self) -> Result<CachedEntry, FetcherError> { + let bytes = self.fetcher.fetch(&self.upstream_url)?; + let payload: Value = serde_json::from_slice(&bytes) + .map_err(|e| FetcherError::Network(format!("invalid upstream JSON: {e}")))?; + let mut hasher = Sha256::new(); + hasher.update(&bytes); + let upstream_sha256 = hex_encode(&hasher.finalize()); + let now_unix = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + + let entry = CachedEntry { + payload, + fetched_at_instant: Instant::now(), + fetched_at_unix: now_unix, + upstream_sha256, + }; + if let Ok(mut guard) = self.cached.write() { + *guard = Some(entry.clone()); + } + Ok(entry) + } + + fn response_from(&self, entry: &CachedEntry,
stale: bool) -> RegistryResponse { + RegistryResponse { + fetched_at: entry.fetched_at_unix, + ttl_seconds: self.ttl.as_secs(), + stale, + upstream_url: self.upstream_url.clone(), + upstream_sha256: entry.upstream_sha256.clone(), + registry: entry.payload.clone(), + } + } +} + +fn hex_encode(bytes: &[u8]) -> String { + let mut s = String::with_capacity(bytes.len() * 2); + for b in bytes { + s.push_str(&format!("{:02x}", b)); + } + s +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Arc; + + /// Mock fetcher backed by a queue of canned responses. Lets us drive + /// the cache logic deterministically. + struct MockFetcher { + responses: std::sync::Mutex<Vec<Result<Vec<u8>, FetcherError>>>, + call_count: AtomicUsize, + } + + impl MockFetcher { + fn new(responses: Vec<Result<Vec<u8>, FetcherError>>) -> Arc<Self> { + Arc::new(Self { + responses: std::sync::Mutex::new(responses), + call_count: AtomicUsize::new(0), + }) + } + } + + impl Fetcher for Arc<MockFetcher> { + fn fetch(&self, _url: &str) -> Result<Vec<u8>, FetcherError> { + self.call_count.fetch_add(1, Ordering::SeqCst); + let mut q = self.responses.lock().unwrap(); + if q.is_empty() { + return Err(FetcherError::Network("mock: queue empty".into())); + } + q.remove(0) + } + } + + fn sample_payload() -> Vec<u8> { + br#"{"version":"2.1.0","updated":"2026-05-13","cogs":[]}"#.to_vec() + } + + #[test] + fn first_call_hits_upstream_and_caches() { + let fetcher = MockFetcher::new(vec![Ok(sample_payload())]); + let reg = EdgeRegistry::with_fetcher( + "http://test.invalid/registry.json", + Duration::from_secs(3600), + Box::new(fetcher.clone()), + ); + let resp = reg.get(false).expect("get"); + assert!(!resp.stale); + assert_eq!(resp.registry["version"], "2.1.0"); + assert_eq!(fetcher.call_count.load(Ordering::SeqCst), 1); + // Second call within TTL — no
new fetch. + let _ = reg.get(false).expect("get"); + assert_eq!(fetcher.call_count.load(Ordering::SeqCst), 1); + } + + #[test] + fn ttl_expiry_triggers_refetch() { + let fetcher = MockFetcher::new(vec![Ok(sample_payload()), Ok(sample_payload())]); + let reg = EdgeRegistry::with_fetcher( + "http://test.invalid/registry.json", + Duration::from_millis(10), // very short TTL + Box::new(fetcher.clone()), + ); + let _ = reg.get(false).expect("first"); + std::thread::sleep(Duration::from_millis(30)); + let _ = reg.get(false).expect("second after expiry"); + assert_eq!(fetcher.call_count.load(Ordering::SeqCst), 2); + } + + #[test] + fn force_refresh_bypasses_fresh_cache() { + let fetcher = MockFetcher::new(vec![Ok(sample_payload()), Ok(sample_payload())]); + let reg = EdgeRegistry::with_fetcher( + "http://test.invalid/registry.json", + Duration::from_secs(3600), + Box::new(fetcher.clone()), + ); + let _ = reg.get(false).expect("first"); + let _ = reg.get(true).expect("refresh"); + assert_eq!(fetcher.call_count.load(Ordering::SeqCst), 2); + } + + #[test] + fn stale_serve_on_upstream_failure_after_cached_success() { + // First call succeeds and populates the cache. Second call hits upstream + // failure but we still have a cached copy — should serve it with stale=true. 
+ let fetcher = MockFetcher::new(vec![ + Ok(sample_payload()), + Err(FetcherError::Network("simulated".into())), + ]); + let reg = EdgeRegistry::with_fetcher( + "http://test.invalid/registry.json", + Duration::from_millis(1), // expire quickly so call 2 retries upstream + Box::new(fetcher.clone()), + ); + let first = reg.get(false).expect("first"); + assert!(!first.stale); + std::thread::sleep(Duration::from_millis(5)); + let second = reg.get(false).expect("stale-serve"); + assert!(second.stale, "expected stale=true when upstream failed"); + assert_eq!(second.registry["version"], "2.1.0"); + } + + #[test] + fn no_cache_no_upstream_returns_error() { + let fetcher = MockFetcher::new(vec![Err(FetcherError::Network("down".into()))]); + let reg = EdgeRegistry::with_fetcher( + "http://test.invalid/registry.json", + Duration::from_secs(3600), + Box::new(fetcher), + ); + let err = reg.get(false).expect_err("should be err"); + match err { + FetcherError::Network(_) => {} + other => panic!("unexpected error: {other:?}"), + } + } + + #[test] + fn upstream_invalid_json_is_treated_as_error() { + let fetcher = MockFetcher::new(vec![Ok(b"not json".to_vec())]); + let reg = EdgeRegistry::with_fetcher( + "http://test.invalid/registry.json", + Duration::from_secs(3600), + Box::new(fetcher), + ); + let err = reg.get(false).expect_err("invalid json"); + match err { + FetcherError::Network(msg) => assert!(msg.contains("invalid upstream JSON")), + other => panic!("unexpected error: {other:?}"), + } + } + + #[test] + fn upstream_sha256_is_deterministic() { + let fetcher = MockFetcher::new(vec![Ok(sample_payload())]); + let reg = EdgeRegistry::with_fetcher( + "http://test.invalid/registry.json", + Duration::from_secs(3600), + Box::new(fetcher), + ); + let resp = reg.get(false).expect("get"); + // SHA-256 of br#"{"version":"2.1.0","updated":"2026-05-13","cogs":[]}"# + let mut hasher = Sha256::new(); + hasher.update(&sample_payload()); + let expected = hex_encode(&hasher.finalize()); + 
assert_eq!(resp.upstream_sha256, expected); + assert_eq!(resp.upstream_sha256.len(), 64); + } +} diff --git a/v2/crates/wifi-densepose-sensing-server/src/lib.rs b/v2/crates/wifi-densepose-sensing-server/src/lib.rs index 316498b3..c8c1d0f1 100644 --- a/v2/crates/wifi-densepose-sensing-server/src/lib.rs +++ b/v2/crates/wifi-densepose-sensing-server/src/lib.rs @@ -8,6 +8,7 @@ //! - Real-time CSI introspection / low-latency tap (`introspection`, ADR-099) pub mod bearer_auth; +pub mod edge_registry; pub mod host_validation; pub mod introspection; pub mod path_safety; diff --git a/v2/crates/wifi-densepose-sensing-server/src/main.rs b/v2/crates/wifi-densepose-sensing-server/src/main.rs index b68ee4b9..681ef6fb 100644 --- a/v2/crates/wifi-densepose-sensing-server/src/main.rs +++ b/v2/crates/wifi-densepose-sensing-server/src/main.rs @@ -35,10 +35,13 @@ use axum::{ extract::{ ws::{Message, WebSocket, WebSocketUpgrade}, Path, + Query, State, }, + http::StatusCode, response::{Html, IntoResponse, Json}, routing::{delete, get, post}, + Extension, Router, }; use clap::Parser; @@ -181,6 +184,35 @@ struct Args { /// Start field model calibration on boot (empty room required) #[arg(long)] calibrate: bool, + + // --------------------------------------------------------------- + // ADR-102: Edge Module Registry — surface the canonical Cognitum + // cog catalog via `GET /api/v1/edge/registry`. + // --------------------------------------------------------------- + /// Override the upstream URL for the edge module registry. Set to a + /// mirror or local file://... URL for air-gapped deployments. Empty + /// string or --no-edge-registry disables the endpoint entirely. + #[arg( + long, + value_name = "URL", + env = "RUVIEW_EDGE_REGISTRY_URL", + default_value = "https://storage.googleapis.com/cognitum-apps/app-registry.json" + )] + edge_registry_url: String, + + /// Cache TTL for the edge module registry, in seconds. 
+ #[arg( + long, + value_name = "SECS", + env = "RUVIEW_EDGE_REGISTRY_TTL_SECS", + default_value = "3600" + )] + edge_registry_ttl_secs: u64, + + /// Disable the edge module registry endpoint entirely. Returns 404 on + /// `GET /api/v1/edge/registry`. Use for air-gapped deployments. + #[arg(long, env = "RUVIEW_NO_EDGE_REGISTRY")] + no_edge_registry: bool, } // ── Data types ─────────────────────────────────────────────────────────────── @@ -3689,6 +3721,67 @@ async fn vital_signs_endpoint(State(state): State) -> Json, +} + +/// GET /api/v1/edge/registry — surfaces the canonical Cognitum cog catalog. +/// +/// See ADR-102 (`docs/adr/ADR-102-edge-module-registry.md`) for the design +/// + trust model + security review. +async fn edge_registry_endpoint( + Extension(reg): Extension< + Option>, + >, + Query(params): Query, +) -> Result, (StatusCode, Json)> { + let Some(reg) = reg else { + // --no-edge-registry, or upstream URL empty. + return Err(( + StatusCode::NOT_FOUND, + Json(serde_json::json!({ + "error": "edge_registry_disabled", + "detail": "This sensing-server was started with --no-edge-registry." 
+ })), + )); + }; + let force_refresh = matches!(params.refresh.as_deref(), Some("1") | Some("true")); + if force_refresh { + tracing::debug!( + event = "edge_registry.refresh_requested", + "?refresh=1 bypassed the cache; verify this isn't being abused" + ); + } + match tokio::task::spawn_blocking(move || reg.get(force_refresh)).await { + Ok(Ok(resp)) => Ok(Json(serde_json::to_value(resp).unwrap_or(serde_json::json!({})))), + Ok(Err(err)) => { + tracing::warn!(error = %err, "edge_registry upstream fetch failed and no cache"); + Err(( + StatusCode::SERVICE_UNAVAILABLE, + Json(serde_json::json!({ + "error": "edge_registry_upstream_unavailable", + "detail": err.to_string() + })), + )) + } + Err(join_err) => { + tracing::error!(error = %join_err, "edge_registry spawn_blocking task panicked"); + Err(( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({ + "error": "edge_registry_internal_error", + "detail": join_err.to_string() + })), + )) + } + } +} + /// GET /api/v1/edge-vitals — latest edge vitals from ESP32 (ADR-039). async fn edge_vitals_endpoint(State(state): State) -> Json { let s = state.read().await; @@ -5048,6 +5141,26 @@ async fn main() { let runtime_config = load_runtime_config(&data_dir); info!("Loaded runtime config: dedup_factor={:.2}", runtime_config.dedup_factor); + // ADR-102: optional Edge Module Registry. None when --no-edge-registry + // is set (or when the URL is empty); otherwise we construct one with + // the configured TTL. The fetch happens lazily on first request. 
+ let edge_registry: Option<std::sync::Arc<wifi_densepose_sensing_server::edge_registry::EdgeRegistry>> = + if args.no_edge_registry || args.edge_registry_url.is_empty() { + info!("Edge module registry: DISABLED (--no-edge-registry or empty URL)"); + None + } else { + info!( + "Edge module registry: enabled — upstream={} ttl={}s", + args.edge_registry_url, args.edge_registry_ttl_secs + ); + Some(std::sync::Arc::new( + wifi_densepose_sensing_server::edge_registry::EdgeRegistry::new( + args.edge_registry_url.clone(), + std::time::Duration::from_secs(args.edge_registry_ttl_secs), + ), + )) + }; + let (tx, _) = broadcast::channel::(256); // ADR-099: parallel broadcast for the per-frame introspection snapshot stream // consumed by `/ws/introspection`. Same ring size as `tx` (256) — slow @@ -5242,6 +5355,11 @@ async fn main() { // Vital sign endpoints .route("/api/v1/vital-signs", get(vital_signs_endpoint)) .route("/api/v1/edge-vitals", get(edge_vitals_endpoint)) + // ADR-102: Edge Module Registry — surfaces the canonical Cognitum cog + // catalog (`https://storage.googleapis.com/cognitum-apps/app-registry.json`) + // with in-process TTL cache + stale-on-error fallback. Disabled when + // --no-edge-registry is set (returns 404). + .route("/api/v1/edge/registry", get(edge_registry_endpoint)) .route("/api/v1/wasm-events", get(wasm_events_endpoint)) // RVF model container info .route("/api/v1/model/info", get(model_info)) @@ -5292,6 +5410,9 @@ async fn main() { .route("/api/v1/config/ground-truth", post(config_set_ground_truth)) // Static UI files .nest_service("/ui", ServeDir::new(&ui_path)) + // ADR-102: make the edge registry handle (Option<Arc<EdgeRegistry>>) + // available to the /api/v1/edge/registry handler. None when disabled. + .layer(Extension(edge_registry.clone())) .layer(SetResponseHeaderLayer::overriding( axum::http::header::CACHE_CONTROL, HeaderValue::from_static("no-cache, no-store, must-revalidate"),