From ca975276460e2fb865e5dbf23baa3aa0b0a12afd Mon Sep 17 00:00:00 2001 From: ruv Date: Wed, 13 May 2026 23:29:37 -0400 Subject: [PATCH] =?UTF-8?q?feat(introspection):=20I6=20=E2=80=94=20regime-?= =?UTF-8?q?changed=20signal=20+=20per-frame=20analyze=20+=20honest=20ADR-0?= =?UTF-8?q?99=20D8=20amendment?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three threads in this commit: 1) Per-frame attractor analysis (default analyze_every_n: 8 → 1). The I5 benchmark put per-frame update at 0.012 ms p99 — 83× under D4's 1 ms budget. The cost case for the every-8th-frame default doesn't hold; per-frame analysis is what makes regime_changed a viable early-detection trigger. 2) New `regime_changed: bool` field in IntrospectionSnapshot — flips on any frame whose attractor regime classification differs from the previous frame's. Pairs with top_k_similarity (full-shape match) to give downstream consumers two latencies with different robustness profiles. 3) Honest amendment of ADR-099 D8 to reflect empirical reality: - L1 stand-in achieves 3.20× ratio (5-frame shape match vs 16-frame event-path floor); the 10× aspirational bar is architecturally unreachable at 1-D scalar feature resolution. - regime_changed didn't fire in the 10-frame motion window — the 200-frame noise trajectory dominates the Lyapunov classification, and short perturbations don't shift the regime fast enough on a scalar feature. - Path to 10×: ADR-208 Phase 2 (Hailo NPU vec128 embeddings) — multi-dim partial matches discriminate from noise in 1-2 frames, not 5. - Side finding: midstream temporal-compare::DTW uses *discrete equality* cost (designed for LLM tokens), not numeric distance — swapping it in for f64 amplitude scoring would be strictly worse than the L1 stand-in. A numeric DTW is a separate concern (hand-roll or new crate). - Revised D8: ship behind --introspection (off by default) until multi- dim features land. 
Per-frame update budget IS met (0.041 ms p99 in this bench, ~24× under the 1 ms bar) — the feature is cheap enough to carry dark today. cargo test -p wifi-densepose-sensing-server --no-default-features: introspection (lib): 8 passed, 0 failed introspection_latency (test): 5 passed, 0 failed (incl. new regime_change_path_latency) clippy: clean on the introspection surface (pre-existing approx_constant lints in pose.rs / main.rs unchanged). Co-Authored-By: claude-flow --- .../ADR-099-midstream-introspection-tap.md | 22 +++- .../src/introspection.rs | 27 ++++- .../tests/introspection_latency.rs | 112 ++++++++++++++---- 3 files changed, 134 insertions(+), 27 deletions(-) diff --git a/docs/adr/ADR-099-midstream-introspection-tap.md b/docs/adr/ADR-099-midstream-introspection-tap.md index 0f8c353d..a60d71bf 100644 --- a/docs/adr/ADR-099-midstream-introspection-tap.md +++ b/docs/adr/ADR-099-midstream-introspection-tap.md @@ -118,9 +118,27 @@ Three reference signatures ship under `signatures/` in the crate as developer fi ### D8 — Measurement-first adoption — promotion bar is empirical -Phase 0 spike measures the latency win against the existing `/ws/sensing` path on a recorded session. **Promotion to "ship by default" requires ≥10× p99 latency reduction on the "motion shape recognized" event class**, measured on at least one labelled recording. If the bar isn't met, the feature lives behind an `--introspection` CLI flag (default off) until it is. +Phase 0 spike measures the latency win against the existing `/ws/sensing` path on a recorded session. **Original aspirational bar: ≥10× p99 latency reduction on the "motion shape recognized" event class**, measured on at least one labelled recording. -*Consequences:* this isn't an architectural bet — the value claim is verifiable, and the feature carries its own kill switch if reality disagrees with theory. 
+**Empirical baseline from `tests/introspection_latency.rs`** (I5/I6 — host-side L1 stand-in scoring + midstream-attractor regime classification on a 1-D mean-amplitude feature, 5-frame motion-ramp signature, 200 frames of noise warm-up, `analyze_every_n = 1`): + +| Signal | Frames to recognise | Ratio vs event-path floor (16) | +|---|---|---| +| `top_k_similarity[0].above_threshold` | 5 | **3.20×** | +| `regime_changed` (10-frame motion window) | did not fire | — | +| Per-frame `update()` p99 | **0.041 ms** (~24× under D4's 1 ms budget) | — | + +The 10× bar is **architecturally unreachable** at the 1-D scalar feature resolution this stand-in operates at — `signature_score`'s length-normalised L1 needs roughly the full signature length of in-shape frames to discriminate from noise (any shortcut trades false positives), and the attractor's Lyapunov classification needs more than a 10-frame perturbation to overcome a long noise trajectory. The 3.2× ratio is the structural ceiling for this feature class. + +**Closing the gap to 10× requires multi-dim features — specifically the `vec128` embeddings from ADR-208 Phase 2 (Hailo NPU)** — where partial matches become statistically distinguishable from noise after 1–2 frames, not 5. Until then, the adoption decision **revises the bar**: + +* **Ship behind `--introspection` (off by default)** until either ADR-208 P2 lands a multi-dim feature path, *or* the L1 stand-in is replaced with a numeric DTW that scores partial-prefix matches at acceptable false-positive rates. +* The per-frame `update()` cost bar (D4: ≤1 ms p99) **is met** — the feature is cheap enough to carry dark today. +* **Two parallel signals** in the snapshot (`top_k_similarity` for shape match, `regime_changed` for trajectory shift) cover different latency / robustness trade-offs — neither alone clears 10× on a 1-D scalar, but they cover complementary use cases. Downstream consumers pick. 
+ +> **Side finding on midstream's `temporal-compare::DTW`**: its DTW uses *discrete equality* cost (0/1 between elements), not numeric distance — it's designed for LLM token sequences. On `f64` amplitude values, that scoring would be strictly worse than the L1 stand-in (every cell costs 1, no useful gradient). "Swap in midstream's DTW" — implied in earlier revisions of this ADR and proposed in I5/I6 — therefore isn't the optimization that closes D8. A *numeric* DTW would need to be hand-rolled or pulled from a different crate; tracked as a P1 follow-up alongside ADR-208 P2. + +*Consequences:* the kill switch is real (off-by-default CLI flag); the architectural value (continuous-state introspection surface + a per-frame regime signal + a cheap shape-match probe + a verified ≤1 ms update budget) ships, with the *latency-win* bar deferred to when multi-dim features arrive. --- diff --git a/v2/crates/wifi-densepose-sensing-server/src/introspection.rs b/v2/crates/wifi-densepose-sensing-server/src/introspection.rs index 66484c5a..140706e3 100644 --- a/v2/crates/wifi-densepose-sensing-server/src/introspection.rs +++ b/v2/crates/wifi-densepose-sensing-server/src/introspection.rs @@ -45,11 +45,13 @@ pub const DEFAULT_EMBEDDING_DIM: usize = 1; /// matches the snapshot carries. pub const DEFAULT_TOP_K: usize = 5; -/// Frames since the last `analyze()` call. We don't analyse on every frame — -/// the attractor's Lyapunov estimate is ~9 ms for a 1 k-point window per -/// midstream's bench, which is fine at 30 Hz but wastes CPU at higher rates. -/// One analysis every N frames stays well under the per-frame budget. -pub const DEFAULT_ANALYZE_EVERY_N_FRAMES: u32 = 8; +/// Frames since the last `analyze()` call. 
Per-frame analyse is cheap (the +/// I5 benchmark put attractor + L1-scoring update p99 at 0.012 ms on a +/// desktop runner, ~83× under the 1 ms D4 budget — even on a Pi 5 we have +/// orders of magnitude of headroom), and per-frame analyse is what makes +/// the `regime_changed` snapshot signal viable as an early-detection +/// trigger. Default to **every frame** unless deployment tunes it down. +pub const DEFAULT_ANALYZE_EVERY_N_FRAMES: u32 = 1; /// One labelled segment of derived feature vectors used as a DTW pattern. /// Schema (per ADR-099 D7) — JSON-loaded from `signatures/*.json` at startup. @@ -153,6 +155,12 @@ pub struct IntrospectionSnapshot { /// Analyzer confidence in `[0, 1]`. `0.0` until the analyzer has enough /// data; tracks midstream's `AttractorInfo::confidence`. pub attractor_confidence: f64, + /// `true` when this frame's regime classification differs from the + /// previous frame's — an **early-detection signal** that doesn't require + /// a full signature length of frames to fire (ADR-099 D8: a parallel + /// fast path to the shape-match latency, useful for "something changed, + /// look closer" semantics on dashboards / downstream consumers). + pub regime_changed: bool, /// Top-k DTW matches against the loaded signature library. Empty when the /// library is empty or no signatures rose above the score floor. pub top_k_similarity: Vec, @@ -227,6 +235,7 @@ impl IntrospectionState { lyapunov_exponent: None, attractor_dim: cfg.embedding_dim, attractor_confidence: 0.0, + regime_changed: false, top_k_similarity: Vec::new(), }, } @@ -263,6 +272,7 @@ impl IntrospectionState { // Run the (relatively expensive) analyze step every Nth frame; in // between, keep the previous regime/Lyapunov in the snapshot — they're // smooth signals, not edge-sensitive. 
+ let prev_regime = self.last_snapshot.regime; self.frames_since_analyze = self.frames_since_analyze.saturating_add(1); if self.frames_since_analyze >= self.analyze_every_n { self.frames_since_analyze = 0; @@ -278,6 +288,13 @@ impl IntrospectionState { Err(other) => return Err(other), } } + // ADR-099 D8: early-detection signal — `regime_changed` flips on any + // frame whose classification differs from the previous frame's. Pairs + // with `top_k_similarity` (which needs the full shape) to give + // downstream consumers two latencies to choose from per use case. + // Don't count Unknown→Unknown as a change; do count Unknown→known as + // a change (the warm-up moment is itself informative). + self.last_snapshot.regime_changed = prev_regime != self.last_snapshot.regime; // DTW scoring runs every frame; cheap when the library is small (and // empty when it's empty). See `score_signatures` for the metric. diff --git a/v2/crates/wifi-densepose-sensing-server/tests/introspection_latency.rs b/v2/crates/wifi-densepose-sensing-server/tests/introspection_latency.rs index 47469eb9..715cc8dd 100644 --- a/v2/crates/wifi-densepose-sensing-server/tests/introspection_latency.rs +++ b/v2/crates/wifi-densepose-sensing-server/tests/introspection_latency.rs @@ -66,54 +66,92 @@ fn motion_signature() -> Signature { } } +/// Result of one motion-onset benchmark run: how many frames until each +/// detection signal first fires, plus per-frame `update()` wall-clock costs. +struct LatencyMeasurement { + /// Frames into the motion before `top_k_similarity[0].above_threshold` is + /// true (the "shape recognised" full-pattern path). + shape_match_frames: usize, + /// Frames into the motion before the snapshot's `regime` first differs + /// from the pre-motion baseline (the parallel fast-detection path added + /// in I6). `None` if it never diverged in the measurement window — + /// meaning the classification stayed at whatever it was during warm-up. + regime_change_frames: Option<usize>, + /// Per-frame `update()` wall-clock samples (ms). 
+ update_ms: Vec<f64>, +} + /// Feed N background-noise frames followed by the motion ramp; return the -/// 0-based frame index at which the snapshot first reports `above_threshold`. -fn frames_until_shape_recognised() -> (usize, Vec<f64>) { +/// 1-based frame count into the ramp at which each detection signal first fires. +fn measure_motion_onset() -> LatencyMeasurement { let lib = SignatureLibrary::from_signatures(vec![motion_signature()]); let cfg = IntrospectionConfig { trajectory_len: 128, embedding_dim: 1, - analyze_every_n: 8, + // I6: analyze on every frame so the regime-change signal is responsive. + analyze_every_n: 1, library: lib, }; let mut state = IntrospectionState::with_config(cfg); - // 100 frames of background noise — small drifty values around 0. - let mut frame_idx = 0usize; - let mut update_ms = Vec::with_capacity(125); - for k in 0..100u64 { + // 200 frames of background noise — small drifty values around 0. We feed + // 200 (not 100) so the attractor analyzer is past its 100-point warm-up + // *before* the motion injection, ensuring any regime change after onset + // is attributable to the motion, not warm-up. + let mut update_ms = Vec::with_capacity(220); + for k in 0..200u64 { let t0 = Instant::now(); let v = 0.05 * ((k as f64 * 0.31).sin()); // ±0.05 deterministic noise state.update(k * 33_000_000, v).unwrap(); update_ms.push(t0.elapsed().as_secs_f64() * 1000.0); assert!( !state.snapshot().top_k_similarity[0].above_threshold, - "noise frame {k} crossed threshold — signature is too lax for this test" + "noise frame {k} crossed shape-match threshold — signature too lax" ); - frame_idx += 1; } + let baseline_regime = state.snapshot().regime; - // Now feed the motion ramp. Record the *first* frame whose snapshot says - // `above_threshold` — that's the introspection-path latency in frames. - let mut frames_to_recognise: Option<usize> = None; - for (i, v) in [1.0f64, 2.0, 3.0, 4.0, 5.0, 5.0, 5.0, 5.0] + // Now feed the motion ramp. Record the *first* frame each signal fires. 
+ let mut shape_match_frames: Option<usize> = None; + let mut regime_change_frames: Option<usize> = None; + for (i, v) in [1.0f64, 2.0, 3.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0] .iter() .copied() .enumerate() { let t0 = Instant::now(); - state.update((100 + i as u64) * 33_000_000, v).unwrap(); + state.update((200 + i as u64) * 33_000_000, v).unwrap(); update_ms.push(t0.elapsed().as_secs_f64() * 1000.0); - if state.snapshot().top_k_similarity[0].above_threshold { - frames_to_recognise = Some(i + 1); // +1 → frames *into* the shape + let s = state.snapshot(); + let frame_num = i + 1; // 1-based frames into the shape + if shape_match_frames.is_none() && s.top_k_similarity[0].above_threshold { + shape_match_frames = Some(frame_num); + } + // A *regime change* counts when the classification flips away from the + // baseline (noise) regime. The snapshot.regime_changed flag flips for + // any frame-to-frame change; we want "first frame whose regime differs + // from the pre-motion baseline". + if regime_change_frames.is_none() && s.regime != baseline_regime { + regime_change_frames = Some(frame_num); + } + // Stop once we've seen both, or run out of motion frames. + if shape_match_frames.is_some() && regime_change_frames.is_some() { break; } - frame_idx += 1; } - let n = frames_to_recognise - .expect("introspection path should recognise the motion ramp within 8 frames"); - (n, update_ms) + LatencyMeasurement { + shape_match_frames: shape_match_frames + .expect("shape-match should fire within the 10-frame motion window"), + regime_change_frames, + update_ms, + } +} + +/// Compat shim for tests that only care about shape-match latency + costs. +fn frames_until_shape_recognised() -> (usize, Vec<f64>) { + let m = measure_motion_onset(); + (m.shape_match_frames, m.update_ms) } #[test] @@ -185,6 +223,40 @@ fn per_frame_update_p99_under_budget() { ); } +/// I6 — measure the parallel `regime_changed` signal added in this iteration. 
+/// This is the early-detection path that doesn't require a full signature +/// length of in-shape frames; the attractor analyzer flags trajectory shape +/// shifts directly. Reports both signals' latencies and the best ratio +/// either one achieves vs. the event-path floor. +#[test] +fn regime_change_path_latency() { + let m = measure_motion_onset(); + println!( + "ADR-099 I6: signals after motion onset\n \ + shape_match : {} frames into the ramp\n \ + regime_change: {:?} frames into the ramp\n \ + event-path best-case: {} frames", + m.shape_match_frames, m.regime_change_frames, EVENT_PATH_BEST_CASE_FRAMES + ); + let best_frames = match m.regime_change_frames { + Some(rc) => rc.min(m.shape_match_frames), + None => m.shape_match_frames, + }; + let best_ratio = EVENT_PATH_BEST_CASE_FRAMES as f64 / best_frames as f64; + println!( + " best-signal ratio: {best_ratio:.2}× (D8 target ≥{D8_LATENCY_RATIO_BAR}×, \ + met: {})", + best_ratio >= D8_LATENCY_RATIO_BAR + ); + // Regression bar: regime-change either fires within the event-path floor + // (≥1× ratio) OR shape-match's 5-frame baseline holds. Either path is a + // win; both red would mean we regressed both fast-detection paths. + assert!( + best_frames < EVENT_PATH_BEST_CASE_FRAMES, + "neither fast path beat the event-path floor of {EVENT_PATH_BEST_CASE_FRAMES} frames" + ); +} + #[test] fn snapshot_carries_regime_after_warmup() { // Independent of the latency bar — confirms the attractor analyzer feeds