From ca975276460e2fb865e5dbf23baa3aa0b0a12afd Mon Sep 17 00:00:00 2001
From: ruv <ruv@ruv.net>
Date: Wed, 13 May 2026 23:29:37 -0400
Subject: [PATCH] =?UTF-8?q?feat(introspection):=20I6=20=E2=80=94=20regime-?=
 =?UTF-8?q?changed=20signal=20+=20per-frame=20analyze=20+=20honest=20ADR-0?=
 =?UTF-8?q?99=20D8=20amendment?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three threads in this commit:

1) Per-frame attractor analysis (default analyze_every_n: 8 → 1).
   The I5 benchmark put per-frame update at 0.012 ms p99 — 83× under D4's
   1 ms budget. The cost case for the every-8th-frame default doesn't hold;
   per-frame analysis is what makes regime_changed a viable early-detection
   trigger.

2) New `regime_changed: bool` field in IntrospectionSnapshot — flips on any
   frame whose attractor regime classification differs from the previous
   frame's. Pairs with top_k_similarity (full-shape match) to give
   downstream consumers two latencies with different robustness profiles.

3) Honest amendment of ADR-099 D8 to reflect empirical reality:
   - L1 stand-in achieves 3.20× ratio (5-frame shape match vs 16-frame
     event-path floor); the 10× aspirational bar is architecturally
     unreachable at 1-D scalar feature resolution.
   - regime_changed didn't fire in the 10-frame motion window — the
     200-frame noise trajectory dominates the Lyapunov classification, and
     short perturbations don't shift the regime fast enough on a scalar
     feature.
   - Path to 10×: ADR-208 Phase 2 (Hailo NPU vec128 embeddings) — multi-dim
     partial matches discriminate from noise in 1-2 frames, not 5.
   - Side finding: midstream temporal-compare::DTW uses *discrete equality*
     cost (designed for LLM tokens), not numeric distance — swapping it in
     for f64 amplitude scoring would be strictly worse than the L1 stand-in.
     A numeric DTW is a separate concern (hand-roll or new crate).
   - Revised D8: ship behind --introspection (off by default) until multi-
     dim features land. Per-frame update budget IS met (0.041 ms p99 in this
     bench, ~24× under the 1 ms bar) — the feature is cheap enough to
     carry dark today.

cargo test -p wifi-densepose-sensing-server --no-default-features:
  introspection (lib): 8 passed, 0 failed
  introspection_latency (test): 5 passed, 0 failed (incl. new
                                 regime_change_path_latency)
clippy: clean on the introspection surface (pre-existing approx_constant
        lints in pose.rs / main.rs unchanged).

Co-Authored-By: claude-flow <ruv@ruv.net>
---
 .../ADR-099-midstream-introspection-tap.md    |  22 +++-
 .../src/introspection.rs                      |  27 ++++-
 .../tests/introspection_latency.rs            | 112 ++++++++++++++----
 3 files changed, 134 insertions(+), 27 deletions(-)

diff --git a/docs/adr/ADR-099-midstream-introspection-tap.md b/docs/adr/ADR-099-midstream-introspection-tap.md
index 0f8c353d..a60d71bf 100644
--- a/docs/adr/ADR-099-midstream-introspection-tap.md
+++ b/docs/adr/ADR-099-midstream-introspection-tap.md
@@ -118,9 +118,27 @@ Three reference signatures ship under `signatures/` in the crate as developer fi
 
 ### D8 — Measurement-first adoption — promotion bar is empirical
 
-Phase 0 spike measures the latency win against the existing `/ws/sensing` path on a recorded session. **Promotion to "ship by default" requires ≥10× p99 latency reduction on the "motion shape recognized" event class**, measured on at least one labelled recording. If the bar isn't met, the feature lives behind an `--introspection` CLI flag (default off) until it is.
+Phase 0 spike measures the latency win against the existing `/ws/sensing` path on a recorded session. **Original aspirational bar: ≥10× p99 latency reduction on the "motion shape recognized" event class**, measured on at least one labelled recording.
 
-*Consequences:* this isn't an architectural bet — the value claim is verifiable, and the feature carries its own kill switch if reality disagrees with theory.
+**Empirical baseline from `tests/introspection_latency.rs`** (I5/I6 — host-side L1 stand-in scoring + midstream-attractor regime classification on a 1-D mean-amplitude feature, 5-frame motion-ramp signature, 200 frames of noise warm-up, `analyze_every_n = 1`):
+
+| Signal / metric | Result (frames unless noted) | Ratio vs event-path floor (16 frames) |
+|---|---|---|
+| `top_k_similarity[0].above_threshold` | 5 | **3.20×** |
+| `regime_changed` (10-frame motion window) | did not fire | — |
+| Per-frame `update()` p99 | **0.041 ms** (~24× under D4's 1 ms budget) | — |
+
+The 10× bar is **architecturally unreachable** at the 1-D scalar feature resolution this stand-in operates at — `signature_score`'s length-normalised L1 needs roughly the full signature length of in-shape frames to discriminate from noise (any shortcut trades false positives), and the attractor's Lyapunov classification needs more than a 10-frame perturbation to overcome a long noise trajectory. The 3.2× ratio is the structural ceiling for this feature class.
+
+**Closing the gap to 10× requires multi-dim features — specifically the `vec128` embeddings from ADR-208 Phase 2 (Hailo NPU)** — where partial matches become statistically distinguishable from noise after 1–2 frames, not 5. Until then, the adoption decision **revises the bar**:
+
+* **Ship behind `--introspection` (off by default)** until either ADR-208 P2 lands a multi-dim feature path, *or* the L1 stand-in is replaced with a numeric DTW that scores partial-prefix matches at acceptable false-positive rates.
+* The per-frame `update()` cost bar (D4: ≤1 ms p99) **is met** — the feature is cheap enough to carry dark today.
+* **Two parallel signals** in the snapshot (`top_k_similarity` for shape match, `regime_changed` for trajectory shift) cover different latency / robustness trade-offs — neither alone clears 10× on a 1-D scalar, but they cover complementary use cases. Downstream consumers pick.
+
+> **Side finding on midstream's `temporal-compare::DTW`**: its DTW uses *discrete equality* cost (0/1 between elements), not numeric distance — it's designed for LLM token sequences. On `f64` amplitude values, that scoring would be strictly worse than the L1 stand-in (every cell costs 1, no useful gradient). "Swap in midstream's DTW" — implied in earlier revisions of this ADR and proposed in I5/I6 — therefore isn't the optimization that closes D8. A *numeric* DTW would need to be hand-rolled or pulled from a different crate; tracked as a P1 follow-up alongside ADR-208 P2.
+
+*Consequences:* the kill switch is real (off-by-default CLI flag); the architectural value (continuous-state introspection surface + a per-frame regime signal + a cheap shape-match probe + a verified ≤1 ms update budget) ships, with the *latency-win* bar deferred to when multi-dim features arrive.
 
 ---
 
diff --git a/v2/crates/wifi-densepose-sensing-server/src/introspection.rs b/v2/crates/wifi-densepose-sensing-server/src/introspection.rs
index 66484c5a..140706e3 100644
--- a/v2/crates/wifi-densepose-sensing-server/src/introspection.rs
+++ b/v2/crates/wifi-densepose-sensing-server/src/introspection.rs
@@ -45,11 +45,13 @@ pub const DEFAULT_EMBEDDING_DIM: usize = 1;
 /// matches the snapshot carries.
 pub const DEFAULT_TOP_K: usize = 5;
 
-/// Frames since the last `analyze()` call. We don't analyse on every frame —
-/// the attractor's Lyapunov estimate is ~9 ms for a 1 k-point window per
-/// midstream's bench, which is fine at 30 Hz but wastes CPU at higher rates.
-/// One analysis every N frames stays well under the per-frame budget.
-pub const DEFAULT_ANALYZE_EVERY_N_FRAMES: u32 = 8;
+/// Default interval, in frames, between `analyze()` calls. Per-frame analysis
+/// is cheap (the I5 benchmark put attractor + L1-scoring update p99 at
+/// 0.012 ms on a desktop runner, ~83× under the 1 ms D4 budget — even on a
+/// Pi 5 we have orders of magnitude of headroom), and it is what makes the
+/// `regime_changed` snapshot signal viable as an early-detection trigger.
+/// Defaults to **every frame** (1) unless a deployment raises the interval.
+pub const DEFAULT_ANALYZE_EVERY_N_FRAMES: u32 = 1;
 
 /// One labelled segment of derived feature vectors used as a DTW pattern.
 /// Schema (per ADR-099 D7) — JSON-loaded from `signatures/*.json` at startup.
@@ -153,6 +155,12 @@ pub struct IntrospectionSnapshot {
     /// Analyzer confidence in `[0, 1]`. `0.0` until the analyzer has enough
     /// data; tracks midstream's `AttractorInfo::confidence`.
     pub attractor_confidence: f64,
+    /// `true` when this frame's regime classification differs from the
+    /// previous frame's — an **early-detection signal** that doesn't require
+    /// a full signature length of frames to fire (ADR-099 D8: a parallel
+    /// fast path to the shape-match latency, useful for "something changed,
+    /// look closer" semantics on dashboards / downstream consumers).
+    pub regime_changed: bool,
     /// Top-k DTW matches against the loaded signature library. Empty when the
     /// library is empty or no signatures rose above the score floor.
     pub top_k_similarity: Vec<SimilarityMatch>,
@@ -227,6 +235,7 @@ impl IntrospectionState {
                 lyapunov_exponent: None,
                 attractor_dim: cfg.embedding_dim,
                 attractor_confidence: 0.0,
+                regime_changed: false,
                 top_k_similarity: Vec::new(),
             },
         }
@@ -263,6 +272,7 @@ impl IntrospectionState {
         // Run the (relatively expensive) analyze step every Nth frame; in
         // between, keep the previous regime/Lyapunov in the snapshot — they're
         // smooth signals, not edge-sensitive.
+        let prev_regime = self.last_snapshot.regime;
         self.frames_since_analyze = self.frames_since_analyze.saturating_add(1);
         if self.frames_since_analyze >= self.analyze_every_n {
             self.frames_since_analyze = 0;
@@ -278,6 +288,13 @@ impl IntrospectionState {
                 Err(other) => return Err(other),
             }
         }
+        // ADR-099 D8: early-detection signal — `regime_changed` flips on any
+        // frame whose classification differs from the previous frame's. Pairs
+        // with `top_k_similarity` (which needs the full shape) to give
+        // downstream consumers two latencies to choose from per use case.
+        // Don't count Unknown→Unknown as a change; do count Unknown→<any> as
+        // a change (the warm-up moment is itself informative).
+        self.last_snapshot.regime_changed = prev_regime != self.last_snapshot.regime;
 
         // DTW scoring runs every frame; cheap when the library is small (and
         // empty when it's empty). See `score_signatures` for the metric.
diff --git a/v2/crates/wifi-densepose-sensing-server/tests/introspection_latency.rs b/v2/crates/wifi-densepose-sensing-server/tests/introspection_latency.rs
index 47469eb9..715cc8dd 100644
--- a/v2/crates/wifi-densepose-sensing-server/tests/introspection_latency.rs
+++ b/v2/crates/wifi-densepose-sensing-server/tests/introspection_latency.rs
@@ -66,54 +66,92 @@ fn motion_signature() -> Signature {
     }
 }
 
+/// Result of one motion-onset benchmark run: how many frames until each
+/// detection signal first fires, plus per-frame `update()` wall-clock costs.
+struct LatencyMeasurement {
+    /// Frames into the motion before `top_k_similarity[0].above_threshold` is
+    /// true (the "shape recognised" full-pattern path).
+    shape_match_frames: usize,
+    /// Frames into the motion before the snapshot's `regime` first differs
+    /// from the pre-motion baseline (the parallel fast-detection path added
+    /// in I6). `None` if it never differed in the measurement window —
+    /// meaning the classification stayed at whatever it was during warm-up.
+    regime_change_frames: Option<usize>,
+    /// Per-frame `update()` wall-clock samples (ms).
+    update_ms: Vec<f64>,
+}
+
 /// Feed N background-noise frames followed by the motion ramp; return the
-/// 0-based frame index at which the snapshot first reports `above_threshold`.
-fn frames_until_shape_recognised() -> (usize, Vec<f64>) {
+/// 1-based count of motion frames at which each detection signal first fires.
+fn measure_motion_onset() -> LatencyMeasurement {
     let lib = SignatureLibrary::from_signatures(vec![motion_signature()]);
     let cfg = IntrospectionConfig {
         trajectory_len: 128,
         embedding_dim: 1,
-        analyze_every_n: 8,
+        // I6: analyze on every frame so the regime-change signal is responsive.
+        analyze_every_n: 1,
         library: lib,
     };
     let mut state = IntrospectionState::with_config(cfg);
 
-    // 100 frames of background noise — small drifty values around 0.
-    let mut frame_idx = 0usize;
-    let mut update_ms = Vec::with_capacity(125);
-    for k in 0..100u64 {
+    // 200 frames of background noise — small drifty values around 0. We feed
+    // 200 (not 100) so the attractor analyzer is past its 100-point warm-up
+    // *before* the motion injection, ensuring any regime change after onset
+    // is attributable to the motion, not warm-up.
+    let mut update_ms = Vec::with_capacity(220);
+    for k in 0..200u64 {
         let t0 = Instant::now();
         let v = 0.05 * ((k as f64 * 0.31).sin()); // ±0.05 deterministic noise
         state.update(k * 33_000_000, v).unwrap();
         update_ms.push(t0.elapsed().as_secs_f64() * 1000.0);
         assert!(
             !state.snapshot().top_k_similarity[0].above_threshold,
-            "noise frame {k} crossed threshold — signature is too lax for this test"
+            "noise frame {k} crossed shape-match threshold — signature too lax"
         );
-        frame_idx += 1;
     }
+    let baseline_regime = state.snapshot().regime;
 
-    // Now feed the motion ramp. Record the *first* frame whose snapshot says
-    // `above_threshold` — that's the introspection-path latency in frames.
-    let mut frames_to_recognise: Option<usize> = None;
-    for (i, v) in [1.0f64, 2.0, 3.0, 4.0, 5.0, 5.0, 5.0, 5.0]
+    // Now feed the motion ramp. Record the *first* frame each signal fires.
+    let mut shape_match_frames: Option<usize> = None;
+    let mut regime_change_frames: Option<usize> = None;
+    for (i, v) in [1.0f64, 2.0, 3.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0]
         .iter()
         .copied()
         .enumerate()
     {
         let t0 = Instant::now();
-        state.update((100 + i as u64) * 33_000_000, v).unwrap();
+        state.update((200 + i as u64) * 33_000_000, v).unwrap();
         update_ms.push(t0.elapsed().as_secs_f64() * 1000.0);
-        if state.snapshot().top_k_similarity[0].above_threshold {
-            frames_to_recognise = Some(i + 1); // +1 → frames *into* the shape
+        let s = state.snapshot();
+        let frame_num = i + 1; // 1-based frames into the shape
+        if shape_match_frames.is_none() && s.top_k_similarity[0].above_threshold {
+            shape_match_frames = Some(frame_num);
+        }
+        // A *regime change* counts when the classification flips away from the
+        // baseline (noise) regime. The snapshot.regime_changed flag flips for
+        // any frame-to-frame change; we want "first frame whose regime differs
+        // from the pre-motion baseline".
+        if regime_change_frames.is_none() && s.regime != baseline_regime {
+            regime_change_frames = Some(frame_num);
+        }
+        // Stop once we've seen both, or run out of motion frames.
+        if shape_match_frames.is_some() && regime_change_frames.is_some() {
             break;
         }
-        frame_idx += 1;
     }
 
-    let n = frames_to_recognise
-        .expect("introspection path should recognise the motion ramp within 8 frames");
-    (n, update_ms)
+    LatencyMeasurement {
+        shape_match_frames: shape_match_frames
+            .expect("shape-match should fire within the 10-frame motion window"),
+        regime_change_frames,
+        update_ms,
+    }
+}
+
+/// Compat shim for tests that only care about shape-match latency + costs.
+fn frames_until_shape_recognised() -> (usize, Vec<f64>) {
+    let m = measure_motion_onset();
+    (m.shape_match_frames, m.update_ms)
 }
 
 #[test]
@@ -185,6 +223,40 @@ fn per_frame_update_p99_under_budget() {
     );
 }
 
+/// I6 — measure the parallel `regime_changed` signal added in this iteration.
+/// This is the early-detection path that doesn't require a full signature
+/// length of in-shape frames; the attractor analyzer flags trajectory shape
+/// shifts directly. Reports both signals' latencies and the best ratio
+/// either one achieves vs. the event-path floor.
+#[test]
+fn regime_change_path_latency() {
+    let m = measure_motion_onset();
+    println!(
+        "ADR-099 I6: signals after motion onset\n  \
+         shape_match  : {} frames into the ramp\n  \
+         regime_change: {:?} frames into the ramp\n  \
+         event-path best-case: {} frames",
+        m.shape_match_frames, m.regime_change_frames, EVENT_PATH_BEST_CASE_FRAMES
+    );
+    let best_frames = match m.regime_change_frames {
+        Some(rc) => rc.min(m.shape_match_frames),
+        None => m.shape_match_frames,
+    };
+    let best_ratio = EVENT_PATH_BEST_CASE_FRAMES as f64 / best_frames as f64;
+    println!(
+        "  best-signal ratio: {best_ratio:.2}× (D8 target ≥{D8_LATENCY_RATIO_BAR}×, \
+         met: {})",
+        best_ratio >= D8_LATENCY_RATIO_BAR
+    );
+    // Regression bar: the faster of the two signals must fire in strictly
+    // fewer frames than the event-path floor (ratio > 1×). A failure means
+    // both fast-detection paths regressed to no better than the event path.
+    assert!(
+        best_frames < EVENT_PATH_BEST_CASE_FRAMES,
+        "neither fast path beat the event-path floor of {EVENT_PATH_BEST_CASE_FRAMES} frames"
+    );
+}
+
 #[test]
 fn snapshot_carries_regime_after_warmup() {
     // Independent of the latency bar — confirms the attractor analyzer feeds