From 4ea84570176c6cbf309b9877787034a6f7810d90 Mon Sep 17 00:00:00 2001
From: ruv <ruv@ruv.net>
Date: Fri, 8 May 2026 12:12:17 -0400
Subject: [PATCH] =?UTF-8?q?feat(temporal):=20Dense=20backend=20implementat?=
 =?UTF-8?q?ion=20(ADR-096=20=C2=A75=20A/B=20gate,=20#513)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes the Dense placeholder from earlier commits. Now both backends
implement forward(); only SparseGqa supports streaming step()/KvCache,
which is the structural gap dense MHA can't bridge by design.

Dense path:
- src/dense.rs new — DenseHead wraps upstream dense_attention. Stores
  causal flag and (cloned) config. forward() is a one-line delegation;
  no GQA dispatch (dense_attention upstream requires q_heads == kv_heads).
- AetherTemporalHead::Dense changed from a unit variant to Dense(DenseHead).
  Construction succeeds for any valid TemporalHeadConfig where backend
  is Dense.
- AetherTemporalHead.step() returns BackendDoesNotSupportStreaming for
  Dense — there is no dense-MHA-with-KV-cache equivalent and offering
  one would silently swallow the ADR-096 §3.2 structural argument.
- AetherTemporalHead.make_cache() likewise — there's no cache to size
  for a dense kernel.

Errors:
- New TemporalError::BackendDoesNotSupportStreaming variant covers
  the Dense-step / Dense-make_cache cases. Specific so callers can
  fall back to forward() instead of giving up entirely.
- TemporalError::DenseBackendNotImplemented retained for v0.1
  back-compat (no consumers depend on it post-this-commit, but
  removing a public variant is a hard break). Future work can
  deprecate it once downstream callers move off.

Tests (19/19 passing):
- dense_backend_returns_typed_error → renamed and rewritten as
  dense_backend_forward_runs_with_matching_shape: constructs a Dense
  head, runs forward over (32, 4, 4, 16) Q/K/V, asserts output shape.
- New dense_backend_step_returns_streaming_error: constructs Dense,
  attempts make_cache, expects BackendDoesNotSupportStreaming.
- All 8 weight blob, 2 blob e2e, 3 streaming, 5 other smoke tests
  unchanged and still passing.

This commit completes the ADR-096 §5 A/B gate: callers can now run
the same Q/K/V through both backends and compare outputs / latency.
The §5 four-gate validation (contrastive loss within 1%, rank-1
within 1pp, Spearman ≥0.95, latency ≥5×) becomes a runnable
proposition, not a future task — though the actual gate run requires
trained AETHER weights, which is its own track.

Co-Authored-By: claude-flow <ruv@ruv.net>
---
 .../wifi-densepose-temporal/src/dense.rs      | 44 +++++++++++++++++++
 .../wifi-densepose-temporal/src/error.rs      |  9 ++++
 v2/crates/wifi-densepose-temporal/src/lib.rs  | 27 ++++++++----
 .../wifi-densepose-temporal/tests/smoke.rs    | 34 +++++++++++---
 4 files changed, 99 insertions(+), 15 deletions(-)
 create mode 100644 v2/crates/wifi-densepose-temporal/src/dense.rs

diff --git a/v2/crates/wifi-densepose-temporal/src/dense.rs b/v2/crates/wifi-densepose-temporal/src/dense.rs
new file mode 100644
index 00000000..5abc9023
--- /dev/null
+++ b/v2/crates/wifi-densepose-temporal/src/dense.rs
@@ -0,0 +1,44 @@
+use ruvllm_sparse_attention::{dense_attention, Tensor3};
+
+use crate::{TemporalError, TemporalHeadConfig};
+
+/// Dense MHA backend (ADR-096 §5 A/B baseline).
+///
+/// Wraps upstream `dense_attention` — the naive O(N²) reference kernel.
+/// Same approximation surface as classical scaled-dot-product attention,
+/// no log-stride / landmarks / windowing. Exists primarily as the
+/// reference path for the §5 validation gate (rank correlation,
+/// contrastive-loss parity, latency baseline).
+///
+/// Has no streaming counterpart: dense MHA structurally cannot do
+/// O(log T) decode — every new token requires recomputing the full
+/// attention matrix. Callers that want streaming must use SparseGqa.
+pub struct DenseHead {
+    causal: bool,
+    cfg: TemporalHeadConfig,
+}
+
+impl DenseHead {
+    pub fn new(cfg: &TemporalHeadConfig) -> Result<Self, TemporalError> {
+        cfg.validate()?;
+        Ok(Self {
+            causal: cfg.causal,
+            cfg: cfg.clone(),
+        })
+    }
+
+    pub fn cfg(&self) -> &TemporalHeadConfig {
+        &self.cfg
+    }
+
+    /// Naive O(N²) prefill. Q/K/V must share the same head count
+    /// (no GQA) — `dense_attention` upstream enforces it.
+    pub fn forward(
+        &self,
+        q: &Tensor3,
+        k: &Tensor3,
+        v: &Tensor3,
+    ) -> Result<Tensor3, TemporalError> {
+        Ok(dense_attention(q, k, v, self.causal)?)
+    }
+}
diff --git a/v2/crates/wifi-densepose-temporal/src/error.rs b/v2/crates/wifi-densepose-temporal/src/error.rs
index 9cccf73e..893a6243 100644
--- a/v2/crates/wifi-densepose-temporal/src/error.rs
+++ b/v2/crates/wifi-densepose-temporal/src/error.rs
@@ -5,9 +5,18 @@ pub enum TemporalError {
     #[error("temporal head config invalid: {0}")]
     InvalidConfig(&'static str),
 
+    /// Retained for back-compat with v0.1 callers; superseded by the
+    /// per-operation errors below now that Dense is implemented.
     #[error("dense MHA backend not implemented yet (ADR-096 §4.4 follow-up)")]
     DenseBackendNotImplemented,
 
+    /// Dense MHA has no notion of an accumulated KV cache — every
+    /// new frame requires recomputing the full N² attention matrix
+    /// (the structural gap ADR-096 §3.2 flagged). Callers that want
+    /// streaming decode must use the SparseGqa backend.
+    #[error("dense backend does not support streaming step(); use SparseGqa for online decode")]
+    BackendDoesNotSupportStreaming,
+
     #[error("sparse attention kernel error: {0}")]
     Kernel(String),
 }
diff --git a/v2/crates/wifi-densepose-temporal/src/lib.rs b/v2/crates/wifi-densepose-temporal/src/lib.rs
index 64f06129..f1bd0630 100644
--- a/v2/crates/wifi-densepose-temporal/src/lib.rs
+++ b/v2/crates/wifi-densepose-temporal/src/lib.rs
@@ -10,11 +10,13 @@
 // (ADR-096 §8.5) is finalized.
 
 pub mod config;
+pub mod dense;
 pub mod error;
 pub mod sparse;
 pub mod weights;
 
 pub use config::{TemporalBackendKind, TemporalHeadConfig};
+pub use dense::DenseHead;
 pub use error::TemporalError;
 pub use sparse::SparseGqaHead;
 pub use weights::{
@@ -28,12 +30,13 @@ pub use ruvllm_sparse_attention::{KvCache, Tensor3};
 
 /// Thin facade so callers can pick a backend by name.
 ///
-/// Today only `SparseGqa` is implemented; `Dense` is reserved per
-/// ADR-096 §4.4 and returns `TemporalError::DenseBackendNotImplemented`
-/// until the back-compat path lands.
+/// Both backends implement `forward()` for prefill. Only `SparseGqa`
+/// implements `step()` (streaming O(log T) decode against KvCache);
+/// dense MHA structurally lacks a streaming counterpart and returns
+/// `TemporalError::BackendDoesNotSupportStreaming` on `step()`.
 pub enum AetherTemporalHead {
     SparseGqa(SparseGqaHead),
-    Dense, // placeholder; ADR-096 §4.4 selection rule
+    Dense(DenseHead),
 }
 
 impl AetherTemporalHead {
@@ -42,7 +45,7 @@ impl AetherTemporalHead {
             TemporalBackendKind::SparseGqa => {
                 Ok(AetherTemporalHead::SparseGqa(SparseGqaHead::new(cfg)?))
             }
-            TemporalBackendKind::Dense => Err(TemporalError::DenseBackendNotImplemented),
+            TemporalBackendKind::Dense => Ok(AetherTemporalHead::Dense(DenseHead::new(cfg)?)),
         }
     }
 
@@ -59,7 +62,7 @@ impl AetherTemporalHead {
     ) -> Result<Tensor3, TemporalError> {
         match self {
             AetherTemporalHead::SparseGqa(h) => h.forward(q, k, v),
-            AetherTemporalHead::Dense => Err(TemporalError::DenseBackendNotImplemented),
+            AetherTemporalHead::Dense(h) => h.forward(q, k, v),
         }
     }
 
@@ -69,6 +72,9 @@ impl AetherTemporalHead {
     ///
     /// Returns the attention output for the single new token. Caller
     /// is responsible for downstream pooling / classifier head.
+    ///
+    /// Dense backend returns `BackendDoesNotSupportStreaming` — no
+    /// dense-MHA-with-KV-cache equivalent exists, by design.
     pub fn step(
         &self,
         q_new: &Tensor3,
@@ -78,17 +84,22 @@ impl AetherTemporalHead {
     ) -> Result<Tensor3, TemporalError> {
         match self {
             AetherTemporalHead::SparseGqa(h) => h.step(q_new, k_new, v_new, cache),
-            AetherTemporalHead::Dense => Err(TemporalError::DenseBackendNotImplemented),
+            AetherTemporalHead::Dense(_) => {
+                Err(TemporalError::BackendDoesNotSupportStreaming)
+            }
         }
     }
 
     /// Allocate a `KvCache` sized correctly for this head. Convenience
     /// wrapper so AETHER's `pose_tracker.rs` doesn't need to import
     /// the upstream crate.
+    ///
+    /// Dense backend returns `BackendDoesNotSupportStreaming` — there
+    /// is no cache to size for a dense kernel.
     pub fn make_cache(&self, capacity: usize) -> Result<KvCache, TemporalError> {
         match self {
             AetherTemporalHead::SparseGqa(h) => Ok(h.make_cache(capacity)),
-            AetherTemporalHead::Dense => Err(TemporalError::DenseBackendNotImplemented),
+            AetherTemporalHead::Dense(_) => Err(TemporalError::BackendDoesNotSupportStreaming),
         }
     }
 }
diff --git a/v2/crates/wifi-densepose-temporal/tests/smoke.rs b/v2/crates/wifi-densepose-temporal/tests/smoke.rs
index 0b24ea08..92cd35f9 100644
--- a/v2/crates/wifi-densepose-temporal/tests/smoke.rs
+++ b/v2/crates/wifi-densepose-temporal/tests/smoke.rs
@@ -63,18 +63,38 @@ fn sparse_mha_path_runs_when_qkv_heads_match() {
 }
 
 #[test]
-fn dense_backend_returns_typed_error() {
+fn dense_backend_forward_runs_with_matching_shape() {
+    // `dense_attention` upstream requires q_heads == kv_heads (no GQA).
+    // Use MHA shape; window/block_size are unused by the dense forward path.
     let cfg = TemporalHeadConfig {
         backend: TemporalBackendKind::Dense,
         q_heads: 4,
-        kv_heads: 1,
-        head_dim: 32,
-        window: 32,
-        block_size: 16,
+        kv_heads: 4,
+        head_dim: 16,
+        window: 8,
+        block_size: 4,
         causal: true,
     };
-    let err = AetherTemporalHead::new(&cfg).err().expect("dense rejected");
-    matches!(err, TemporalError::DenseBackendNotImplemented);
+    let head = AetherTemporalHead::new(&cfg).expect("construct dense");
+    let (q, k, v) = make_qkv(32, 4, 4, 16);
+    let out = head.forward(&q, &k, &v).expect("dense forward");
+    assert_eq!(out.shape(), (32, 4, 16));
+}
+
+#[test]
+fn dense_backend_step_returns_streaming_error() {
+    let cfg = TemporalHeadConfig {
+        backend: TemporalBackendKind::Dense,
+        q_heads: 4,
+        kv_heads: 4,
+        head_dim: 16,
+        window: 8,
+        block_size: 4,
+        causal: true,
+    };
+    let head = AetherTemporalHead::new(&cfg).expect("construct dense");
+    let cache_err = head.make_cache(32).err().expect("no cache for dense");
+    assert!(matches!(cache_err, TemporalError::BackendDoesNotSupportStreaming));
 }
 
 #[test]