From 91248536bcbca3a82577efa915fa03cb3442619f Mon Sep 17 00:00:00 2001
From: rUv <ruv@ruv.net>
Date: Sat, 13 Jun 2026 18:24:40 -0400
Subject: [PATCH] =?UTF-8?q?feat(beyond-sota):=20ADR-156=20M2=20=E2=80=94?=
 =?UTF-8?q?=20RaBitQ=20unbiased=20distance=20estimator=20(rigorous=20publi?=
 =?UTF-8?q?shed=20negative=20on=20strict-K)=20(#1056)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(ruvector): RaBitQ unbiased distance estimator (ADR-156 M2)

Implement the real Gao & Long (SIGMOD 2024) RaBitQ contribution on top of
the existing Pass-2 rotation: an unbiased estimator of the inner product /
squared distance recovered from the 1-bit code plus 8 B/vec per-vector side
info (residual_norm + x_dot_o), used to rerank the candidate set instead of
raw Hamming.

- src/estimator.rs (new): EstimatorSketch, SideInfo, EstimatorQuery,
  DistanceEstimator (estimate_inner_product / estimate_sq_distance /
  ranking_key / cosine_ranking_key), EstimatorBank (topk_estimated[_cosine],
  with_centroid). Zero-centroid simplification documented; paper-faithful
  centroid path also built.
- src/rotation.rs: extract apply_padded() (full padded FHT frame the code
  lives in); apply() now truncates apply_padded(). No behaviour change.
- lib.rs: export estimator types.

Additive + backward-compatible: Pass-1 Sketch / Pass-2 SketchBank / WireSketch
wire format unchanged; all external callers use Pass-1 and are unaffected.

Co-Authored-By: claude-flow <ruv@ruv.net>

* test(ruvector): estimator strict-K coverage harness (ADR-156 M2)

Add measure_estimator (cosine rerank) + measure_estimator_euclidean to the
coverage harness, on the BIT-IDENTICAL fixture / cluster centres / query
stream / cosine ground truth as measure_pass1/measure_pass2 — apples-to-apples
sign-Hamming vs unbiased-estimator-rerank.

Regression tests:
- estimator_rerank_not_worse_than_sign (>= sign-only Pass-2 on a fixed fixture)
- estimator_coverage_is_deterministic
- estimator_coverage_report (--nocapture prints the strict-K table)

MEASURED strict-K (candidate_k=K=8): Pass-1 36.13% -> Pass-2-sign 46.39% ->
estimator-cosine 49.71%. Still short of the ADR-084 90% strict bar; estimator
reaches 95.12% at candidate_k=24 (vs sign 91.60%). Published negative.

Co-Authored-By: claude-flow <ruv@ruv.net>

* docs(ruvector): record RaBitQ estimator measured negative (ADR-156 §11, ADR-084)

- sketch_bench: estimator cosine/euclid columns in the coverage table.
- ADR-156 §11 (new): estimator formula + zero-centroid simplification stated
  honestly; strict-K coverage table; RESOLVED-NEGATIVE verdict (49.71% strict,
  short of 90%); pinning test names. §5 #2 + §10.5 updated.
- ADR-084 'Pass 2b' (new): estimator landed + measured strict-K vs the bar.
- CHANGELOG [Unreleased]: ADR-156 §11 Milestone-2 entry.

Co-Authored-By: claude-flow <ruv@ruv.net>
---
 CHANGELOG.md                                  |   1 +
 docs/adr/ADR-084-rabitq-similarity-sensor.md  |  29 +
 .../ADR-156-ruvector-fusion-beyond-sota.md    |  62 +-
 .../benches/sketch_bench.rs                   |  24 +-
 .../wifi-densepose-ruvector/src/coverage.rs   | 161 ++++
 .../wifi-densepose-ruvector/src/estimator.rs  | 685 ++++++++++++++++++
 v2/crates/wifi-densepose-ruvector/src/lib.rs  |   4 +
 .../wifi-densepose-ruvector/src/rotation.rs   |  26 +-
 8 files changed, 982 insertions(+), 10 deletions(-)
 create mode 100644 v2/crates/wifi-densepose-ruvector/src/estimator.rs

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c30f9e9c..bd4597ef 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -36,6 +36,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   - **#5 / #6 / #7 MEASURED-NULL, left as-is.** `node_attention_weights` 181 ns (2 nodes)…848 ns (8) — sub-µs, no hot-path alloc. `tomography reconstruct` (full 50-iter ISTA, 256 voxels) 47.5 µs (16 links) / 60.4 µs (32) — the 2 voxel buffers are already alloc-once + `.fill`-reused, negligible vs O(iters·links·voxels). `pose_tracker` Kalman cycle 150 ns (17 keypoints) / 2.82 µs (170) — the "gain matrices" are fixed-size **stack** arrays, zero heap to reuse. No rewrite shipped; the committed benches prove each is not hot.
   - **#8 MEASUREMENT-ONLY, BLAS-gated (number deferred, not fabricated).** Correction to the finding: `extract_perturbation` does **not** recompute the SVD (it projects against cached `finalize_calibration` modes); the real per-call eigendecomposition is the `eigenvalue`-feature `estimate_occupancy` (`cov.eigh()` on a 56×56 covariance). The `eig` bench is committed but `openblas-src` won't build on this Windows host ("Non-vcpkg builds are not supported on Windows" — the exact reason the project gate runs `--no-default-features`), so its µs cost must come from a Linux/BLAS box. Recorded, not estimated. Incremental SVD stays a sized future item.
   - **#14 / #16 / #19 RESOLVED — tests added (no behaviour change).** `fft_operator_within_tolerance_of_dense_canonical56` pins the full `Cir` output of the opt-in FFT path within a documented relative tolerance of the dense path on the production canonical-56 config (τ ∈ {20,50,90} ns) — it changes the witness hash, so it must be provably *close*, not silently divergent. `refinement_terminates_at_iteration_cap_when_not_converging` (+ convergent companion) proves the LO-offset refinement terminates at exactly `max_iterations` on a non-converging input (cap, not convergence, bounds the loop; internal `…_counted` refactor returns the identical offsets). `ratio_finite_at_and_below_1e_12_epsilon` pins that the conjugate-product CSI-ratio (no division → no `1e-12` divide-guard needed) is finite + bit-exact at/below the epsilon boundary and at exact zero (where a naive `H_i/H_j` ratio is ±inf/NaN).
+- **ADR-156 §11 Milestone-2: RaBitQ unbiased distance estimator — IMPLEMENTED & MEASURED (RESOLVED-NEGATIVE on the strict-K bar).** Closes the §10.5 / §8 backlog "full RaBitQ residual-distance estimator (not just a uniform scalar code)" item — the **real** Gao & Long (SIGMOD 2024) contribution, not just sign bits. New `wifi-densepose-ruvector/src/estimator.rs`: `EstimatorSketch` carries the Pass-2 sign code (over the padded FHT length `D = next_pow2(dim)`) **plus 8 B/vec side info** (`residual_norm` + `x_dot_o = ⟨x̄, o'⟩`, 2× f32); `DistanceEstimator` computes the **unbiased** estimate `⟨o',q'⟩ ≈ ⟨x̄,q'⟩ / x_dot_o` (the random rotation makes the 1-bit code's quantization error orthogonal-in-expectation to the query, paper `O(1/√D)` bound); `EstimatorBank::topk_estimated_cosine` reranks the candidate set by the estimate instead of raw Hamming. **Zero-centroid simplification (`c = 0`) stated honestly** — the paper-faithful per-cluster centroid path (`from_embedding_centred` / `EstimatorBank::with_centroid`) is also built so the simplification is a measured choice (no centroid coverage number is reported against the cosine ground truth, because cosine-of-residual ≠ cosine-of-raw would be a metric mismatch). **Purely additive + backward-compatible** — new types only; Pass-1 `Sketch` / Pass-2 `SketchBank` / `WireSketch` wire format unchanged; all external callers (`event_log.rs`, `signal/longitudinal.rs`, `sensing-server`) use Pass-1 and are unaffected. **MEASURED strict-K coverage** (same fixture/seeds as §10: dim=128 N=2048 K=8, 64 clusters, noise=0.35, 128 queries, cosine ground truth): the estimator lifts the strict `candidate_k=K` bar **46.39% (Pass-2 sign) → 49.71% (estimator, cosine rerank)** — a real **+3.3 pp** lift, **still ~40 pp short of the ADR-084 ≥90% strict bar.** At over-fetch the estimator beats sign (candidate_k=24: **95.12%** vs 91.60%). **Honest verdict — RESOLVED-NEGATIVE: the unbiased estimator does NOT clear the strict-K 90% bar on this distribution** (the binding constraint is the 1-bit code's information ceiling, not estimator variance); the bar is still met only via the over-fetch "candidate set" pattern ADR-084 specifies, though the estimator **reduces the over-fetch factor** needed. A published negative, reported as such — no benchmark tuned to manufacture a pass. Unbiasedness pinned by `estimator_unbiased_on_fixture` (Monte-Carlo mean over 4000 rotation seeds → true inner product within tolerance); not-worse-than-sign pinned by `estimator_rerank_not_worse_than_sign`; determinism by `estimator_is_deterministic`. +12 tests in the crate (119→131). Workspace **3,228 / 0 failed** (`cargo test --workspace --no-default-features`, 162 test binaries, single clean run), Python proof **VERDICT: PASS** (`f8e76f21…46f7a`, unchanged — estimator is not on the proof's signal path). Full numbers + reproduce commands in ADR-156 §11 / ADR-084 "Pass 2b".
 - **ADR-156 §8 Milestone-1: RaBitQ Pass-2 randomized rotation + multi-bit experiment — IMPLEMENTED & MEASURED (RESOLVED-PARTIAL).** Closes the §8 "Multi-bit / Extended RaBitQ" backlog item. New `wifi-densepose-ruvector/src/rotation.rs`: a deterministic randomized orthogonal rotation `R = H·D` — **Fast Hadamard Transform** (`O(d log d)`, in-place, `1/√m`-normalized so norm-preserving) + seeded ±1 sign flips (SplitMix64 from a stored `u64` seed; identical at index + query time). Chosen over a dense `d×d` matrix (`O(d²)`, infeasible at the 65,535-d the wire format provisions for); pads to `next_pow2(d)`. Additive, backward-compatible API (`Sketch::from_embedding_rotated`, `SketchBank::with_rotation` + `insert_embedding`/`topk_embedding`/`novelty_embedding`); Pass-1 and the wire format are byte-for-byte unchanged. New `coverage.rs` single-source-of-truth top-K coverage harness (anisotropic planted-cluster fixture, cosine ground truth) backs both a `#[test]` report and the `sketch_bench` coverage table. **MEASURED (dim=128 N=2048 K=8, 64 clusters, noise=0.35, 128 queries, seeded):** at the strict `candidate_k=K` bar, rotation lifts coverage **36.13% → 46.39%**; Pass-2 reaches the **ADR-084 ≥90% bar at candidate_k=24 (~3× over-fetch)**; multi-bit Pass-3 reaches 54%/67%/74% at 2/3/4-bit (strict bar). **Honest verdict: neither rotation nor ≤4-bit multi-bit clears the strict-K 90% bar on this distribution — the bar is met only via the over-fetch "candidate set" pattern ADR-084 specifies.** No benchmark was tuned to manufacture a pass; the strict-bar gap is documented (ADR-156 §10, ADR-084 "Pass 2" section). +19 tests in the crate (100→119), workspace **3,225 / 0 failed**, Python proof VERDICT: PASS (`f8e76f21…`, unchanged — sketch is not on the proof's signal path).
 - **Beyond-SOTA `v2/crates/` sweep (ADR-154–158) + full stub-implementation push — every claim MEASURED or graded.** A 5-milestone review/optimize/secure/benchmark/validate sweep, then a verified-audit-driven push to replace every production stub with real, tested logic (no labels, no placeholders). Each fix is pinned by a test that fails on the old code; every number ships with a reproduce command. Workspace: **3,122 tests / 0 failed** (`cargo test --workspace --no-default-features`), Python proof **VERDICT: PASS** (bit-exact).
   - **ADR-154 Signal/DSP** — revived a dead ADR-134 CIR coherence gate (canonical-56 vs ht20 mismatch meant it never ran in production: 8/8 Err → 8/8 Ok); NaN-bypass + window div0 guards; PSD FFT-planner cache (**2.0–3.1×**) + honored DTW band (**2.4–4.1×**).
diff --git a/docs/adr/ADR-084-rabitq-similarity-sensor.md b/docs/adr/ADR-084-rabitq-similarity-sensor.md
index e9316a5f..342d51f7 100644
--- a/docs/adr/ADR-084-rabitq-similarity-sensor.md
+++ b/docs/adr/ADR-084-rabitq-similarity-sensor.md
@@ -289,6 +289,35 @@ ADR-156 §10. Summary:
   prior top-K acceptance number in this ADR depend on the fixed path; the
   ≥90% coverage criterion is only meaningful post-fix.
 
+## Pass 2b — RaBitQ unbiased distance estimator (ADR-156 §11, landed 2026-06)
+
+The **real** RaBitQ contribution (Gao & Long, SIGMOD 2024) — an
+**unbiased estimator of the inner product / distance** from the 1-bit
+code + per-vector side info, not just sign bits — is now implemented and
+**MEASURED against this ADR's ≥90% strict-K bar**:
+
+- **Implemented** — `crates/wifi-densepose-ruvector/src/estimator.rs`:
+  `EstimatorSketch` (Pass-2 sign code + 8 B/vec side info:
+  `residual_norm` + `x_dot_o = ⟨x̄, o'⟩`), `DistanceEstimator`
+  (`⟨o',q'⟩ ≈ ⟨x̄,q'⟩ / x_dot_o`, the paper's unbiased rescale), and
+  `EstimatorBank` reranking candidates by the estimate instead of raw
+  Hamming. **Zero-centroid simplification** (`c = 0`) documented;
+  paper-faithful centroid path also built (`with_centroid`). Additive —
+  Pass-1/Pass-2 and the wire format are unchanged.
+- **MEASURED strict-K coverage** (same fixture as §"Pass 2", cosine
+  ground truth): the estimator lifts the strict `candidate_k = K` bar
+  **46.39% (Pass-2 sign) → 49.71% (estimator, cosine rerank)** — a real
+  **+3.3 pp** lift, but **still ~40 pp short of the ≥90% strict bar.**
+  At over-fetch the estimator does better than sign (95.12% vs 91.60% at
+  candidate_k = 24). **Honest verdict: the unbiased estimator does NOT
+  clear the strict-K 90% bar on this distribution** — the binding
+  constraint is the 1-bit code's information ceiling, not estimator
+  variance. The ≥90% acceptance bar is still met only via the over-fetch
+  "candidate set" pattern this ADR's Decision specifies; the estimator
+  **reduces the over-fetch factor** needed but does not remove it. This
+  is a **published negative**, reported as such. Full numbers + reproduce
+  commands in ADR-156 §11.
+
 ## Open questions
 
 - **Does `BinaryQuantized` need a randomized rotation pre-pass for
diff --git a/docs/adr/ADR-156-ruvector-fusion-beyond-sota.md b/docs/adr/ADR-156-ruvector-fusion-beyond-sota.md
index d50df09c..4389fcd5 100644
--- a/docs/adr/ADR-156-ruvector-fusion-beyond-sota.md
+++ b/docs/adr/ADR-156-ruvector-fusion-beyond-sota.md
@@ -103,7 +103,7 @@ The double-clone elimination is also correctness-neutral: all 100 `viewpoint`/`m
 | # | Candidate | What | Grade | Verdict |
 |---|-----------|------|-------|---------|
 | **1** | **SymphonyQG** (SIGMOD 2025, public code) | Unified quantization + graph ANN; source reports **3.5–17× QPS over HNSW at equal recall**, pure-CPU / edge-portable. | **CLAIMED** (author-measured; **not reproduced on our hardware** — reproduction is future work) | **Lead beyond-SOTA candidate for the ruvector ANN path.** Propose as ACCEPTED-future; cite honestly as "claimed by source, reproduction pending." Best fit because the ruvector retrieval path (AETHER re-ID, sketch prefilter) is exactly an ANN problem and SymphonyQG is CPU/edge-portable like our deployment. |
-| **2** | **Multi-bit / Extended RaBitQ** | Extends our existing **1-bit** `sketch.rs` (ADR-084) to multiple bits per dimension — precisely the "Pass 2" our own `sketch.rs` doc deferred (1-bit sign quantization ships first; rotation/more-bits "later if benchmark-measured top-K coverage drops below the ADR-084 90% threshold"). | **MEASURED-on-our-hardware** (was CLAIMED) — Pass-2 rotation + multi-bit Pass-3 implemented and benchmarked; see §10. Rotation lifts strict-bar coverage 36%→46% and clears 90% only with ~3× over-fetch; multi-bit (≤4-bit) reaches 74% at the strict bar — both **short of the strict 90% bar** on the tested distribution. | **DONE — RESOLVED-PARTIAL.** Built and MEASURED (§10). The honest negative (no strict-bar 90% from rotation or ≤4-bit) is recorded, not hidden. Over-fetch + Pass-2 is the path that meets the bar; that matches ADR-084's "candidate set" deployment pattern. |
+| **2** | **Multi-bit / Extended RaBitQ + unbiased estimator** | Extends our existing **1-bit** `sketch.rs` (ADR-084): Pass-2 rotation, multi-bit Pass-3, and the **real RaBitQ unbiased distance estimator** (Gao & Long SIGMOD 2024) reranking the candidate set from the 1-bit code + 8 B/vec side info (§11). | **MEASURED-on-our-hardware** (was CLAIMED) — rotation (§10), multi-bit (§10), and the estimator (§11) all implemented + benchmarked. Rotation lifts strict-K 36%→46%; multi-bit (≤4-bit) reaches 74% strict; **the estimator reaches 49.71% strict (cosine rerank), still short of 90%.** All clear 90% only with over-fetch (estimator improves the factor: 95% at candidate_k=24 vs sign 91.6%). | **DONE — RESOLVED-PARTIAL / NEGATIVE.** Rotation (§10) + estimator (§11) built and MEASURED. The honest negative (no strict-bar 90% from rotation, ≤4-bit, **or the unbiased estimator**) is recorded, not hidden. Over-fetch + Pass-2 is the path that meets the bar (ADR-084's "candidate set" pattern); the estimator lowers the over-fetch factor needed. |
 | **3** | **GraphPose-Fi-style learned antenna-attention + ChebGConv fusion head** | Would replace the current **untrained identity-projection + mean-pool** "attention" (the `CrossViewpointAttention` default is `ProjectionWeights::identity` — not a *learned* attention) with a learned graph fusion head. | **DATA-GATED** (per ADR-152 measurement (b): architecture is **NOT** the current bottleneck — **data is**) | **ACCEPTED-future, data-gated. Do NOT build now.** ADR-152's measured lesson was that swapping architecture without more/better paired data does not move PCK. Building a learned fusion head before the data exists would repeat the mistake ADR-155 §5 also flagged for GraphPose-Fi. |
 | — | **Cramér-Rao / sensor-placement** (`geometry.rs` CRB) | Investigated for a 2026 advance beating the textbook Fisher-information CRB already implemented. | **Investigated — NO ACTION** | **Cleared honestly.** No 2026 method beats the closed-form Fisher-information CRB for this 2-D bearing problem; our implementation is already correct SOTA. (Recording a negative result is a deliberate anti-slop signal.) The only CRB change this milestone is the §2.3 *GDOP* honesty fix, which is a labelling/quantity correction, not an algorithmic one. |
 
@@ -202,6 +202,64 @@ Test machine: Windows 11, `cargo bench --release` / `cargo test`. Fixture: **dim
 
 ### 10.5 Deferred sub-items (graded, not dropped)
 
-- **Strict-bar 90% from a richer code** — neither rotation nor uniform multi-bit closes it here. A learned/asymmetric quantizer or the full RaBitQ residual-distance estimator (not just a uniform scalar code) might, but is unbuilt and **unmeasured** — explicitly deferred, not claimed.
+- **Strict-bar 90% from a richer code** — neither rotation nor uniform multi-bit closes it here. A learned/asymmetric quantizer or the full RaBitQ residual-distance estimator (not just a uniform scalar code) might. **RESOLVED-NEGATIVE (§11): the estimator is now built and MEASURED — it lifts strict-K 46.39%→49.71% but does NOT clear the 90% strict bar.** The residual strict-bar gap is a published negative, not a deferral.
 - **Distribution sensitivity** — the result is for one synthetic anisotropic distribution; on real AETHER traces the strict-bar number may differ. Re-measuring on recorded embeddings is deferred to the ADR-084 post-merge soak.
 - **Promoting a `MultiBitSketch` type** — the multi-bit code lives in the measurement harness, not as a shipped sketch type. Building the production type is gated on a use site actually needing strict-K (vs over-fetch), which the measurement says is not required today.
+
+---
+
+## 11. RaBitQ unbiased distance estimator — IMPLEMENTED & MEASURED (Milestone-2, §8 backlog item #2 / §10.5 strict-bar item)
+
+Milestone-2 of the §8 backlog. Status: **RESOLVED-NEGATIVE** — the estimator is built, measured, and lifts strict-K coverage, but the honest result is that it does **not** clear the ADR-084 ≥90% strict-K bar on this distribution. The negative is reported as such, exactly like the Pass-2 rotation result.
+
+### 11.1 What landed
+
+- **`crates/wifi-densepose-ruvector/src/estimator.rs`** (new) — the real Gao & Long (SIGMOD 2024) contribution: an **unbiased estimator of the inner product / squared distance** recovered from the 1-bit code plus per-vector side info, on top of the Pass-2 rotation. Pass-1/Pass-2 ranked candidates by raw Hamming over sign bits — a coarse proxy. This module reranks by the unbiased estimate.
+  - `EstimatorSketch` — Pass-2 sign code (over the **padded** FHT length `D = next_pow2(dim)`, the frame `x̄` is unit in) **plus** the side info.
+  - `SideInfo` = `{ residual_norm: f32, x_dot_o: f32 }` = **8 bytes/vector** (2× f32).
+  - `EstimatorQuery` — query rotated once, reused across all candidates.
+  - `DistanceEstimator` — `estimate_inner_product`, `estimate_sq_distance`, `ranking_key` (euclidean), `cosine_ranking_key` (the correct key vs a cosine ground truth — needs only the code + `x_dot_o`).
+  - `EstimatorBank` — `topk_estimated` (euclidean) / `topk_estimated_cosine`; optional `with_centroid` (the paper's centroid path).
+- **`coverage.rs`** — `measure_estimator` (cosine rerank) + `measure_estimator_euclidean`, on the **bit-identical** fixture / cluster centres / query stream / cosine ground truth as `measure_pass1`/`measure_pass2`. Single source of truth for the §11.3 table; backs both `estimator_coverage_report` and the `sketch_bench` coverage table.
+- **Additive + backward-compatible.** New types only; Pass-1 `Sketch` / Pass-2 `SketchBank` / `WireSketch` wire format are untouched. All external callers (`event_log.rs`, `signal/longitudinal.rs`, `sensing-server`) use Pass-1 `from_embedding` and are unaffected.
+
+### 11.2 The estimator formula (and the zero-centroid simplification, stated honestly)
+
+Let `P` be the Pass-2 orthogonal rotation (`R = H·D`), `D = next_pow2(dim)`. For data `o_raw`, query `q_raw`, centroid `c`:
+
+1. **Centroid — SIMPLIFIED to zero/global `c = 0`.** The paper centres on a per-cluster centroid (`o_r = o_raw − c`); we use `c = 0` (`o_r = o_raw`), because the current sketch path has no IVF/k-means cluster structure. This costs accuracy when the data is far off-origin. **We document it, do not hide it,** and built the paper-faithful centroid path (`from_embedding_centred` / `EstimatorBank::with_centroid`) so the simplification is a measured choice, not an assumption. (We do **not** report a centroid coverage number against the *cosine* ground truth: centroid-subtraction changes the metric — cosine-of-residual ≠ cosine-of-raw — so a centroid number vs raw-cosine truth would be a metric mismatch, itself dishonest. Zero-centroid is the correct match for this raw-cosine harness.)
+2. **Unit residual + 1-bit code.** `o = o_r/‖o_r‖`, `o' = P·o`, code `x̄_i = sign(o'_i)·(1/√D)` — a unit vector at the nearest hypercube corner.
+3. **Side info:** `residual_norm = ‖o_r‖` and `x_dot_o = ⟨x̄, o'⟩ ∈ (0,1]` (the paper's `⟨x̄, o⟩`).
+4. **Unbiased estimator** (paper Eq.): `⟨o', q'⟩ ≈ ⟨x̄, q'⟩ / ⟨x̄, o'⟩ = ⟨x̄, q'⟩ / x_dot_o`. The random rotation makes the code's quantization error orthogonal **in expectation** to `q'`, so the rescale is unbiased (paper's `O(1/√D)` bound). Per candidate: one length-`D` signed sum (`x̄ ∈ {±1/√D}`), as cheap as Hamming + a multiply.
+5. **Distance / cosine.** `⟨o_r,q_r⟩ = ‖o_r‖·(⟨x̄,q'⟩/x_dot_o)`; `‖q_r−o_r‖² = ‖q_r‖²+‖o_r‖²−2⟨o_r,q_r⟩`. For a **cosine** ground truth (AETHER / this harness), rank by `−⟨o,q_r⟩ = −(⟨x̄,q'⟩/x_dot_o)` (needs only the code + `x_dot_o`).
+
+**Unbiasedness is pinned** (`estimator_unbiased_on_fixture`): averaging the estimate of `⟨o_r,q_r⟩` over 4000 random rotation seeds converges to the true inner product within ~6% of the `‖o‖‖q‖` envelope — a biased estimator (or sign-only proxy) would be systematically off.
+
+### 11.3 MEASURED strict-K coverage
+
+Same fixture/seeds as §10 (dim=128, N=2048, K=8, 64 clusters, noise=0.35, 128 queries, `master_seed=0xAD000084`, `rotation_seed=0x5EEDC0DE12345678`), cosine ground truth. Reproduce: `cargo test -p wifi-densepose-ruvector --no-default-features estimator_coverage_report -- --nocapture` or `cargo bench -p wifi-densepose-ruvector --bench sketch_bench -- pass2_coverage`.
+
+| candidate_k | Pass-1 (sign) | Pass-2 (sign) | **Pass-2 + estimator (cosine)** | Pass-2 + estimator (euclid) | vs 90% bar |
+|---|---|---|---|---|---|
+| **8 (= K, strict bar)** | 36.13% | 46.39% | **49.71%** | 49.02% | **all BELOW** |
+| 16 | 62.79% | 75.59% | 79.20% | 77.93% | below |
+| 24 | 83.89% | 91.60% | **95.12%** | 93.65% | estimator clears |
+| 32 | 100.00% | 100.00% | 100.00% | 100.00% | clears |
+| 64 | 100.00% | 100.00% | 100.00% | 100.00% | clears |
+
+Side-info memory overhead: **8 bytes/vector** (2× f32) on top of the 16 B/vec 1-bit sketch.
+
+### 11.4 Honest verdict
+
+- **The estimator helps, and the cosine key beats the euclidean key** (49.71% vs 49.02% at strict-K; cosine is the apples-to-apples match for the cosine ground truth — both it and sign-Hamming are angular). The unbiased rescale is a real, consistent lift at every over-fetch level (e.g. 24: 91.60%→95.12%).
+- **It does NOT clear the strict candidate_k==K 90% bar.** Strict-K goes 36.13% (Pass-1) → 46.39% (Pass-2-sign) → **49.71% (Pass-2 + estimator)** — a **+3.3 pp** improvement over sign-only, **still ~40 pp short of 90%**. This is a **published negative**, the same class of honest result as the Pass-2 rotation (§10).
+- **Why the strict-K gain is modest:** the binding constraint at strict K is the **1-bit code's information ceiling** (resolving 8-of-2048 from a single sign bit per coordinate), not the *estimator's variance* — the estimator sharpens the ranking but cannot add information the 1-bit code never captured. The estimator's larger wins are at over-fetch, where there is room to re-rank a wider candidate pool.
+- **The bar is still met the way ADR-084 deploys the sensor:** at candidate_k=24 (~3× over-fetch) the estimator reaches **95.12%** (vs Pass-2-sign 91.60%) — the "candidate set, then full refinement" pattern. The estimator **improves the over-fetch factor needed** but does not eliminate it.
+- **No benchmark was tuned to manufacture a pass.** The strict-bar gap is documented, not spun.
+
+### 11.5 Pinning tests
+
+- `estimator::estimator_is_deterministic` — fixed seed ⇒ identical estimate + identical bank top-K.
+- `estimator::estimator_unbiased_on_fixture` — Monte-Carlo mean over 4000 seeds converges to the true inner product within tolerance (the unbiasedness claim).
+- `coverage::estimator_rerank_not_worse_than_sign` — estimator-reranked coverage ≥ sign-only Pass-2 on a fixed fixture (must not regress).
+- Plus: `estimator_self_distance_is_small`, `x_dot_o_in_unit_range`, `zero_input_does_not_panic`, `bank_self_query_ranks_self_first`, `centroid_path_self_query_ranks_self_first`, `centroid_zero_matches_default`, `estimator_coverage_is_deterministic`.
diff --git a/v2/crates/wifi-densepose-ruvector/benches/sketch_bench.rs b/v2/crates/wifi-densepose-ruvector/benches/sketch_bench.rs
index fdc70c66..64e4e244 100644
--- a/v2/crates/wifi-densepose-ruvector/benches/sketch_bench.rs
+++ b/v2/crates/wifi-densepose-ruvector/benches/sketch_bench.rs
@@ -185,17 +185,25 @@ fn bench_topk(c: &mut Criterion) {
 /// reads it back, so the criterion timing is meaningless here on purpose — the
 /// value is the `println!` summary.
 fn bench_pass2_coverage(c: &mut Criterion) {
-    use wifi_densepose_ruvector::coverage::{measure_pass1, measure_pass2, CoverageParams};
+    use wifi_densepose_ruvector::coverage::{
+        measure_estimator, measure_estimator_euclidean, measure_pass1, measure_pass2,
+        CoverageParams,
+    };
 
     let base = CoverageParams::aether_default(0xAD00_0084);
     let rot_seed = 0x5EED_C0DE_1234_5678u64;
 
-    println!("\n=== ADR-156 §8 RaBitQ Pass-2 coverage (anisotropic planted clusters) ===");
+    println!("\n=== ADR-156 §8/§11 RaBitQ coverage (anisotropic planted clusters) ===");
     println!(
         "dim={} N={} K={} clusters={} noise={} queries={} master_seed=0x{:X} rot_seed=0x{:X}",
         base.dim, base.n, base.k, base.n_clusters, base.noise, base.n_queries, base.seed, rot_seed
     );
     println!("(coverage = |sketch_topK ∩ float_cosine_topK| / K, ADR-084 bar = 90%)");
+    println!("estimator side info = 8 B/vec (residual_norm + x_dot_o, 2x f32)");
+    println!(
+        "  {:<12} {:>8} {:>8} {:>11} {:>11}",
+        "candidate_k", "P1-sign", "P2-sign", "Est-cosine", "Est-euclid"
+    );
     for &cand in &[8usize, 16, 24, 32, 64] {
         let p = CoverageParams {
             candidate_k: cand,
@@ -203,11 +211,17 @@ fn bench_pass2_coverage(c: &mut Criterion) {
         };
         let p1 = measure_pass1(p).coverage;
         let p2 = measure_pass2(p, rot_seed).coverage;
-        let flag = if p2 >= 0.90 { "Pass2≥90%" } else { "" };
+        let est_cos = measure_estimator(p, rot_seed).coverage;
+        let est_euc = measure_estimator_euclidean(p, rot_seed).coverage;
+        let flag = if est_cos >= 0.90 { "EST≥90%" } else { "" };
+        let strict = if cand == base.k { " STRICT" } else { "" };
         println!(
-            "  candidate_k={cand:<3}  Pass1={:6.2}%  Pass2={:6.2}%  {flag}",
+            "  {:<12} {:>7.2}% {:>7.2}% {:>10.2}% {:>10.2}%  {flag}{strict}",
+            cand,
             p1 * 100.0,
-            p2 * 100.0
+            p2 * 100.0,
+            est_cos * 100.0,
+            est_euc * 100.0
         );
     }
     println!("========================================================================\n");
diff --git a/v2/crates/wifi-densepose-ruvector/src/coverage.rs b/v2/crates/wifi-densepose-ruvector/src/coverage.rs
index a78467ab..28de9869 100644
--- a/v2/crates/wifi-densepose-ruvector/src/coverage.rs
+++ b/v2/crates/wifi-densepose-ruvector/src/coverage.rs
@@ -33,6 +33,7 @@
 //! value derives from a seed via SplitMix64, so the whole harness is
 //! reproducible bit-for-bit.
 
+use crate::estimator::EstimatorBank;
 use crate::{Rotation, SketchBank};
 
 /// SplitMix64 step — reproducible PRNG for fixture generation (dependency-free).
@@ -205,6 +206,80 @@ pub fn measure_pass2(p: CoverageParams, rotation_seed: u64) -> CoverageResult {
     measure_inner(p, Some(rot))
 }
 
+/// Measure mean top-K coverage of the **RaBitQ unbiased estimator** rerank
+/// (ADR-156 Milestone-2) against the full-float top-K, on the **same**
+/// anisotropic synthetic fixture and query stream as [`measure_pass1`] /
+/// [`measure_pass2`].
+///
+/// This is the whole point of Milestone-2: instead of ranking candidates by
+/// raw Hamming over sign bits ([`measure_pass2`]), rank them by the RaBitQ
+/// *unbiased distance estimate* recovered from the 1-bit code + per-vector side
+/// info ([`crate::estimator`]). `rotation_seed` fixes the rotation (index and
+/// query share it). The fixture, cluster centres, query draws, and ground-truth
+/// cosine top-K are **bit-identical** to `measure_pass2`, so the only variable
+/// is sign-Hamming vs estimator-rerank — an honest apples-to-apples coverage
+/// comparison.
+pub fn measure_estimator(p: CoverageParams, rotation_seed: u64) -> CoverageResult {
+    // Cosine ground truth ⇒ rerank by the estimated COSINE key (the angular
+    // sensor's natural metric). See `measure_estimator_euclidean` for the
+    // squared-euclidean key, reported alongside for honesty.
+    measure_estimator_inner(p, rotation_seed, EstimatorRank::Cosine)
+}
+
+/// Same as [`measure_estimator`] but reranks by the estimated **squared
+/// euclidean** distance key instead of cosine. Reported alongside the cosine
+/// rerank so the ADR shows both honestly: against a *cosine* ground truth, the
+/// cosine key is the apples-to-apples comparison to sign-Hamming (also angular),
+/// while the euclidean key mixes in residual-norm and generally ranks worse here.
+pub fn measure_estimator_euclidean(p: CoverageParams, rotation_seed: u64) -> CoverageResult {
+    measure_estimator_inner(p, rotation_seed, EstimatorRank::Euclidean)
+}
+
+#[derive(Clone, Copy)]
+enum EstimatorRank {
+    Cosine,
+    Euclidean,
+}
+
+fn measure_estimator_inner(
+    p: CoverageParams,
+    rotation_seed: u64,
+    rank: EstimatorRank,
+) -> CoverageResult {
+    let rot = Rotation::new(rotation_seed, p.dim);
+    let float_bank = make_fixture(p);
+    let centres = cluster_centres(p.dim, p.n_clusters.max(1), p.seed);
+
+    // Estimator bank over the SAME fixture vectors.
+    let mut bank = EstimatorBank::new(rot);
+    for (i, v) in float_bank.iter().enumerate() {
+        bank.insert_embedding(i as u32, v);
+    }
+
+    let mut total = 0.0f64;
+    for q in 0..p.n_queries {
+        // IDENTICAL query draw to measure_inner (same seed expression).
+        let c = q % p.n_clusters.max(1);
+        let qv = realize(
+            &centres[c],
+            p.dim,
+            p.noise,
+            p.seed ^ 0xDEAD_0000_0000 ^ (q as u64).wrapping_mul(0x2545_F491),
+        );
+        let truth = float_topk(&float_bank, &qv, p.k);
+        let cand = match rank {
+            EstimatorRank::Cosine => bank.topk_estimated_cosine(&qv, p.candidate_k),
+            EstimatorRank::Euclidean => bank.topk_estimated(&qv, p.candidate_k),
+        };
+        let cand_ids: std::collections::HashSet<u32> = cand.into_iter().map(|(id, _)| id).collect();
+        let hit = truth.iter().filter(|id| cand_ids.contains(id)).count();
+        total += hit as f64 / p.k as f64;
+    }
+    CoverageResult {
+        coverage: total / p.n_queries as f64,
+    }
+}
+
 /// Measure mean top-K coverage of a **multi-bit (Pass-3)** rotated sketch:
 /// `bits` bits per dimension instead of 1, ranked by L1 distance over the
 /// per-dim codes (the natural multi-bit generalization of hamming). This is the
@@ -409,6 +484,92 @@ mod tests {
         );
     }
 
+    #[test]
+    fn estimator_rerank_not_worse_than_sign() {
+        // ADR-156 Milestone-2 core regression: on a fixed anisotropic fixture,
+        // reranking the candidate set by the RaBitQ unbiased ESTIMATE must be
+        // >= ranking by sign-only Hamming (Pass-2). The estimator must never
+        // make coverage WORSE — it strictly refines the same 1-bit codes with
+        // side info. (We assert >= here, not a hard 90% bar — the bar is the
+        // measured number reported in the ADR, not a unit invariant.)
+        let p = CoverageParams {
+            n: 512,
+            n_queries: 64,
+            n_clusters: 32,
+            ..CoverageParams::aether_default(0x00C0_FFEE)
+        };
+        let rot_seed = 0x1234_5678_9ABC_DEF0u64;
+        let sign = measure_pass2(p, rot_seed).coverage;
+        let est = measure_estimator(p, rot_seed).coverage;
+        assert!(
+            est + 1e-9 >= sign,
+            "estimator rerank coverage {est:.4} regressed below sign-only Pass-2 {sign:.4}"
+        );
+    }
+
+    #[test]
+    fn estimator_coverage_is_deterministic() {
+        // Same params + rotation seed ⇒ same measured coverage, twice.
+        let p = CoverageParams {
+            n: 256,
+            n_queries: 16,
+            n_clusters: 16,
+            ..CoverageParams::aether_default(0xE571_3A7E)
+        };
+        let a = measure_estimator(p, 0xFEED_FACE_0000_0001).coverage;
+        let b = measure_estimator(p, 0xFEED_FACE_0000_0001).coverage;
+        assert_eq!(a, b, "estimator coverage must be deterministic");
+        assert!((0.0..=1.0).contains(&a));
+    }
+
+    /// Deterministic, test-runnable coverage measurement that PRINTS the
+    /// Milestone-2 strict-K table: Pass-1 | Pass-2-sign | Pass-2+estimator, at
+    /// the strict bar (candidate_k == K) plus the over-fetch curve. Run with:
+    ///   cargo test -p wifi-densepose-ruvector --no-default-features \
+    ///     estimator_coverage_report -- --nocapture
+    #[test]
+    fn estimator_coverage_report() {
+        let base = CoverageParams::aether_default(0xAD00_0084);
+        let rot_seed = 0x5EED_C0DE_1234_5678u64;
+        println!(
+            "\n=== ADR-156 Milestone-2 RaBitQ estimator coverage (anisotropic synthetic) ==="
+        );
+        println!(
+            "dim={} N={} K={} queries={} clusters={} noise={} master_seed=0x{:X} rotation_seed=0x{:X}",
+            base.dim, base.n, base.k, base.n_queries, base.n_clusters, base.noise, base.seed, rot_seed
+        );
+        println!("side info = 8 B/vec (residual_norm + x_dot_o, 2x f32)");
+        println!(
+            "{:<12} {:>9} {:>9} {:>11} {:>11} {:>9}",
+            "candidate_k", "P1-sign", "P2-sign", "Est-cosine", "Est-euclid", "vs 90%"
+        );
+        for &c in &[base.k, 16usize, 24, 32, 64] {
+            let pc = CoverageParams {
+                candidate_k: c,
+                ..base
+            };
+            let p1 = measure_pass1(pc).coverage;
+            let p2 = measure_pass2(pc, rot_seed).coverage;
+            let est_cos = measure_estimator(pc, rot_seed).coverage;
+            let est_euc = measure_estimator_euclidean(pc, rot_seed).coverage;
+            let bar = if est_cos >= 0.90 { "EST≥90%" } else { "below" };
+            let strict = if c == base.k { " (STRICT)" } else { "" };
+            println!(
+                "{:<12} {:>8.2}% {:>8.2}% {:>10.2}% {:>10.2}% {:>9}{}",
+                c,
+                p1 * 100.0,
+                p2 * 100.0,
+                est_cos * 100.0,
+                est_euc * 100.0,
+                bar,
+                strict
+            );
+        }
+        println!("============================================================================\n");
+        let strict = measure_estimator(base, rot_seed).coverage;
+        assert!((0.0..=1.0).contains(&strict));
+    }
+
     #[test]
     fn fixture_is_deterministic() {
         let p = CoverageParams::aether_default(12345);
diff --git a/v2/crates/wifi-densepose-ruvector/src/estimator.rs b/v2/crates/wifi-densepose-ruvector/src/estimator.rs
new file mode 100644
index 00000000..36ee5467
--- /dev/null
+++ b/v2/crates/wifi-densepose-ruvector/src/estimator.rs
@@ -0,0 +1,685 @@
+//! RaBitQ **unbiased distance estimator** — the real Gao & Long (SIGMOD 2024)
+//! contribution, on top of the Pass-2 rotation ([`crate::rotation`]).
+//!
+//! ## Why this exists (ADR-156 Milestone-2)
+//!
+//! Pass-1 ([`crate::sketch`]) and Pass-2 ([`crate::rotation`]) use only the
+//! **sign** of each rotated coordinate and rank candidates by **Hamming /
+//! bit distance** — a coarse, monotone-but-lossy proxy for the true angle.
+//! ADR-156 §10 measured that sign-only Pass-2 leaves strict-K
+//! (`candidate_k == K`) top-K coverage at **~46%**, well below the ADR-084
+//! **≥90%** bar, and only clears 90% with ~3× over-fetch.
+//!
+//! RaBitQ's *actual* algorithmic contribution is not the sign bits — it is an
+//! **unbiased estimator of the inner product / squared distance** recovered
+//! from the 1-bit code **plus a few bytes of per-vector side information**.
+//! That estimate is far sharper than the raw Hamming proxy, so it can
+//! **rerank** the candidate set and (the question this module measures) close
+//! the strict-K coverage gap.
+//!
+//! ## The estimator (paper formula + our simplification, stated honestly)
+//!
+//! Notation follows the paper. Let `P` be the Pass-2 orthogonal rotation
+//! ([`crate::Rotation`], `R = H·D`). For a data vector `o_raw` and a query
+//! `q_raw`:
+//!
+//! 1. **Centroid.** The paper centres each vector on its (per-cluster)
+//!    centroid `c`: residual `o_r = o_raw − c`. **We use a zero / global
+//!    centroid `c = 0`** (`o_r = o_raw`). This is an explicit simplification
+//!    (no IVF/k-means cluster structure in the current sketch path) — it costs
+//!    accuracy when the data is far off-origin, and we document it rather than
+//!    hide it. With `c = 0`, the residual *is* the raw vector.
+//!
+//! 2. **Unit residual + 1-bit code.** `o = o_r / ‖o_r‖`. Rotate:
+//!    `o' = P·o`. The 1-bit code is `x̄_i = sign(o'_i) · (1/√D)`, so `x̄`
+//!    is a **unit vector** in `{±1/√D}^D` (the corner of the hypercube nearest
+//!    `o'`). `D` is the rotation's padded dimension (`next_pow2(dim)`), because
+//!    the FHT operates on the padded length and `x̄` is unit over that length.
+//!
+//! 3. **Per-vector side information** (the "few bytes"): we store, per sketch,
+//!    - `residual_norm = ‖o_r‖` (an `f32`), and
+//!    - `x_dot_o = ⟨x̄, o'⟩` (an `f32`), the cosine between the code and the
+//!      rotated unit residual. This is the quantity the paper calls `⟨x̄, o⟩`
+//!      (after rotation); it lies in `(0, 1]` and is `1` only when `o'`
+//!      already sits exactly on a hypercube corner.
+//!
+//!    That is **8 bytes/vector** of side info (2× `f32`).
+//!
+//! 4. **Query-time estimate.** Rotate the query residual: `q' = P·q_r`. The
+//!    **unbiased estimator of `⟨o', q'⟩`** (equivalently `⟨o, q_r⟩`, since `P`
+//!    is orthogonal) is
+//!
+//!    ```text
+//!        ⟨o', q'⟩  ≈  ⟨x̄, q'⟩ / ⟨x̄, o'⟩  =  ⟨x̄, q'⟩ / x_dot_o
+//!    ```
+//!
+//!    This is RaBitQ Eq. (in the paper, the estimator `<q, o> ≈ <q̄, ...>`):
+//!    the random rotation makes the quantization error of `x̄` (relative to
+//!    `o'`) orthogonal **in expectation** to `q'`, so dividing the measured
+//!    `⟨x̄, q'⟩` by `x_dot_o` is **unbiased** for `⟨o', q'⟩`, with the paper's
+//!    `O(1/√D)` error bound. The only per-candidate cost is one length-`D`
+//!    dot product `⟨x̄, q'⟩` — which, because `x̄ ∈ {±1/√D}`, is just a signed
+//!    sum of the query coordinates (`±` chosen by the stored sign bits),
+//!    i.e. as cheap as the Hamming proxy plus one multiply.
+//!
+//! 5. **Inner product and squared distance.** Un-normalize:
+//!    `⟨o_r, q_r⟩ = ‖o_r‖ · ⟨o, q_r⟩`. Then
+//!
+//!    ```text
+//!        ‖q_r − o_r‖²  =  ‖q_r‖²  +  ‖o_r‖²  −  2·⟨o_r, q_r⟩
+//!    ```
+//!
+//!    For **ranking** a candidate set against one fixed query, `‖q_r‖²` is a
+//!    per-query constant and can be dropped; we keep it in
+//!    [`DistanceEstimator::estimate_sq_distance`] so the value is a genuine
+//!    distance estimate (used by the unbiasedness test), and expose the
+//!    cheaper ranking key separately.
+//!
+//! ## What is unbiased, and what we measure
+//!
+//! The estimator of `⟨o', q'⟩` is unbiased over the random rotation. We pin
+//! that on a small hand-checkable fixture (`estimator_unbiased_on_fixture`):
+//! averaging the estimate over many random rotation seeds converges to the true
+//! inner product within tolerance. We then measure whether **reranking the
+//! candidate set by this estimate** closes the strict-K coverage gap that the
+//! sign-only Pass-2 left at ~46% — reported honestly in ADR-156 §10 / §11
+//! whether it clears 90% or not.
+//!
+//! ## Backward compatibility
+//!
+//! This module is **purely additive**. It introduces an *extended* sketch type
+//! ([`EstimatorSketch`]) and bank ([`EstimatorBank`]) that carry the side info;
+//! the Pass-1 [`crate::Sketch`] / Pass-2 [`crate::SketchBank`] paths and the
+//! [`crate::WireSketch`] wire format are **untouched**. Nothing on the existing
+//! surface changes.
+
+use crate::rotation::{next_pow2, Rotation};
+
+/// The per-vector side information RaBitQ needs to turn a 1-bit code into an
+/// **unbiased** distance estimate (§ module docs step 3).
+///
+/// Two `f32`s = **8 bytes/vector** on top of the packed sign bits.
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct SideInfo {
+    /// `‖o_r‖` — L2 norm of the (zero-centroid) residual = the raw vector norm.
+    pub residual_norm: f32,
+    /// `⟨x̄, o'⟩` — dot product of the unit 1-bit code with the rotated unit
+    /// residual. In `(0, 1]`; the paper's `⟨x̄, o⟩`. Drives the unbiased
+    /// rescaling `⟨x̄, q'⟩ / x_dot_o`.
+    pub x_dot_o: f32,
+}
+
+/// A Pass-2 sketch **plus** the RaBitQ side information, sufficient to compute
+/// the unbiased distance estimate at query time.
+///
+/// Stores the packed sign bits over the **padded** rotation length `D`
+/// (`next_pow2(dim)`) — the frame `x̄` actually lives in — together with the
+/// [`SideInfo`]. Construct via [`EstimatorSketch::from_embedding`]; the index
+/// and the query **must** use the same [`Rotation`] (same seed + dim), exactly
+/// as for a Pass-2 sketch.
+#[derive(Debug, Clone)]
+pub struct EstimatorSketch {
+    /// Sign bits of the rotated *padded* unit residual, MSB-first per byte.
+    /// Length is `ceil(D / 8)` where `D = next_pow2(dim)`. Bit set ⇒ `o'_i ≥ 0`
+    /// ⇒ code coordinate `+1/√D`; clear ⇒ `−1/√D`.
+    bits: Vec<u8>,
+    /// Padded rotation dimension `D = next_pow2(dim)`; the code is unit over `D`.
+    padded_dim: usize,
+    /// Source embedding dimension (for compatibility checks / reporting).
+    embedding_dim: usize,
+    /// The RaBitQ side info for the unbiased estimate.
+    side: SideInfo,
+}
+
+impl EstimatorSketch {
+    /// Build an estimator sketch from a dense embedding and a [`Rotation`].
+    ///
+    /// Zero-centroid (`c = 0`): the residual is the raw embedding. The vector is
+    /// rotated through `rotation` over its padded length `D = next_pow2(dim)`,
+    /// the sign of each rotated coordinate is packed, and the side info
+    /// (`‖o_r‖`, `⟨x̄, o'⟩`) is computed in the same pass.
+    ///
+    /// A zero (or all-equal-to-its-own-mean) input yields `residual_norm = 0`;
+    /// its estimate degenerates to `0` (handled in
+    /// [`EstimatorBank`]) rather than dividing by zero.
+    pub fn from_embedding(embedding: &[f32], rotation: &Rotation) -> Self {
+        Self::from_embedding_centred(embedding, rotation, None)
+    }
+
+    /// Build an estimator sketch with an **explicit centroid** `c` subtracted
+    /// before rotation (the paper's per-cluster centroid; `o_r = o_raw − c`).
+    ///
+    /// Pass `None` for the zero-centroid simplification (`c = 0`, identical to
+    /// [`EstimatorSketch::from_embedding`]). Pass `Some(centroid)` (length `dim`)
+    /// to centre on a shared global / cluster centroid — the index and the query
+    /// **must** use the *same* centroid, exactly as they must share the rotation.
+    /// This path exists so ADR-156 can **measure the cost of the zero-centroid
+    /// simplification** honestly rather than assert it.
+    pub fn from_embedding_centred(
+        embedding: &[f32],
+        rotation: &Rotation,
+        centroid: Option<&[f32]>,
+    ) -> Self {
+        let dim = rotation.dim();
+        let padded = next_pow2(dim);
+        // Residual o_r = o_raw − c (c = 0 when centroid is None). Build it once.
+        let residual: Vec<f32> = (0..dim)
+            .map(|i| {
+                let v = embedding.get(i).copied().unwrap_or(0.0);
+                let c = centroid.and_then(|c| c.get(i)).copied().unwrap_or(0.0);
+                v - c
+            })
+            .collect();
+        let residual_norm = {
+            let mut acc = 0.0f64;
+            for &v in &residual {
+                acc += (v as f64) * (v as f64);
+            }
+            acc.sqrt() as f32
+        };
+
+        // Rotate the RESIDUAL over the PADDED length so the code frame matches
+        // what `x_dot_o` and the query dot product use.
+        let rotated_padded = rotation.apply_padded(&residual);
+        debug_assert_eq!(rotated_padded.len(), padded);
+
+        // 1-bit code over the padded length: x̄_i = sign(o'_i)/√D on the *unit*
+        // residual. Since o' = P·o = P·(o_r/‖o_r‖) = (P·o_r)/‖o_r‖, and sign is
+        // scale-invariant, sign(o'_i) == sign((P·o_r)_i) == sign(rotated_padded_i).
+        // ⟨x̄, o'⟩ = (1/√D)·Σ sign(o'_i)·o'_i = (1/√D)·Σ |o'_i|
+        //         = (1/√D)·(Σ|(P·o_r)_i|) / ‖o_r‖.
+        let inv_sqrt_d = 1.0f32 / (padded as f32).sqrt();
+        let mut bits = vec![0u8; padded.div_ceil(8)];
+        let mut sum_abs = 0.0f64; // Σ |(P·o_r)_i|
+        for (i, &c) in rotated_padded.iter().enumerate() {
+            if c >= 0.0 {
+                bits[i / 8] |= 1 << (7 - (i % 8));
+            }
+            sum_abs += (c as f64).abs();
+        }
+        // ⟨x̄, o'⟩ with o' the rotated *unit* residual.
+        let x_dot_o = if residual_norm > 0.0 {
+            (inv_sqrt_d as f64 * sum_abs / residual_norm as f64) as f32
+        } else {
+            0.0
+        };
+
+        Self {
+            bits,
+            padded_dim: padded,
+            embedding_dim: dim,
+            side: SideInfo {
+                residual_norm,
+                x_dot_o,
+            },
+        }
+    }
+
+    /// The padded rotation dimension `D` the code lives in.
+    #[inline]
+    pub fn padded_dim(&self) -> usize {
+        self.padded_dim
+    }
+
+    /// Source embedding dimension.
+    #[inline]
+    pub fn embedding_dim(&self) -> usize {
+        self.embedding_dim
+    }
+
+    /// The RaBitQ side information.
+    #[inline]
+    pub fn side_info(&self) -> SideInfo {
+        self.side
+    }
+
+    /// `‖o_r‖` of the residual (zero-centroid ⇒ raw vector norm).
+    #[inline]
+    pub fn residual_norm(&self) -> f32 {
+        self.side.residual_norm
+    }
+
+    /// Side-information byte cost (excluding the packed sign bits): 8 bytes.
+    pub const SIDE_INFO_BYTES: usize = 2 * std::mem::size_of::<f32>();
+
+    /// `⟨x̄, q'⟩` — the dot product of this sketch's unit 1-bit code with a
+    /// rotated query `q'` (length `padded_dim`). Because `x̄_i = ±1/√D`, this is
+    /// `(1/√D)·Σ ±q'_i` with the sign taken from the stored bit. The single
+    /// per-candidate cost of the estimator.
+    #[inline]
+    fn code_dot(&self, q_rotated_padded: &[f32]) -> f32 {
+        debug_assert_eq!(q_rotated_padded.len(), self.padded_dim);
+        let inv_sqrt_d = 1.0f32 / (self.padded_dim as f32).sqrt();
+        let mut acc = 0.0f32;
+        for (i, &q) in q_rotated_padded.iter().enumerate() {
+            let bit = (self.bits[i / 8] >> (7 - (i % 8))) & 1;
+            if bit == 1 {
+                acc += q;
+            } else {
+                acc -= q;
+            }
+        }
+        acc * inv_sqrt_d
+    }
+}
+
+/// A pre-rotated query, computed **once** per query and reused across all
+/// candidates. Carries `q' = P·q_r` (over the padded length) and `‖q_r‖²`.
+#[derive(Debug, Clone)]
+pub struct EstimatorQuery {
+    /// `q' = P·q_r` over the padded rotation length.
+    q_rotated_padded: Vec<f32>,
+    /// `‖q_r‖²` — per-query constant in the squared-distance expansion.
+    q_norm_sq: f32,
+}
+
+impl EstimatorQuery {
+    /// Pre-rotate a query embedding through `rotation` (zero-centroid).
+    pub fn new(query: &[f32], rotation: &Rotation) -> Self {
+        Self::new_centred(query, rotation, None)
+    }
+
+    /// Pre-rotate a query residual `q_r = q − c` through `rotation`. The
+    /// centroid **must** match the one used to build the bank's sketches.
+    pub fn new_centred(query: &[f32], rotation: &Rotation, centroid: Option<&[f32]>) -> Self {
+        let dim = rotation.dim();
+        let residual: Vec<f32> = (0..dim)
+            .map(|i| {
+                let v = query.get(i).copied().unwrap_or(0.0);
+                let c = centroid.and_then(|c| c.get(i)).copied().unwrap_or(0.0);
+                v - c
+            })
+            .collect();
+        let mut q_norm_sq = 0.0f64;
+        for &v in &residual {
+            q_norm_sq += (v as f64) * (v as f64);
+        }
+        Self {
+            q_rotated_padded: rotation.apply_padded(&residual),
+            q_norm_sq: q_norm_sq as f32,
+        }
+    }
+}
+
+/// Computes RaBitQ unbiased estimates from an [`EstimatorSketch`] + a
+/// pre-rotated [`EstimatorQuery`].
+///
+/// Stateless — the methods are associated functions. Kept as a type for
+/// discoverability and to group the estimator formula in one place.
+pub struct DistanceEstimator;
+
+impl DistanceEstimator {
+    /// Unbiased estimate of `⟨o_r, q_r⟩` (the inner product of the residuals).
+    ///
+    /// `⟨o_r, q_r⟩ = ‖o_r‖ · (⟨x̄, q'⟩ / ⟨x̄, o'⟩)`. Returns `0.0` when the
+    /// stored `x_dot_o` is non-positive (degenerate / zero residual), which
+    /// cannot happen for a non-zero input but keeps the call total.
+    pub fn estimate_inner_product(sketch: &EstimatorSketch, query: &EstimatorQuery) -> f32 {
+        let x_dot_o = sketch.side.x_dot_o;
+        if x_dot_o <= 0.0 {
+            return 0.0;
+        }
+        let code_dot_q = sketch.code_dot(&query.q_rotated_padded);
+        // ⟨o, q_r⟩ ≈ ⟨x̄, q'⟩ / x_dot_o   (unit residual o)
+        let inner_unit = code_dot_q / x_dot_o;
+        sketch.side.residual_norm * inner_unit
+    }
+
+    /// Unbiased estimate of the **squared euclidean distance** `‖q_r − o_r‖²`.
+    ///
+    /// `= ‖q_r‖² + ‖o_r‖² − 2·⟨o_r, q_r⟩`, using the estimated inner product.
+    /// This is the value the unbiasedness test checks.
+    pub fn estimate_sq_distance(sketch: &EstimatorSketch, query: &EstimatorQuery) -> f32 {
+        let ip = Self::estimate_inner_product(sketch, query);
+        let o_norm = sketch.side.residual_norm;
+        query.q_norm_sq + o_norm * o_norm - 2.0 * ip
+    }
+
+    /// The cheap **euclidean ranking key** for nearest-neighbour reranking:
+    /// monotone in the estimated squared distance with the per-query constant
+    /// `‖q_r‖²` dropped. Smaller = nearer. Equals `‖o_r‖² − 2·⟨o_r, q_r⟩`.
+    ///
+    /// Use this (not [`Self::estimate_sq_distance`]) for top-K reranking under a
+    /// **euclidean** ground truth — it avoids adding the same `q_norm_sq` to
+    /// every candidate. For a **cosine** ground truth (AETHER / the coverage
+    /// harness), use [`Self::cosine_ranking_key`] instead.
+    #[inline]
+    pub fn ranking_key(sketch: &EstimatorSketch, query: &EstimatorQuery) -> f32 {
+        let ip = Self::estimate_inner_product(sketch, query);
+        let o_norm = sketch.side.residual_norm;
+        o_norm * o_norm - 2.0 * ip
+    }
+
+    /// The cheap **cosine ranking key**: smaller = nearer in cosine distance.
+    ///
+    /// Cosine distance is `1 − ⟨o_r,q_r⟩ / (‖o_r‖·‖q_r‖)`. `‖q_r‖` is a
+    /// per-query constant, so ranking by cosine distance ascending is ranking by
+    /// `⟨o_r,q_r⟩ / ‖o_r‖` **descending**, i.e. by `−⟨o, q_r⟩` ascending. And
+    /// `⟨o, q_r⟩ = ⟨x̄, q'⟩ / x_dot_o` — the unit-residual inner product, which
+    /// needs **only the code and `x_dot_o`**, not even `residual_norm`. We
+    /// return `−⟨o, q_r⟩` so "smaller = nearer" matches the euclidean key's
+    /// convention.
+    ///
+    /// This is the correct key when the sketch is used (as in ADR-084) as an
+    /// **angular** sensor graded against a cosine top-K: the 1-bit code is a
+    /// rotated-angle estimator, and dividing by `x_dot_o` is the RaBitQ unbiased
+    /// rescale of that angle's inner product.
+    #[inline]
+    pub fn cosine_ranking_key(sketch: &EstimatorSketch, query: &EstimatorQuery) -> f32 {
+        let x_dot_o = sketch.side.x_dot_o;
+        if x_dot_o <= 0.0 {
+            return 0.0;
+        }
+        // ⟨o, q_r⟩ = ⟨x̄, q'⟩ / x_dot_o ; nearer in cosine ⇒ larger ⇒ negate.
+        -(sketch.code_dot(&query.q_rotated_padded) / x_dot_o)
+    }
+}
+
+/// A bank of [`EstimatorSketch`]es with stable IDs, reranked by the RaBitQ
+/// **unbiased distance estimate** instead of raw Hamming.
+///
+/// All sketches share one [`Rotation`] (the index/query frame). The bank rotates
+/// every inserted embedding and every query through it, so the estimator is
+/// always computed in a consistent frame.
+///
+/// # Invariants
+/// - All sketches share the bank's `embedding_dim` and `Rotation`.
+/// - IDs are caller-assigned and stable.
+#[derive(Debug, Clone)]
+pub struct EstimatorBank {
+    rotation: Rotation,
+    entries: Vec<(u32, EstimatorSketch)>,
+    embedding_dim: usize,
+    /// Optional shared centroid subtracted from every embedding/query before
+    /// rotation. `None` = zero-centroid (the default simplification).
+    centroid: Option<Vec<f32>>,
+}
+
+impl EstimatorBank {
+    /// Create an empty bank over `rotation`'s dimension and frame (zero-centroid).
+    pub fn new(rotation: Rotation) -> Self {
+        let embedding_dim = rotation.dim();
+        Self {
+            rotation,
+            entries: Vec::new(),
+            embedding_dim,
+            centroid: None,
+        }
+    }
+
+    /// Create an empty bank that subtracts `centroid` from every embedding and
+    /// query before rotation (the paper's centroid path). Used by ADR-156 to
+    /// measure the cost of the zero-centroid simplification.
+    pub fn with_centroid(rotation: Rotation, centroid: Vec<f32>) -> Self {
+        let embedding_dim = rotation.dim();
+        Self {
+            rotation,
+            entries: Vec::new(),
+            embedding_dim,
+            centroid: Some(centroid),
+        }
+    }
+
+    /// The rotation (index/query frame) this bank uses.
+    #[inline]
+    pub fn rotation(&self) -> &Rotation {
+        &self.rotation
+    }
+
+    /// Number of stored sketches.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.entries.len()
+    }
+
+    /// True iff empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.entries.is_empty()
+    }
+
+    /// Source embedding dimension.
+    #[inline]
+    pub fn embedding_dim(&self) -> usize {
+        self.embedding_dim
+    }
+
+    /// Insert a raw embedding, sketching it (with side info) through the bank's
+    /// rotation. The stored code and the queries share one rotated frame.
+    pub fn insert_embedding(&mut self, id: u32, embedding: &[f32]) {
+        let sketch = EstimatorSketch::from_embedding_centred(
+            embedding,
+            &self.rotation,
+            self.centroid.as_deref(),
+        );
+        self.entries.push((id, sketch));
+    }
+
+    /// Insert a pre-built [`EstimatorSketch`] (must have been built with this
+    /// bank's rotation; the caller is responsible for that).
+    pub fn insert(&mut self, id: u32, sketch: EstimatorSketch) {
+        self.entries.push((id, sketch));
+    }
+
+    /// Top-K nearest neighbours by the **RaBitQ unbiased estimate**, ascending
+    /// by [`DistanceEstimator::ranking_key`]. Returns up to `k` `(id, key)`
+    /// pairs. If `k == 0` or the bank is empty, returns empty. If the bank has
+    /// fewer than `k`, returns all of them.
+    ///
+    /// The query is rotated **once**; every candidate then costs one
+    /// length-`D` signed-sum dot product — the estimator is as cheap per
+    /// candidate as Hamming plus a multiply.
+    pub fn topk_estimated(&self, query: &[f32], k: usize) -> Vec<(u32, f32)> {
+        self.topk_by(query, k, DistanceEstimator::ranking_key)
+    }
+
+    /// Top-K by the estimated **cosine** distance
+    /// ([`DistanceEstimator::cosine_ranking_key`]) — the correct rerank when the
+    /// sketch is graded against a cosine top-K (AETHER / the coverage harness).
+    pub fn topk_estimated_cosine(&self, query: &[f32], k: usize) -> Vec<(u32, f32)> {
+        self.topk_by(query, k, DistanceEstimator::cosine_ranking_key)
+    }
+
+    /// Shared top-K driver parameterised on the ranking-key function. Rotates
+    /// the query once, scores every candidate with `key`, returns the `k`
+    /// smallest keys ascending.
+    fn topk_by(
+        &self,
+        query: &[f32],
+        k: usize,
+        key: fn(&EstimatorSketch, &EstimatorQuery) -> f32,
+    ) -> Vec<(u32, f32)> {
+        if k == 0 || self.entries.is_empty() {
+            return Vec::new();
+        }
+        let q = EstimatorQuery::new_centred(query, &self.rotation, self.centroid.as_deref());
+        let mut scored: Vec<(u32, f32)> = self
+            .entries
+            .iter()
+            .map(|(id, sk)| (*id, key(sk, &q)))
+            .collect();
+        // Ascending by ranking key. Total ordering via partial_cmp with a
+        // NaN-safe fallback (estimates are finite for finite input).
+        scored.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
+        scored.truncate(k);
+        scored
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn l2(v: &[f32]) -> f32 {
+        v.iter().map(|&x| x * x).sum::<f32>().sqrt()
+    }
+
+    /// Brute-force true inner product of two residuals (zero-centroid).
+    fn true_inner(a: &[f32], b: &[f32]) -> f32 {
+        a.iter().zip(b).map(|(&x, &y)| x * y).sum()
+    }
+
+    #[test]
+    fn estimator_is_deterministic() {
+        // Same (seed, dim) rotation + same vectors ⇒ identical estimate, twice.
+        let dim = 64;
+        let rot = Rotation::new(0xC0DE_1234_5678_9ABC, dim);
+        let o: Vec<f32> = (0..dim).map(|i| (i as f32 * 0.21).sin() + 0.3).collect();
+        let qv: Vec<f32> = (0..dim).map(|i| (i as f32 * 0.11).cos() - 0.2).collect();
+
+        let s1 = EstimatorSketch::from_embedding(&o, &rot);
+        let s2 = EstimatorSketch::from_embedding(&o, &rot);
+        let q1 = EstimatorQuery::new(&qv, &rot);
+        let q2 = EstimatorQuery::new(&qv, &Rotation::new(0xC0DE_1234_5678_9ABC, dim));
+
+        let e1 = DistanceEstimator::estimate_inner_product(&s1, &q1);
+        let e2 = DistanceEstimator::estimate_inner_product(&s2, &q2);
+        assert_eq!(e1, e2, "estimator must be deterministic for a fixed seed");
+
+        // Bank topk is deterministic too.
+        let mut bank = EstimatorBank::new(Rotation::new(7, dim));
+        for id in 0..16u32 {
+            let v: Vec<f32> = (0..dim).map(|i| ((i + id as usize) as f32 * 0.07).sin()).collect();
+            bank.insert_embedding(id, &v);
+        }
+        let a = bank.topk_estimated(&qv, 5);
+        let b = bank.topk_estimated(&qv, 5);
+        assert_eq!(a, b, "topk_estimated must be deterministic");
+    }
+
+    #[test]
+    fn estimator_unbiased_on_fixture() {
+        // The core unbiasedness claim: averaging the estimate of ⟨o_r, q_r⟩ over
+        // MANY random rotation seeds converges to the true inner product.
+        //
+        // Hand-checkable small case: two fixed vectors, known true inner
+        // product, average the estimator over many seeds and assert it lands
+        // within a tolerance that a BIASED estimator would miss.
+        let dim = 32;
+        let o: Vec<f32> = (0..dim).map(|i| ((i % 7) as f32 - 3.0) * 0.4 + 0.5).collect();
+        let qv: Vec<f32> = (0..dim).map(|i| ((i % 5) as f32 - 2.0) * 0.3 - 0.1).collect();
+        let truth = true_inner(&o, &qv);
+
+        let n_seeds = 4000u64;
+        let mut acc = 0.0f64;
+        for seed in 0..n_seeds {
+            let rot = Rotation::new(seed.wrapping_mul(0x9E37_79B9_7F4A_7C15) ^ 0xABCD, dim);
+            let sk = EstimatorSketch::from_embedding(&o, &rot);
+            let q = EstimatorQuery::new(&qv, &rot);
+            acc += DistanceEstimator::estimate_inner_product(&sk, &q) as f64;
+        }
+        let mean = (acc / n_seeds as f64) as f32;
+
+        // Tolerance scaled to the magnitudes involved. The estimator is
+        // unbiased, so the Monte-Carlo mean must be CLOSE to truth; a sign-only
+        // Hamming proxy (or a biased rescale) would be systematically off.
+        let scale = l2(&o) * l2(&qv);
+        let tol = 0.06 * scale; // ~6% of the ‖o‖‖q‖ envelope over 4000 seeds
+        assert!(
+            (mean - truth).abs() < tol,
+            "estimator biased: mean={mean:.4} truth={truth:.4} tol={tol:.4} (scale={scale:.4})"
+        );
+    }
+
+    #[test]
+    fn estimator_self_distance_is_small() {
+        // Estimating the distance of a vector to itself should be ~0 (the
+        // estimate of ⟨o,o⟩ ≈ ‖o‖², so ‖q-o‖² ≈ 0). Not exactly 0 (1-bit code),
+        // but small relative to ‖o‖².
+        let dim = 128;
+        let rot = Rotation::new(0xBEEF_CAFE, dim);
+        let o: Vec<f32> = (0..dim).map(|i| (i as f32 * 0.37).cos() + 0.2).collect();
+        let sk = EstimatorSketch::from_embedding(&o, &rot);
+        let q = EstimatorQuery::new(&o, &rot);
+        let sq = DistanceEstimator::estimate_sq_distance(&sk, &q);
+        let o_norm_sq = l2(&o) * l2(&o);
+        assert!(
+            sq.abs() < 0.25 * o_norm_sq,
+            "self sq-distance estimate {sq:.3} too large vs ‖o‖²={o_norm_sq:.3}"
+        );
+    }
+
+    #[test]
+    fn side_info_is_eight_bytes() {
+        assert_eq!(EstimatorSketch::SIDE_INFO_BYTES, 8);
+    }
+
+    #[test]
+    fn x_dot_o_in_unit_range() {
+        // ⟨x̄, o'⟩ ∈ (0, 1] for any non-zero input (it's the cosine between the
+        // rotated residual and its nearest hypercube corner).
+        let dim = 96;
+        let rot = Rotation::new(0x1357_9BDF, dim);
+        for s in 0..20u32 {
+            let v: Vec<f32> = (0..dim).map(|i| (((i + s as usize) * 13 % 23) as f32 - 11.0) * 0.2).collect();
+            let sk = EstimatorSketch::from_embedding(&v, &rot);
+            let x = sk.side_info().x_dot_o;
+            assert!(x > 0.0 && x <= 1.0 + 1e-5, "x_dot_o out of (0,1]: {x}");
+        }
+    }
+
+    #[test]
+    fn zero_input_does_not_panic() {
+        let dim = 64;
+        let rot = Rotation::new(1, dim);
+        let sk = EstimatorSketch::from_embedding(&vec![0.0f32; dim], &rot);
+        assert_eq!(sk.residual_norm(), 0.0);
+        let q = EstimatorQuery::new(&vec![1.0f32; dim], &rot);
+        // No divide-by-zero; degenerate estimate is 0 inner product.
+        assert_eq!(DistanceEstimator::estimate_inner_product(&sk, &q), 0.0);
+    }
+
+    #[test]
+    fn centroid_path_self_query_ranks_self_first() {
+        // The paper-faithful centroid path (o_r = o − c) must still rank a
+        // stored vector first when queried with itself, with a shared centroid.
+        let dim = 64;
+        let rot = Rotation::new(0x9999, dim);
+        let centroid: Vec<f32> = (0..dim).map(|i| (i as f32 * 0.05).sin()).collect();
+        let mut bank = EstimatorBank::with_centroid(rot, centroid.clone());
+        let target: Vec<f32> = (0..dim).map(|i| (i as f32 * 0.23).cos() + 1.5).collect();
+        bank.insert_embedding(7, &target);
+        for id in 0..24u32 {
+            let v: Vec<f32> = (0..dim)
+                .map(|i| ((i as f32 + id as f32) * 0.09).sin() + 1.4)
+                .collect();
+            bank.insert_embedding(id, &v);
+        }
+        let top = bank.topk_estimated_cosine(&target, 1);
+        assert_eq!(top.len(), 1);
+        assert_eq!(top[0].0, 7, "centroid-path self-query should rank self first");
+    }
+
+    #[test]
+    fn centroid_zero_matches_default() {
+        // from_embedding_centred(None) must be byte-identical to from_embedding.
+        let dim = 48;
+        let rot = Rotation::new(0x4242, dim);
+        let v: Vec<f32> = (0..dim).map(|i| (i as f32 * 0.3).sin() - 0.1).collect();
+        let a = EstimatorSketch::from_embedding(&v, &rot);
+        let b = EstimatorSketch::from_embedding_centred(&v, &rot, None);
+        assert_eq!(a.residual_norm(), b.residual_norm());
+        assert_eq!(a.side_info(), b.side_info());
+    }
+
+    #[test]
+    fn bank_self_query_ranks_self_first() {
+        // A bank queried with one of its own stored vectors should rank that id
+        // first under the estimator (its estimated distance to itself is the
+        // smallest).
+        let dim = 128;
+        let rot = Rotation::new(0xABCD_1234, dim);
+        let mut bank = EstimatorBank::new(rot);
+        let target: Vec<f32> = (0..dim).map(|i| (i as f32 * 0.19).sin() * 2.0).collect();
+        bank.insert_embedding(99, &target);
+        for id in 0..32u32 {
+            let v: Vec<f32> = (0..dim)
+                .map(|i| ((i as f32 + id as f32 * 3.0) * 0.05).cos())
+                .collect();
+            bank.insert_embedding(id, &v);
+        }
+        let top = bank.topk_estimated(&target, 1);
+        assert_eq!(top.len(), 1);
+        assert_eq!(top[0].0, 99, "self-query should rank the stored self first");
+    }
+}
diff --git a/v2/crates/wifi-densepose-ruvector/src/lib.rs b/v2/crates/wifi-densepose-ruvector/src/lib.rs
index 3022389d..7bf1b70d 100644
--- a/v2/crates/wifi-densepose-ruvector/src/lib.rs
+++ b/v2/crates/wifi-densepose-ruvector/src/lib.rs
@@ -29,6 +29,7 @@
 #[cfg(feature = "crv")]
 pub mod crv;
 pub mod coverage;
+pub mod estimator;
 pub mod event_log;
 pub mod mat;
 pub mod rotation;
@@ -36,6 +37,9 @@ pub mod signal;
 pub mod sketch;
 pub mod viewpoint;
 
+pub use estimator::{
+    DistanceEstimator, EstimatorBank, EstimatorQuery, EstimatorSketch, SideInfo,
+};
 pub use event_log::{NoveltyEvent, PrivacyEventLog};
 pub use rotation::Rotation;
 pub use sketch::{
diff --git a/v2/crates/wifi-densepose-ruvector/src/rotation.rs b/v2/crates/wifi-densepose-ruvector/src/rotation.rs
index 83314b46..a310b654 100644
--- a/v2/crates/wifi-densepose-ruvector/src/rotation.rs
+++ b/v2/crates/wifi-densepose-ruvector/src/rotation.rs
@@ -144,6 +144,29 @@ impl Rotation {
     /// rounding — see [`Rotation::apply`] tests and
     /// `rotation_preserves_norm`.
     pub fn apply(&self, embedding: &[f32]) -> Vec<f32> {
+        if self.dim == 0 {
+            return Vec::new();
+        }
+        let mut buf = self.apply_padded(embedding);
+        // Read back the first `dim` rotated coordinates as the sketch input.
+        buf.truncate(self.dim);
+        buf
+    }
+
+    /// Apply the rotation `R = H·D` and return **all `padded_dim` rotated
+    /// coordinates** (not truncated to `dim`).
+    ///
+    /// This is the frame the RaBitQ estimator ([`crate::estimator`]) works in:
+    /// the 1-bit code `x̄ ∈ {±1/√D}^D` is unit over the **padded** length `D`,
+    /// and the query dot product `⟨x̄, q'⟩` must be taken over that same `D`. For
+    /// a power-of-two `dim`, `padded_dim == dim` and this equals
+    /// [`Rotation::apply`]; for a non-power-of-two `dim` the tail coordinates
+    /// (the zero-padded energy redistributed by the FHT) are retained here but
+    /// dropped by `apply`.
+    ///
+    /// `dim == 0` yields an empty vector. Ragged input is handled charitably
+    /// (truncate / zero-extend to `dim`), as in [`Rotation::apply`].
+    pub fn apply_padded(&self, embedding: &[f32]) -> Vec<f32> {
         if self.dim == 0 {
             return Vec::new();
         }
@@ -157,9 +180,6 @@ impl Rotation {
 
         // In-place normalized Fast Hadamard Transform.
         fht_normalized(&mut buf);
-
-        // Read back the first `dim` rotated coordinates as the sketch input.
-        buf.truncate(self.dim);
         buf
     }
 }