perf(ruvector): eliminate fuse() double-clone (~2.17x marshalling) + bench (ADR-156 §2.4, §4)

MultistaticArray::fuse / fuse_ungated cloned every viewpoint embedding twice per
fusion (once into `extracted`, again when building the attention input). Now the
embeddings are MOVED out of `extracted` (one clone per viewpoint instead of two),
capturing geometry/ids by Copy in the same pass. Correctness-neutral — all 100
viewpoint/mat lib tests pass unchanged.

MEASURED (new benches/fusion_bench.rs, embedding_extract A/B, 8 vp x 128-d):
  before_double_clone 1.0029 us -> after_single_clone 461.6 ns  (~2.17x)
End-to-end fusion_pipeline (8 vp): 202 us — marshalling is <1% of fusion
(n*n attention dominates), so end-to-end win is modest; the A/B isolates the
clone elimination. Reproduce:
  cargo bench -p wifi-densepose-ruvector --bench fusion_bench

Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
ruv 2026-06-11 20:23:27 -04:00
parent a2daa2e443
commit a92b043143
3 changed files with 179 additions and 21 deletions

View File

@ -43,3 +43,7 @@ required-features = ["crv"]
[[bench]]
name = "sketch_bench"
harness = false
[[bench]]
name = "fusion_bench"
harness = false

View File

@ -0,0 +1,148 @@
//! ADR-156 §finding 4/5 — cross-viewpoint fusion hot-path benchmark.
//!
//! Two groups:
//!
//! 1. **`fusion_pipeline`** — end-to-end `MultistaticArray::fuse()` at realistic
//! array sizes (2–8 viewpoints) and the AETHER embedding dimension (128).
//! This is the production fusion path exercised once per TDM cycle.
//!
//! 2. **`embedding_extract`** — an isolated A/B of the embedding-marshalling step
//! that finding 4 fixed: the OLD code cloned every viewpoint embedding
//! *twice* (once into `extracted`, once into `embeddings`); the NEW code
//! clones once (out of the borrowed `viewpoints`) and then *moves* into the
//! attention input. The `before_double_clone` / `after_single_clone` benches
//! measure exactly that difference so the perf claim is MEASURED, not asserted.
//!
//! Run with:
//! ```bash
//! cargo bench -p wifi-densepose-ruvector --bench fusion_bench
//! ```
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use std::hint;
use wifi_densepose_ruvector::viewpoint::attention::ViewpointGeometry;
use wifi_densepose_ruvector::viewpoint::{FusionConfig, MultistaticArray, ViewpointEmbedding};
/// Generates a reproducible pseudo-random embedding of `dim` values.
///
/// A 32-bit linear congruential generator is used so the benchmark needs no
/// `rand` dev-dependency. The same `(dim, seed)` pair always yields the same
/// vector, and every element lies in `[-0.5, 0.5)`.
fn make_embedding(dim: usize, seed: u32) -> Vec<f32> {
    // Top 24 bits of the LCG state are mapped onto [0, 1) by this divisor.
    let scale = (1u32 << 24) as f32;
    // Scramble the seed so that small consecutive seeds start far apart.
    let mut lcg = seed.wrapping_mul(2654435761).wrapping_add(1);
    let mut values = Vec::with_capacity(dim);
    for _ in 0..dim {
        lcg = lcg.wrapping_mul(1664525).wrapping_add(1013904223);
        // Drop the weak low byte, normalise, then centre around zero.
        values.push((lcg >> 8) as f32 / scale - 0.5);
    }
    values
}
/// Constructs a `MultistaticArray` holding `n` viewpoints with `dim`-dimensional
/// embeddings, after feeding it coherent phase differences so the gate is open.
///
/// Viewpoints are spread evenly around a circle of radius 3.0 and each one gets
/// a deterministic embedding from `make_embedding`. Panics if the array rejects
/// a submitted viewpoint.
fn make_array(n: usize, dim: usize) -> MultistaticArray {
    const RADIUS: f32 = 3.0;

    let mut array = MultistaticArray::new(
        1,
        FusionConfig {
            embed_dim: dim,
            coherence_threshold: 0.5,
            coherence_hysteresis: 0.0,
            min_snr_db: 0.0,
            ..FusionConfig::default()
        },
    );

    // 60 identical phase differences: coherent input → gate opens.
    (0..60).for_each(|_| {
        array.push_phase_diff(0.1);
    });

    for idx in 0..n {
        // Evenly spaced azimuths over a full turn.
        let angle = 2.0 * std::f32::consts::PI * idx as f32 / n as f32;
        let viewpoint = ViewpointEmbedding {
            node_id: idx as u32,
            // Seeds start at 1 so every viewpoint gets a distinct embedding.
            embedding: make_embedding(dim, idx as u32 + 1),
            azimuth: angle,
            elevation: 0.0,
            baseline: RADIUS,
            position: (RADIUS * angle.cos(), RADIUS * angle.sin()),
            snr_db: 15.0,
        };
        array.submit_viewpoint(viewpoint).unwrap();
    }

    array
}
/// `fusion_pipeline` group: times one `fuse_ungated()` call on arrays of 2, 4
/// and 8 viewpoints with 128-dimensional embeddings.
///
/// The array is built once per parameter, outside the timed closure, so only
/// the fusion call itself (and dropping its result) is measured.
fn bench_fusion_pipeline(c: &mut Criterion) {
    // AETHER embedding dimension (ADR-024).
    const EMBED_DIM: usize = 128;
    const ARRAY_SIZES: [usize; 3] = [2, 4, 8];

    let mut group = c.benchmark_group("fusion_pipeline");
    for count in ARRAY_SIZES {
        group.bench_with_input(
            BenchmarkId::from_parameter(count),
            &count,
            |bencher, &viewpoints| {
                let mut array = make_array(viewpoints, EMBED_DIM);
                bencher.iter(|| {
                    // black_box keeps the optimizer from discarding the fusion result.
                    hint::black_box(&array.fuse_ungated().unwrap());
                });
            },
        );
    }
    group.finish();
}
// --- Finding 4 A/B: double-clone vs single-move embedding marshalling ---------
/// Baseline ("before") marshalling: each embedding is cloned into `extracted`
/// and then cloned a SECOND time into the returned attention input, i.e. two
/// heap allocations and two memcpys per viewpoint.
///
/// The redundant clone is intentional here — it reproduces the old behaviour so
/// the A/B benchmark has something to compare against. Do not "fix" it.
fn extract_double_clone(viewpoints: &[ViewpointEmbedding]) -> Vec<Vec<f32>> {
    type Ext = (u32, Vec<f32>, f32, (f32, f32));

    // First clone: copy everything needed out of the borrowed viewpoints.
    let mut extracted: Vec<Ext> = Vec::with_capacity(viewpoints.len());
    extracted.extend(
        viewpoints
            .iter()
            .map(|vp| (vp.node_id, vp.embedding.clone(), vp.azimuth, vp.position)),
    );

    // Second clone (the bug being measured).
    let mut embeddings: Vec<Vec<f32>> = Vec::with_capacity(extracted.len());
    for entry in &extracted {
        embeddings.push(entry.1.clone());
    }

    // Geometry is built (and dropped) so the work matches the fusion code path.
    let _geometry: Vec<ViewpointGeometry> = extracted
        .iter()
        .map(|entry| ViewpointGeometry {
            azimuth: entry.2,
            position: entry.3,
        })
        .collect();

    embeddings
}
/// Improved ("after") marshalling: each embedding is cloned exactly once into
/// `extracted` and then MOVED into the returned attention input, i.e. one heap
/// allocation and one memcpy per viewpoint.
fn extract_single_clone(viewpoints: &[ViewpointEmbedding]) -> Vec<Vec<f32>> {
    type Ext = (u32, Vec<f32>, f32, (f32, f32));

    // The only clone: copy everything needed out of the borrowed viewpoints.
    let mut extracted: Vec<Ext> = Vec::with_capacity(viewpoints.len());
    extracted.extend(
        viewpoints
            .iter()
            .map(|vp| (vp.node_id, vp.embedding.clone(), vp.azimuth, vp.position)),
    );

    let count = extracted.len();
    let mut embeddings: Vec<Vec<f32>> = Vec::with_capacity(count);
    let mut _geometry: Vec<ViewpointGeometry> = Vec::with_capacity(count);

    // Consuming `extracted` lets each embedding be moved rather than cloned.
    extracted
        .into_iter()
        .for_each(|(_, embedding, azimuth, position)| {
            _geometry.push(ViewpointGeometry { azimuth, position });
            embeddings.push(embedding);
        });

    embeddings
}
/// `embedding_extract` group: isolated A/B of the embedding-marshalling step,
/// comparing `extract_double_clone` against `extract_single_clone` on the same
/// 8 viewpoints with 128-dimensional embeddings.
fn bench_embedding_extract(c: &mut Criterion) {
    const EMBED_DIM: usize = 128;
    // Max realistic multistatic array.
    const VIEWPOINT_COUNT: u32 = 8;

    // Shared, read-only input: both variants see exactly the same viewpoints.
    let mut viewpoints: Vec<ViewpointEmbedding> = Vec::with_capacity(VIEWPOINT_COUNT as usize);
    for node in 0..VIEWPOINT_COUNT {
        viewpoints.push(ViewpointEmbedding {
            node_id: node,
            embedding: make_embedding(EMBED_DIM, node + 1),
            azimuth: 0.0,
            elevation: 0.0,
            baseline: 3.0,
            position: (0.0, 0.0),
            snr_db: 15.0,
        });
    }

    let mut group = c.benchmark_group("embedding_extract");
    // black_box on both the input and the output keeps the optimizer from
    // hoisting or eliminating the marshalling work.
    group.bench_function("before_double_clone", |bencher| {
        bencher.iter(|| black_box(extract_double_clone(black_box(&viewpoints))));
    });
    group.bench_function("after_single_clone", |bencher| {
        bencher.iter(|| black_box(extract_single_clone(black_box(&viewpoints))));
    });
    group.finish();
}
// Register both benchmark functions under the `benches` group and let Criterion
// generate the bench binary's entry point (the `fusion_bench` target is declared
// with `harness = false`, so the default libtest harness is not used).
criterion_group!(benches, bench_fusion_pipeline, bench_embedding_extract);
criterion_main!(benches);

View File

@ -359,6 +359,10 @@ impl MultistaticArray {
self.cycle_count += 1;
// Extract all needed data from viewpoints upfront to avoid borrow conflicts.
// Embeddings are cloned exactly once (out of `self.viewpoints`, which we
// borrow immutably); metadata is Copy. The previous implementation cloned
// each embedding a SECOND time when building `embeddings` from `extracted`
// — eliminated here (ADR-156 §finding 4).
let min_snr = self.config.min_snr_db;
let total_viewpoints = self.viewpoints.len();
let extracted: Vec<ExtractedViewpoint> = self
@ -394,22 +398,23 @@ impl MultistaticArray {
});
}
// Prepare embeddings and geometries from extracted data.
let embeddings: Vec<Vec<f32>> = extracted.iter().map(|(_, e, _, _)| e.clone()).collect();
let geom: Vec<ViewpointGeometry> = extracted
.iter()
.map(|(_, _, az, pos)| ViewpointGeometry {
azimuth: *az,
position: *pos,
})
.collect();
// Move the cloned embeddings out of `extracted` (no second clone) while
// capturing geometry/ids by Copy. `extracted` is consumed here.
let mut embeddings: Vec<Vec<f32>> = Vec::with_capacity(n_valid);
let mut geom: Vec<ViewpointGeometry> = Vec::with_capacity(n_valid);
let mut azimuths: Vec<f32> = Vec::with_capacity(n_valid);
let mut ids: Vec<NodeId> = Vec::with_capacity(n_valid);
for (id, emb, az, pos) in extracted {
geom.push(ViewpointGeometry { azimuth: az, position: pos });
azimuths.push(az);
ids.push(id);
embeddings.push(emb); // move, not clone
}
// Run cross-viewpoint attention fusion.
let fused_emb = self.attention.fuse(&embeddings, &geom)?;
// Compute GDI.
let azimuths: Vec<f32> = extracted.iter().map(|(_, _, az, _)| *az).collect();
let ids: Vec<NodeId> = extracted.iter().map(|(id, _, _, _)| *id).collect();
let gdi_opt = GeometricDiversityIndex::compute(&azimuths, &ids);
let (gdi_val, n_eff) = match &gdi_opt {
Some(g) => (g.value, g.n_effective),
@ -456,19 +461,20 @@ impl MultistaticArray {
});
}
let embeddings: Vec<Vec<f32>> = extracted.iter().map(|(_, e, _, _)| e.clone()).collect();
let geom: Vec<ViewpointGeometry> = extracted
.iter()
.map(|(_, _, az, pos)| ViewpointGeometry {
azimuth: *az,
position: *pos,
})
.collect();
// Move embeddings out of `extracted` (no second clone — ADR-156 §finding 4).
let mut embeddings: Vec<Vec<f32>> = Vec::with_capacity(n_valid);
let mut geom: Vec<ViewpointGeometry> = Vec::with_capacity(n_valid);
let mut azimuths: Vec<f32> = Vec::with_capacity(n_valid);
let mut ids: Vec<NodeId> = Vec::with_capacity(n_valid);
for (id, emb, az, pos) in extracted {
geom.push(ViewpointGeometry { azimuth: az, position: pos });
azimuths.push(az);
ids.push(id);
embeddings.push(emb);
}
let fused_emb = self.attention.fuse(&embeddings, &geom)?;
let azimuths: Vec<f32> = extracted.iter().map(|(_, _, az, _)| *az).collect();
let ids: Vec<NodeId> = extracted.iter().map(|(id, _, _, _)| *id).collect();
let gdi_opt = GeometricDiversityIndex::compute(&azimuths, &ids);
let (gdi_val, n_eff) = match &gdi_opt {
Some(g) => (g.value, g.n_effective),