wifi-densepose/v2/crates/wifi-densepose-signal/benches/dsp_perf_bench.rs

354 lines
13 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! ADR-154 Milestone-2 perf benchmarks (§7.4 P2 "bench-first" items).
//!
//! PROOF discipline (ADR-154 §0): every P2 item is **benched before touched**.
//! A micro-opt is landed only if the bench proves the path hot; otherwise the
//! committed bench *is* the result — a MEASURED-NULL that proves the rewrite was
//! unnecessary (exactly the §5.x "already amortized" pattern). No speedup is
//! claimed without a before/after number from here.
//!
//! Reproduce (compile-only):
//! cargo bench -p wifi-densepose-signal --no-default-features \
//! --bench dsp_perf_bench --no-run
//!
//! Reproduce (full run, writes target/criterion/ HTML):
//! cargo bench -p wifi-densepose-signal --no-default-features --bench dsp_perf_bench
//!
//! Groups:
//! * `multistatic_attention` (#5) — `node_attention_weights` at 2..8 nodes ×
//! 56 subcarriers. Re-derives consensus/softmax each call; no scratch to
//! reuse → expected MEASURED-NULL.
//! * `tomography_reconstruct` (#6) — full ISTA solve. The two voxel buffers are
//! allocated once per `reconstruct()` (then `.fill`-reused across
//! iterations), so the per-solve alloc is 2×n_voxels vs an
//! O(iters·links·voxels) compute → expected MEASURED-NULL.
//! * `pose_kalman_update` (#7) — Kalman predict+update loop. The "gain
//! matrices" are fixed-size **stack** arrays (`[[f32;3];6]`), not heap —
//! nothing to reuse → expected MEASURED-NULL.
//! * `spectrogram_multi_subcarrier` (#20) — `compute_multi_subcarrier_spectrogram`:
//! fresh-planner-per-subcarrier (BEFORE) vs hoisted-plan (AFTER, shipped).
//! The per-subcarrier FFT re-plan is the likely real win.
//! * `field_model_occupancy` (#8, `eigenvalue` only) — per-call n×n
//! eigendecomposition in `estimate_occupancy`. MEASUREMENT-ONLY: quantifies
//! the recompute cost; incremental SVD is a sized future project, not a
//! micro-fix.
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use ndarray::Array2;
use rustfft::FftPlanner;
use std::f64::consts::PI;
use std::time::Duration;
use wifi_densepose_signal::ruvsense::multistatic::node_attention_weights;
use wifi_densepose_signal::ruvsense::pose_tracker::KeypointState;
use wifi_densepose_signal::ruvsense::tomography::{
LinkGeometry, Position3D, RfTomographer, TomographyConfig,
};
use wifi_densepose_signal::spectrogram::{
compute_multi_subcarrier_spectrogram, compute_spectrogram, Spectrogram, SpectrogramConfig,
WindowFunction,
};
// ---------------------------------------------------------------------------
// #5 multistatic node_attention_weights
// ---------------------------------------------------------------------------
fn make_node_amplitudes(n_nodes: usize, n_sub: usize) -> Vec<Vec<f32>> {
(0..n_nodes)
.map(|n| {
(0..n_sub)
.map(|s| {
let phase = (n as f32 * 0.31 + s as f32 * 0.07) % std::f32::consts::TAU;
0.5 + 0.4 * phase.sin()
})
.collect()
})
.collect()
}
fn bench_multistatic_attention(c: &mut Criterion) {
let mut group = c.benchmark_group("multistatic_attention");
group.measurement_time(Duration::from_secs(3));
let n_sub = 56; // canonical-56 grid
for &n_nodes in &[2usize, 4, 8] {
let owned = make_node_amplitudes(n_nodes, n_sub);
let refs: Vec<&[f32]> = owned.iter().map(|v| v.as_slice()).collect();
group.throughput(Throughput::Elements(1));
group.bench_with_input(
BenchmarkId::new("weights", n_nodes),
&refs,
|b, amplitudes| {
b.iter(|| black_box(node_attention_weights(black_box(amplitudes), 1.0)));
},
);
}
group.finish();
}
// ---------------------------------------------------------------------------
// #6 tomography reconstruct (ISTA L1)
// ---------------------------------------------------------------------------
fn make_tomographer(n_links: usize) -> (RfTomographer, Vec<f64>) {
// A modest 8x8x4 grid (256 voxels), n_links TX/RX pairs around the box.
let config = TomographyConfig {
nx: 8,
ny: 8,
nz: 4,
bounds: [0.0, 0.0, 0.0, 4.0, 4.0, 2.0],
lambda: 0.01,
max_iterations: 50,
tolerance: 1e-6,
min_links: 8,
};
let mut links = Vec::with_capacity(n_links);
for i in 0..n_links {
let t = i as f64 / n_links as f64;
links.push(LinkGeometry {
tx: Position3D {
x: 4.0 * (t * PI).cos().abs(),
y: 0.0,
z: 1.0,
},
rx: Position3D {
x: 4.0 * (t * PI).sin().abs(),
y: 4.0,
z: 1.0,
},
link_id: i,
});
}
let tomo = RfTomographer::new(config, &links).unwrap();
// Deterministic attenuations (one occupied region in the middle).
let attenuations: Vec<f64> = (0..n_links)
.map(|i| 0.1 + 0.05 * ((i as f64 * 0.3).sin()))
.collect();
(tomo, attenuations)
}
fn bench_tomography_reconstruct(c: &mut Criterion) {
let mut group = c.benchmark_group("tomography_reconstruct");
group.measurement_time(Duration::from_secs(4));
for &n_links in &[16usize, 32] {
let (tomo, atten) = make_tomographer(n_links);
group.throughput(Throughput::Elements(1));
group.bench_with_input(
BenchmarkId::new("solve", n_links),
&(tomo, atten),
|b, (tomo, atten)| {
b.iter(|| black_box(tomo.reconstruct(black_box(atten)).unwrap().occupied_count));
},
);
}
group.finish();
}
// ---------------------------------------------------------------------------
// #7 pose tracker Kalman update loop
// ---------------------------------------------------------------------------
fn bench_pose_kalman_update(c: &mut Criterion) {
let mut group = c.benchmark_group("pose_kalman_update");
group.measurement_time(Duration::from_secs(3));
// 17 keypoints (COCO-17), N predict+update cycles — a realistic frame batch.
for &n_updates in &[17usize, 170] {
group.throughput(Throughput::Elements(n_updates as u64));
group.bench_with_input(BenchmarkId::new("cycles", n_updates), &n_updates, |b, &n| {
b.iter(|| {
let mut acc = 0.0_f32;
for k in 0..n {
let mut state = KeypointState::new(
(k as f32 * 0.1).sin(),
(k as f32 * 0.2).cos(),
1.0 + (k as f32 * 0.05),
);
state.predict(0.05, 0.5);
let meas = [
(k as f32 * 0.1).sin() + 0.01,
(k as f32 * 0.2).cos() - 0.01,
1.0 + (k as f32 * 0.05),
];
state.update(&meas, 0.1, 1.0);
acc += state.state[0];
}
black_box(acc)
});
});
}
group.finish();
}
// ---------------------------------------------------------------------------
// #20 multi-subcarrier spectrogram: fresh-planner vs hoisted plan
// ---------------------------------------------------------------------------
fn make_csi_temporal(n_samples: usize, n_sc: usize) -> Array2<f64> {
Array2::from_shape_fn((n_samples, n_sc), |(t, sc)| {
let freq = 0.7 + sc as f64 * 0.13;
(2.0 * PI * freq * t as f64 / 100.0).sin()
+ 0.3 * (2.0 * PI * (freq * 2.1) * t as f64 / 100.0).cos()
})
}
/// BEFORE: re-plan the FFT inside `compute_spectrogram` for every subcarrier.
/// Faithful transcription of the pre-ADR-154-M2 `compute_multi_subcarrier_spectrogram`.
fn multi_fresh_planner(
csi: &Array2<f64>,
sample_rate: f64,
config: &SpectrogramConfig,
) -> Vec<Spectrogram> {
let (_, n_sc) = csi.dim();
(0..n_sc)
.map(|sc| {
let col: Vec<f64> = csi.column(sc).to_vec();
// compute_spectrogram builds a fresh FftPlanner on every call.
compute_spectrogram(&col, sample_rate, config).unwrap()
})
.collect()
}
fn bench_spectrogram_multi_subcarrier(c: &mut Criterion) {
let mut group = c.benchmark_group("spectrogram_multi_subcarrier");
group.measurement_time(Duration::from_secs(5));
let sample_rate = 100.0;
// Realistic: 600 temporal samples (~6 s @ 100 Hz) across 56 subcarriers,
// window 128. n_sc re-plans removed by the hoist.
for &(n_samples, n_sc, window) in &[(600usize, 56usize, 128usize), (600, 56, 256)] {
let csi = make_csi_temporal(n_samples, n_sc);
let config = SpectrogramConfig {
window_size: window,
hop_size: 64,
window_fn: WindowFunction::Hann,
power: true,
};
group.throughput(Throughput::Elements(n_sc as u64));
// BEFORE: fresh planner per subcarrier.
group.bench_with_input(
BenchmarkId::new("fresh_planner", format!("sc{n_sc}_w{window}")),
&config,
|b, cfg| {
b.iter(|| black_box(multi_fresh_planner(black_box(&csi), sample_rate, cfg).len()));
},
);
// AFTER: hoisted plan (the shipped `compute_multi_subcarrier_spectrogram`).
group.bench_with_input(
BenchmarkId::new("hoisted_plan", format!("sc{n_sc}_w{window}")),
&config,
|b, cfg| {
b.iter(|| {
black_box(
compute_multi_subcarrier_spectrogram(black_box(&csi), sample_rate, cfg)
.unwrap()
.len(),
)
});
},
);
}
group.finish();
}
// A standalone FftPlanner sanity micro-bench documenting the cost the hoist
// removes: building+planning a length-N forward FFT once.
fn bench_fft_plan_cost(c: &mut Criterion) {
let mut group = c.benchmark_group("fft_plan_cost");
group.measurement_time(Duration::from_secs(2));
for &n in &[128usize, 256] {
group.bench_with_input(BenchmarkId::new("plan_forward", n), &n, |b, &n| {
b.iter(|| {
let mut planner = FftPlanner::<f64>::new();
black_box(planner.plan_fft_forward(black_box(n)))
});
});
}
group.finish();
}
// ---------------------------------------------------------------------------
// #8 field_model SVD/eigendecomposition recompute (MEASUREMENT-ONLY)
// ---------------------------------------------------------------------------
// `estimate_occupancy` builds an n×n covariance and eigendecomposes it on every
// call (BLAS, `eigenvalue` feature). This bench quantifies that per-call cost so
// ADR-154 §7.4 #8 can record a number; incremental SVD is a sized future item,
// NOT attempted here.
#[cfg(feature = "eigenvalue")]
mod eig {
use super::*;
use wifi_densepose_signal::ruvsense::field_model::{FieldModel, FieldModelConfig};
fn calibrated_model(n_sub: usize, n_links: usize) -> FieldModel {
let config = FieldModelConfig {
n_subcarriers: n_sub,
n_links,
n_modes: 3,
min_calibration_frames: 20,
baseline_expiry_s: 86_400.0,
};
let mut model = FieldModel::new(config).unwrap();
// Feed deterministic calibration frames: [n_links][n_sub] per observation.
for f in 0..30 {
let obs: Vec<Vec<f64>> = (0..n_links)
.map(|l| {
(0..n_sub)
.map(|s| {
0.5 + 0.3
* ((f as f64 * 0.1 + l as f64 * 0.2 + s as f64 * 0.05).sin())
})
.collect()
})
.collect();
model.feed_calibration(&obs).unwrap();
}
model.finalize_calibration(0, 0).unwrap();
model
}
pub fn bench_field_model_occupancy(c: &mut Criterion) {
let mut group = c.benchmark_group("field_model_occupancy");
group.measurement_time(Duration::from_secs(4));
let n_sub = 56;
let model = calibrated_model(n_sub, 4);
// Sliding window of recent frames (50 ~ 2.5 s @ 20 Hz).
let frames: Vec<Vec<f64>> = (0..50)
.map(|t| {
(0..n_sub)
.map(|s| 0.5 + 0.3 * ((t as f64 * 0.15 + s as f64 * 0.07).sin()))
.collect()
})
.collect();
group.throughput(Throughput::Elements(1));
group.bench_function(BenchmarkId::new("eigh", n_sub), |b| {
b.iter(|| black_box(model.estimate_occupancy(black_box(&frames))));
});
group.finish();
}
}
#[cfg(feature = "eigenvalue")]
criterion_group!(
benches,
bench_multistatic_attention,
bench_tomography_reconstruct,
bench_pose_kalman_update,
bench_spectrogram_multi_subcarrier,
bench_fft_plan_cost,
eig::bench_field_model_occupancy,
);
#[cfg(not(feature = "eigenvalue"))]
criterion_group!(
benches,
bench_multistatic_attention,
bench_tomography_reconstruct,
bench_pose_kalman_update,
bench_spectrogram_multi_subcarrier,
bench_fft_plan_cost,
);
criterion_main!(benches);