// wifi-densepose/v2/crates/wifi-densepose-signal/benches/dsp_perf_bench.rs

//! ADR-154 Milestone-2 perf benchmarks (§7.4 P2 "bench-first" items).
//!
//! PROOF discipline (ADR-154 §0): every P2 item is **benched before touched**.
//! A micro-opt is landed only if the bench proves the path hot; otherwise the
//! committed bench *is* the result — a MEASURED-NULL that proves the rewrite was
//! unnecessary (exactly the §5.x "already amortized" pattern). No speedup is
//! claimed without a before/after number from here.
//!
//! Reproduce (compile-only):
//!   cargo bench -p wifi-densepose-signal --no-default-features \
//!     --bench dsp_perf_bench --no-run
//!
//! Reproduce (full run, writes target/criterion/ HTML):
//!   cargo bench -p wifi-densepose-signal --no-default-features --bench dsp_perf_bench
//!
//! Groups:
//!   * `multistatic_attention` (#5) — `node_attention_weights` at 2..8 nodes ×
//!       56 subcarriers. Re-derives consensus/softmax each call; no scratch to
//!       reuse → expected MEASURED-NULL.
//!   * `tomography_reconstruct` (#6) — full ISTA solve. The two voxel buffers are
//!       allocated once per `reconstruct()` (then `.fill`-reused across
//!       iterations), so the per-solve alloc is 2×n_voxels vs an
//!       O(iters·links·voxels) compute → expected MEASURED-NULL.
//!   * `pose_kalman_update` (#7) — Kalman predict+update loop. The "gain
//!       matrices" are fixed-size **stack** arrays (`[[f32;3];6]`), not heap —
//!       nothing to reuse → expected MEASURED-NULL.
//!   * `spectrogram_multi_subcarrier` (#20) — `compute_multi_subcarrier_spectrogram`:
//!       fresh-planner-per-subcarrier (BEFORE) vs hoisted-plan (AFTER, shipped).
//!       The per-subcarrier FFT re-plan is the likely real win.
//!   * `field_model_occupancy` (#8, `eigenvalue` only) — per-call n×n
//!       eigendecomposition in `estimate_occupancy`. MEASUREMENT-ONLY: quantifies
//!       the recompute cost; incremental SVD is a sized future project, not a
//!       micro-fix.

use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use ndarray::Array2;
use rustfft::FftPlanner;
use std::f64::consts::PI;
use std::time::Duration;

use wifi_densepose_signal::ruvsense::multistatic::node_attention_weights;
use wifi_densepose_signal::ruvsense::pose_tracker::KeypointState;
use wifi_densepose_signal::ruvsense::tomography::{
    LinkGeometry, Position3D, RfTomographer, TomographyConfig,
};
use wifi_densepose_signal::spectrogram::{
    compute_multi_subcarrier_spectrogram, compute_spectrogram, Spectrogram, SpectrogramConfig,
    WindowFunction,
};

// ---------------------------------------------------------------------------
// #5 multistatic node_attention_weights
// ---------------------------------------------------------------------------

/// Builds a deterministic amplitude table: one row per node, `n_sub` values
/// per row.
///
/// Each value is `0.5 + 0.4 * sin(phase)`, where the phase advances by 0.31
/// per node index and 0.07 per subcarrier index (wrapped at `TAU`). No
/// randomness is involved, so repeated calls return identical data.
fn make_node_amplitudes(n_nodes: usize, n_sub: usize) -> Vec<Vec<f32>> {
    let mut table = Vec::with_capacity(n_nodes);
    for node in 0..n_nodes {
        let mut row = Vec::with_capacity(n_sub);
        for sub in 0..n_sub {
            let phase = (node as f32 * 0.31 + sub as f32 * 0.07) % std::f32::consts::TAU;
            row.push(0.5 + 0.4 * phase.sin());
        }
        table.push(row);
    }
    table
}

/// Benchmarks `node_attention_weights` (ADR-154 item #5) for 2, 4 and 8 nodes
/// on a 56-subcarrier grid.
///
/// Each benchmark iteration is one call with the second argument fixed at
/// `1.0`; throughput is reported as one element per call.
fn bench_multistatic_attention(c: &mut Criterion) {
    // Canonical-56 subcarrier grid.
    const N_SUB: usize = 56;

    let mut group = c.benchmark_group("multistatic_attention");
    group.measurement_time(Duration::from_secs(3));

    for n_nodes in [2usize, 4, 8] {
        // Keep the owned rows alive for as long as the borrowed slices are used.
        let owned = make_node_amplitudes(n_nodes, N_SUB);
        let refs: Vec<&[f32]> = owned.iter().map(Vec::as_slice).collect();

        group.throughput(Throughput::Elements(1));
        group.bench_with_input(
            BenchmarkId::new("weights", n_nodes),
            &refs,
            |bencher, amplitudes| {
                bencher.iter(|| black_box(node_attention_weights(black_box(amplitudes), 1.0)));
            },
        );
    }

    group.finish();
}

// ---------------------------------------------------------------------------
// #6 tomography reconstruct (ISTA L1)
// ---------------------------------------------------------------------------

fn make_tomographer(n_links: usize) -> (RfTomographer, Vec<f64>) {
    // A modest 8x8x4 grid (256 voxels), n_links TX/RX pairs around the box.
    let config = TomographyConfig {
        nx: 8,
        ny: 8,
        nz: 4,
        bounds: [0.0, 0.0, 0.0, 4.0, 4.0, 2.0],
        lambda: 0.01,
        max_iterations: 50,
        tolerance: 1e-6,
        min_links: 8,
    };
    let mut links = Vec::with_capacity(n_links);
    for i in 0..n_links {
        let t = i as f64 / n_links as f64;
        links.push(LinkGeometry {
            tx: Position3D {
                x: 4.0 * (t * PI).cos().abs(),
                y: 0.0,
                z: 1.0,
            },
            rx: Position3D {
                x: 4.0 * (t * PI).sin().abs(),
                y: 4.0,
                z: 1.0,
            },
            link_id: i,
        });
    }
    let tomo = RfTomographer::new(config, &links).unwrap();
    // Deterministic attenuations (one occupied region in the middle).
    let attenuations: Vec<f64> = (0..n_links)
        .map(|i| 0.1 + 0.05 * ((i as f64 * 0.3).sin()))
        .collect();
    (tomo, attenuations)
}

/// Benchmarks a full `RfTomographer::reconstruct` solve (ADR-154 item #6) for
/// 16 and 32 links.
///
/// The fixture is built once per link count, outside the timed closure; each
/// iteration performs one solve and observes `occupied_count` so the result
/// cannot be optimised away.
///
/// # Panics
///
/// Panics if `reconstruct` returns an error.
fn bench_tomography_reconstruct(c: &mut Criterion) {
    let mut group = c.benchmark_group("tomography_reconstruct");
    group.measurement_time(Duration::from_secs(4));

    for n_links in [16usize, 32] {
        let fixture = make_tomographer(n_links);
        group.throughput(Throughput::Elements(1));
        group.bench_with_input(
            BenchmarkId::new("solve", n_links),
            &fixture,
            |bencher, (solver, readings)| {
                bencher.iter(|| {
                    let solved = solver.reconstruct(black_box(readings)).unwrap();
                    black_box(solved.occupied_count)
                });
            },
        );
    }

    group.finish();
}

// ---------------------------------------------------------------------------
// #7 pose tracker Kalman update loop
// ---------------------------------------------------------------------------

/// Benchmarks the `KeypointState` predict+update cycle (ADR-154 item #7).
///
/// Runs batches of 17 and 170 cycles (17 keypoints, COCO-17). Every cycle
/// creates a fresh state, predicts, applies one slightly offset measurement,
/// and folds the first state component into a running sum that is handed to
/// `black_box`. Throughput is reported per cycle.
fn bench_pose_kalman_update(c: &mut Criterion) {
    let mut group = c.benchmark_group("pose_kalman_update");
    group.measurement_time(Duration::from_secs(3));

    for n_updates in [17usize, 170] {
        group.throughput(Throughput::Elements(n_updates as u64));
        group.bench_with_input(
            BenchmarkId::new("cycles", n_updates),
            &n_updates,
            |bencher, &cycles| {
                bencher.iter(|| {
                    let total = (0..cycles)
                        .map(|k| {
                            let kf = k as f32;
                            let x = (kf * 0.1).sin();
                            let y = (kf * 0.2).cos();
                            let z = 1.0 + (kf * 0.05);

                            let mut state = KeypointState::new(x, y, z);
                            state.predict(0.05, 0.5);
                            // Measurement is the initial position nudged by ±0.01 in x/y.
                            let meas = [x + 0.01, y - 0.01, z];
                            state.update(&meas, 0.1, 1.0);
                            state.state[0]
                        })
                        // Explicit fold keeps the left-to-right f32 summation from 0.0.
                        .fold(0.0_f32, |acc, value| acc + value);
                    black_box(total)
                });
            },
        );
    }

    group.finish();
}

// ---------------------------------------------------------------------------
// #20 multi-subcarrier spectrogram: fresh-planner vs hoisted plan
// ---------------------------------------------------------------------------

/// Builds a deterministic `(n_samples, n_sc)` matrix of synthetic CSI values.
///
/// Column `sc` carries a sine at `0.7 + 0.13·sc` plus a cosine at 2.1× that
/// frequency scaled by 0.3; the row index is divided by 100 to form the time
/// argument.
fn make_csi_temporal(n_samples: usize, n_sc: usize) -> Array2<f64> {
    Array2::from_shape_fn((n_samples, n_sc), |(t, sc)| {
        let freq = 0.7 + sc as f64 * 0.13;
        let tick = t as f64;
        let fundamental = (2.0 * PI * freq * tick / 100.0).sin();
        let overtone = (2.0 * PI * (freq * 2.1) * tick / 100.0).cos();
        fundamental + 0.3 * overtone
    })
}

/// BEFORE baseline: one `compute_spectrogram` call per subcarrier column, so
/// the FFT is re-planned for every subcarrier.
///
/// Faithful transcription of the pre-ADR-154-M2
/// `compute_multi_subcarrier_spectrogram`. Returns one `Spectrogram` per
/// column of `csi`, in column order.
///
/// # Panics
///
/// Panics if `compute_spectrogram` returns an error for any column.
fn multi_fresh_planner(
    csi: &Array2<f64>,
    sample_rate: f64,
    config: &SpectrogramConfig,
) -> Vec<Spectrogram> {
    let n_sc = csi.dim().1;
    let mut spectrograms = Vec::with_capacity(n_sc);
    for sc in 0..n_sc {
        // Copy the column into a contiguous buffer for the slice-based API.
        let column: Vec<f64> = csi.column(sc).to_vec();
        // compute_spectrogram builds a fresh FftPlanner on every call.
        let spec = compute_spectrogram(&column, sample_rate, config).unwrap();
        spectrograms.push(spec);
    }
    spectrograms
}

/// Benchmarks the multi-subcarrier spectrogram (ADR-154 item #20) in two
/// variants per case: `fresh_planner` (BEFORE, one FFT re-plan per
/// subcarrier) and `hoisted_plan` (AFTER, the shipped
/// `compute_multi_subcarrier_spectrogram`).
///
/// Cases use 600 temporal samples (~6 s @ 100 Hz) across 56 subcarriers with
/// window sizes 128 and 256; the hop size is 64 in both. Throughput is
/// reported per subcarrier.
///
/// # Panics
///
/// Panics if either spectrogram routine returns an error.
fn bench_spectrogram_multi_subcarrier(c: &mut Criterion) {
    const SAMPLE_RATE: f64 = 100.0;

    let mut group = c.benchmark_group("spectrogram_multi_subcarrier");
    group.measurement_time(Duration::from_secs(5));

    // (temporal samples, subcarriers, window size)
    let cases = [(600usize, 56usize, 128usize), (600, 56, 256)];
    for (n_samples, n_sc, window) in cases {
        let csi = make_csi_temporal(n_samples, n_sc);
        let config = SpectrogramConfig {
            window_size: window,
            hop_size: 64,
            window_fn: WindowFunction::Hann,
            power: true,
        };
        // Both variants share the same parameter label so reports pair them up.
        let label = format!("sc{n_sc}_w{window}");
        group.throughput(Throughput::Elements(n_sc as u64));

        // BEFORE: fresh planner per subcarrier.
        group.bench_with_input(
            BenchmarkId::new("fresh_planner", &label),
            &config,
            |bencher, cfg| {
                bencher.iter(|| {
                    let specs = multi_fresh_planner(black_box(&csi), SAMPLE_RATE, cfg);
                    black_box(specs.len())
                });
            },
        );

        // AFTER: hoisted plan (the shipped implementation).
        group.bench_with_input(
            BenchmarkId::new("hoisted_plan", &label),
            &config,
            |bencher, cfg| {
                bencher.iter(|| {
                    let specs =
                        compute_multi_subcarrier_spectrogram(black_box(&csi), SAMPLE_RATE, cfg)
                            .unwrap();
                    black_box(specs.len())
                });
            },
        );
    }

    group.finish();
}

/// Standalone `FftPlanner` sanity micro-bench documenting the cost the hoist
/// removes: constructing a planner and planning one length-N forward FFT.
///
/// Measured for N = 128 and N = 256; a new planner is created inside every
/// iteration so nothing is reused between iterations through the planner.
fn bench_fft_plan_cost(c: &mut Criterion) {
    let mut group = c.benchmark_group("fft_plan_cost");
    group.measurement_time(Duration::from_secs(2));

    for fft_len in [128usize, 256] {
        group.bench_with_input(
            BenchmarkId::new("plan_forward", fft_len),
            &fft_len,
            |bencher, &len| {
                bencher.iter(|| {
                    let mut planner = FftPlanner::<f64>::new();
                    let plan = planner.plan_fft_forward(black_box(len));
                    black_box(plan)
                });
            },
        );
    }

    group.finish();
}

// ---------------------------------------------------------------------------
// #8 field_model SVD/eigendecomposition recompute (MEASUREMENT-ONLY)
// ---------------------------------------------------------------------------
// `estimate_occupancy` builds an n×n covariance and eigendecomposes it on every
// call (BLAS, `eigenvalue` feature). This bench quantifies that per-call cost so
// ADR-154 §7.4 #8 can record a number; incremental SVD is a sized future item,
// NOT attempted here.
#[cfg(feature = "eigenvalue")]
mod eig {
    use super::*;
    use wifi_densepose_signal::ruvsense::field_model::{FieldModel, FieldModelConfig};

    /// Builds a `FieldModel` and calibrates it with 30 deterministic
    /// observations shaped `[n_links][n_sub]` (the configured minimum is 20
    /// calibration frames), then finalises the calibration.
    ///
    /// # Panics
    ///
    /// Panics if model construction, any calibration feed, or finalisation
    /// returns an error.
    fn calibrated_model(n_sub: usize, n_links: usize) -> FieldModel {
        let mut model = FieldModel::new(FieldModelConfig {
            n_subcarriers: n_sub,
            n_links,
            n_modes: 3,
            min_calibration_frames: 20,
            baseline_expiry_s: 86_400.0,
        })
        .unwrap();

        for frame in 0..30 {
            let frame_phase = frame as f64 * 0.1;
            let mut obs: Vec<Vec<f64>> = Vec::with_capacity(n_links);
            for link in 0..n_links {
                let link_phase = frame_phase + link as f64 * 0.2;
                let row: Vec<f64> = (0..n_sub)
                    .map(|sub| 0.5 + 0.3 * ((link_phase + sub as f64 * 0.05).sin()))
                    .collect();
                obs.push(row);
            }
            model.feed_calibration(&obs).unwrap();
        }

        model.finalize_calibration(0, 0).unwrap();
        model
    }

    /// Measures the per-call cost of `FieldModel::estimate_occupancy`
    /// (ADR-154 item #8, measurement only) on a 50-frame window of 56
    /// subcarriers, using a model calibrated for 4 links.
    pub fn bench_field_model_occupancy(c: &mut Criterion) {
        const N_SUB: usize = 56;

        let mut group = c.benchmark_group("field_model_occupancy");
        group.measurement_time(Duration::from_secs(4));

        let model = calibrated_model(N_SUB, 4);

        // Sliding window of recent frames (50 ~ 2.5 s @ 20 Hz).
        let mut frames: Vec<Vec<f64>> = Vec::with_capacity(50);
        for t in 0..50 {
            let time_phase = t as f64 * 0.15;
            let frame: Vec<f64> = (0..N_SUB)
                .map(|sub| 0.5 + 0.3 * ((time_phase + sub as f64 * 0.07).sin()))
                .collect();
            frames.push(frame);
        }

        group.throughput(Throughput::Elements(1));
        group.bench_function(BenchmarkId::new("eigh", N_SUB), |bencher| {
            bencher.iter(|| black_box(model.estimate_occupancy(black_box(&frames))));
        });
        group.finish();
    }
}

// Benchmark registration. Exactly one of the two `criterion_group!`
// invocations below is compiled, selected by the `eigenvalue` feature, so the
// `benches` group is always defined once.

// With `eigenvalue`: all groups plus the field-model occupancy bench from
// the feature-gated `eig` module.
#[cfg(feature = "eigenvalue")]
criterion_group!(
    benches,
    bench_multistatic_attention,
    bench_tomography_reconstruct,
    bench_pose_kalman_update,
    bench_spectrogram_multi_subcarrier,
    bench_fft_plan_cost,
    eig::bench_field_model_occupancy,
);

// Without `eigenvalue`: the same list minus the field-model bench, whose
// module is not compiled in this configuration.
#[cfg(not(feature = "eigenvalue"))]
criterion_group!(
    benches,
    bench_multistatic_attention,
    bench_tomography_reconstruct,
    bench_pose_kalman_update,
    bench_spectrogram_multi_subcarrier,
    bench_fft_plan_cost,
);

// Entry point generated by Criterion for the `benches` group.
criterion_main!(benches);