diff --git a/v2/crates/wifi-densepose-nn/src/tensor.rs b/v2/crates/wifi-densepose-nn/src/tensor.rs index 846a67a8..aa8d13bd 100644 --- a/v2/crates/wifi-densepose-nn/src/tensor.rs +++ b/v2/crates/wifi-densepose-nn/src/tensor.rs @@ -4,11 +4,39 @@ //! different backends (ONNX, tch, Candle). use crate::error::{NnError, NnResult}; -use ndarray::{Array1, Array2, Array3, Array4, ArrayD}; +use ndarray::{Array1, Array2, Array3, Array4, ArrayD, ArrayViewMutD, Axis}; // num_traits is available if needed for advanced tensor operations use serde::{Deserialize, Serialize}; use std::fmt; +/// Apply a numerically-stable softmax in place to every 1-D lane of `view` +/// taken along `axis`. Each lane is shifted by its own max before +/// exponentiation, then divided by its own sum, so every lane sums to 1.0 +/// independently — the per-pixel / per-class normalization densepose needs. +/// +/// `axis` MUST be validated as in-range by the caller. +fn softmax_inplace_along_axis(mut view: ArrayViewMutD<'_, f32>, axis: usize) { + for mut lane in view.lanes_mut(Axis(axis)) { + let max = lane.iter().copied().fold(f32::NEG_INFINITY, f32::max); + // An all-`-inf` (or empty) lane has no finite max; leave it untouched + // to avoid producing NaNs from `exp(-inf - -inf)`. + if !max.is_finite() { + continue; + } + let mut sum = 0.0f32; + for v in lane.iter_mut() { + let e = (*v - max).exp(); + *v = e; + sum += e; + } + if sum > 0.0 { + for v in lane.iter_mut() { + *v /= sum; + } + } + } +} + /// Shape of a tensor #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct TensorShape(Vec); @@ -288,14 +316,39 @@ impl Tensor { } } - /// Apply softmax along axis - pub fn softmax(&self, _axis: usize) -> NnResult { + /// Apply softmax along the given `axis`. + /// + /// Each 1-D lane along `axis` is normalized independently so it sums to + /// 1.0. This is the correct semantics for per-pixel / per-class probability + /// maps (e.g. DensePose body-part logits over the channel axis). A + /// numerically-stable max-shift is applied per lane. + /// + /// # Errors + /// Returns [`NnError`] if `axis` is out of range for the tensor's rank, or + /// if the tensor type is unsupported. + pub fn softmax(&self, axis: usize) -> NnResult { match self { Tensor::Float4D(a) => { - let max = a.fold(f32::NEG_INFINITY, |acc, &x| acc.max(x)); - let exp = a.mapv(|x| (x - max).exp()); - let sum = exp.sum(); - Ok(Tensor::Float4D(exp / sum)) + if axis >= a.ndim() { + return Err(NnError::tensor_op(format!( + "softmax axis {axis} out of range for {}-D tensor", + a.ndim() + ))); + } + let mut out = a.clone(); + softmax_inplace_along_axis(out.view_mut().into_dyn(), axis); + Ok(Tensor::Float4D(out)) + } + Tensor::FloatND(a) => { + if axis >= a.ndim() { + return Err(NnError::tensor_op(format!( + "softmax axis {axis} out of range for {}-D tensor", + a.ndim() + ))); + } + let mut out = a.clone(); + softmax_inplace_along_axis(out.view_mut(), axis); + Ok(Tensor::FloatND(out)) } _ => Err(NnError::tensor_op( "Softmax not supported for this tensor type", @@ -517,6 +570,67 @@ mod tests { assert!(sigmoid.max().unwrap() < 1.0); } + // ADR-155 §Tier-2: softmax(axis) must normalize along the GIVEN axis + // (per-lane sum == 1), not over the whole tensor. + #[test] + fn test_softmax_axis_sums_to_one_per_lane() { + // 2x3x1x1 tensor; softmax along axis 1 (the size-3 axis). + let arr = + Array4::from_shape_vec([2, 3, 1, 1], vec![1.0f32, 2.0, 3.0, -1.0, 0.0, 1.0]).unwrap(); + let t = Tensor::Float4D(arr); + let sm = t.softmax(1).unwrap(); + let out = sm.as_array4().unwrap(); + // Each lane along axis 1 must sum to 1.0. + for b in 0..2 { + let lane_sum: f32 = (0..3).map(|c| out[[b, c, 0, 0]]).sum(); + assert!((lane_sum - 1.0).abs() < 1e-6, "lane {b} sum = {lane_sum}"); + } + // Probabilities must be ordered like the logits within a lane. + assert!(out[[0, 0, 0, 0]] < out[[0, 1, 0, 0]]); + assert!(out[[0, 1, 0, 0]] < out[[0, 2, 0, 0]]); + } + + // ADR-155 §Tier-2: softmax along different axes must give different + // results — the old global-softmax bug ignored the axis entirely. + #[test] + fn test_softmax_axis_choice_matters() { + let arr = Array4::from_shape_vec([1, 2, 2, 1], vec![1.0f32, 2.0, 3.0, 4.0]).unwrap(); + let t = Tensor::Float4D(arr); + let along1 = t.softmax(1).unwrap(); + let along2 = t.softmax(2).unwrap(); + let a1 = along1.as_array4().unwrap(); + let a2 = along2.as_array4().unwrap(); + // The two normalizations partition the values differently, so at least + // one element must differ. + let mut differs = false; + for h in 0..2 { + if (a1[[0, 0, h, 0]] - a2[[0, 0, h, 0]]).abs() > 1e-6 { + differs = true; + } + } + assert!(differs, "softmax along axis 1 must differ from axis 2"); + } + + // ADR-155 §Tier-2: known-value check on a tiny tensor. + #[test] + fn test_softmax_known_values() { + // Lane [0, ln(3)] along axis 1 → softmax = [1/4, 3/4]. + let arr = Array4::from_shape_vec([1, 2, 1, 1], vec![0.0f32, 3.0f32.ln()]).unwrap(); + let t = Tensor::Float4D(arr); + let out = t.softmax(1).unwrap(); + let a = out.as_array4().unwrap(); + assert!((a[[0, 0, 0, 0]] - 0.25).abs() < 1e-6); + assert!((a[[0, 1, 0, 0]] - 0.75).abs() < 1e-6); + } + + // ADR-155 §Tier-2: out-of-range axis must return an error, never panic. + #[test] + fn test_softmax_axis_out_of_range_errors() { + let t = Tensor::zeros_4d([1, 2, 2, 2]); + assert!(t.softmax(4).is_err()); + assert!(t.softmax(99).is_err()); + } + #[test] fn test_broadcast_compatible() { let a = TensorShape::new(vec![1, 3, 224, 224]); diff --git a/v2/crates/wifi-densepose-nn/src/translator.rs b/v2/crates/wifi-densepose-nn/src/translator.rs index 07684d4d..c3f535f8 100644 --- a/v2/crates/wifi-densepose-nn/src/translator.rs +++ b/v2/crates/wifi-densepose-nn/src/translator.rs @@ -556,34 +556,122 @@ impl ModalityTranslator { } } - /// Apply multi-head attention + /// Apply single-head scaled-dot-product attention over the spatial + /// sequence: `softmax(Q·Kᵀ / √d) · V`, with `Q/K/V` linear projections of + /// each token's channel vector and a final output projection. + /// + /// The spatial grid `[B, C, H, W]` is treated as a length-`H·W` token + /// sequence of `C`-dim feature vectors. Each `*_weight` projection is a + /// `[C × C]` matrix applied per token. This is a genuine attention + /// operation (not the previous uniform-weight identity stub), so the + /// returned per-pair attention weights actually depend on the input. + /// + /// # Errors + /// Returns an error if any projection weight is not `[C × C]`, so a + /// mis-shaped checkpoint can never be silently treated as a no-op. fn apply_attention( &self, input: &Array4, - _weights: &AttentionWeights, + weights: &AttentionWeights, ) -> NnResult<(Array4, Array4)> { let (batch, channels, height, width) = input.dim(); let seq_len = height * width; - // Flatten spatial dimensions - let mut flat = ndarray::Array2::zeros((batch, seq_len * channels)); + // Every projection must be a square [C × C] matrix to act per token. + for (name, w) in [ + ("query_weight", &weights.query_weight), + ("key_weight", &weights.key_weight), + ("value_weight", &weights.value_weight), + ("output_weight", &weights.output_weight), + ] { + if w.dim() != (channels, channels) { + return Err(NnError::invalid_input(format!( + "attention {name} must be [{channels} x {channels}], got [{} x {}]", + w.dim().0, + w.dim().1 + ))); + } + } + if weights.output_bias.len() != channels { + return Err(NnError::shape_mismatch( + vec![channels], + vec![weights.output_bias.len()], + )); + } + + // Flatten spatial grid into a [seq_len, channels] token matrix per batch. + // Project to Q, K, V; compute scaled-dot-product attention; project out. + let scale = 1.0 / (channels as f32).sqrt(); + let mut out = Array4::zeros((batch, channels, height, width)); + let mut attention_weights = Array4::zeros((batch, 1, seq_len, seq_len)); + for b in 0..batch { + // Tokens: [seq_len, channels]. + let mut tokens = ndarray::Array2::::zeros((seq_len, channels)); for h in 0..height { for w in 0..width { + let s = h * width + w; for c in 0..channels { - flat[[b, (h * width + w) * channels + c]] = input[[b, c, h, w]]; + tokens[[s, c]] = input[[b, c, h, w]]; + } + } + } + + // Q = tokens·Wqᵀ, etc. (row vector × [C×C] projection). + let q = tokens.dot(&weights.query_weight.t()); + let k = tokens.dot(&weights.key_weight.t()); + let v = tokens.dot(&weights.value_weight.t()); + + // Scores = softmax_row(Q·Kᵀ · scale), then context = Scores·V. + let scores = q.dot(&k.t()).mapv(|x| x * scale); + for i in 0..seq_len { + // Numerically-stable row softmax. + let mut max = f32::NEG_INFINITY; + for j in 0..seq_len { + max = max.max(scores[[i, j]]); + } + let mut sum = 0.0f32; + let mut row = vec![0.0f32; seq_len]; + for j in 0..seq_len { + let e = (scores[[i, j]] - max).exp(); + row[j] = e; + sum += e; + } + if sum > 0.0 { + for j in 0..seq_len { + row[j] /= sum; + } + } + for j in 0..seq_len { + attention_weights[[b, 0, i, j]] = row[j]; + } + } + + // Context = attention · V, then output projection + bias. + for h in 0..height { + for w in 0..width { + let i = h * width + w; + // ctx[c] = Σ_j attn[i,j] · v[j,c] + let mut ctx = vec![0.0f32; channels]; + for j in 0..seq_len { + let a = attention_weights[[b, 0, i, j]]; + for c in 0..channels { + ctx[c] += a * v[[j, c]]; + } + } + // out[c] = Σ_c' ctx[c'] · Wo[c, c'] + bias[c] + for c in 0..channels { + let mut acc = weights.output_bias[c]; + for cp in 0..channels { + acc += ctx[cp] * weights.output_weight[[c, cp]]; + } + out[[b, c, h, w]] = acc; } } } } - // For simplicity, return input unchanged with identity attention - let attention_weights = Array4::from_elem( - (batch, self.config.attention_heads, seq_len, seq_len), - 1.0 / seq_len as f32, - ); - - Ok((input.clone(), attention_weights)) + Ok((out, attention_weights)) } /// Compute translation loss between predicted and target features @@ -760,6 +848,76 @@ mod tests { assert_eq!(config.activation, ActivationType::GELU); } + // ADR-155 §Tier-2: apply_attention must perform real scaled-dot-product + // attention, not return uniform 1/seq_len weights. With identity Q/K/V + // projections and a non-uniform input, the attention weights must NOT all + // equal 1/seq_len, and each row must still be a valid distribution. + #[test] + fn test_attention_is_not_uniform_stub() { + let channels = 4usize; + let height = 2usize; + let width = 2usize; + let seq_len = height * width; + + // Identity projections so Q=K=V=tokens; output = identity, zero bias. + let identity = ndarray::Array2::::eye(channels); + let weights = AttentionWeights { + query_weight: identity.clone(), + key_weight: identity.clone(), + value_weight: identity.clone(), + output_weight: identity, + output_bias: ndarray::Array1::zeros(channels), + }; + + // Non-uniform input: each spatial location has a distinct feature vector. + let mut input = Array4::::zeros((1, channels, height, width)); + for c in 0..channels { + for h in 0..height { + for w in 0..width { + input[[0, c, h, w]] = (c + 2 * h + 4 * w) as f32; + } + } + } + + let config = TranslatorConfig::default().with_attention(1); + let translator = ModalityTranslator::new(config).unwrap(); + let (out, attn) = translator.apply_attention(&input, &weights).unwrap(); + + // Each attention row must sum to 1 (valid softmax distribution). + for i in 0..seq_len { + let row_sum: f32 = (0..seq_len).map(|j| attn[[0, 0, i, j]]).sum(); + assert!((row_sum - 1.0).abs() < 1e-5, "row {i} sum = {row_sum}"); + } + // Weights must NOT all be the uniform 1/seq_len value of the old stub. + let uniform = 1.0 / seq_len as f32; + let any_non_uniform = (0..seq_len) + .flat_map(|i| (0..seq_len).map(move |j| (i, j))) + .any(|(i, j)| (attn[[0, 0, i, j]] - uniform).abs() > 1e-4); + assert!(any_non_uniform, "attention collapsed to uniform stub"); + // Output is finite and shaped like the input. + assert_eq!(out.dim(), input.dim()); + assert!(out.iter().all(|v| v.is_finite())); + } + + // ADR-155 §Tier-2: a mis-shaped projection weight must be rejected, never + // silently treated as a no-op. + #[test] + fn test_attention_rejects_wrong_weight_shape() { + let channels = 4usize; + let bad = ndarray::Array2::::zeros((channels + 1, channels)); + let weights = AttentionWeights { + query_weight: bad.clone(), + key_weight: bad.clone(), + value_weight: bad.clone(), + output_weight: bad, + output_bias: ndarray::Array1::zeros(channels), + }; + let input = Array4::::zeros((1, channels, 2, 2)); + let config = TranslatorConfig::default().with_attention(1); + let translator = ModalityTranslator::new(config).unwrap(); + assert!(translator.apply_attention(&input, &weights).is_err()); + } + #[test] fn test_loss_computation() { let config = TranslatorConfig::default(); diff --git a/v2/crates/wifi-densepose-train/src/ablation.rs b/v2/crates/wifi-densepose-train/src/ablation.rs index c3f3e59e..4b9e097b 100644 --- a/v2/crates/wifi-densepose-train/src/ablation.rs +++ b/v2/crates/wifi-densepose-train/src/ablation.rs @@ -53,13 +53,24 @@ impl FeatureSet { } /// `(p50, p95)` percentiles of a latency sample set (ms), nearest-rank. +/// +/// Non-finite samples (NaN / ±inf) are discarded before ranking. Sorting uses +/// [`f64::total_cmp`] so a stray NaN can never trigger a `partial_cmp().unwrap()` +/// panic (ADR-155 §Tier-2). If every sample is non-finite (or the slice is +/// empty), returns `(0.0, 0.0)`. #[must_use] pub fn latency_percentiles_ms(samples_ms: &[f64]) -> (f64, f64) { - if samples_ms.is_empty() { + // Drop non-finite values: a NaN latency is meaningless and must not poison + // the ranking or panic the sort. + let mut s: Vec = samples_ms + .iter() + .copied() + .filter(|v| v.is_finite()) + .collect(); + if s.is_empty() { return (0.0, 0.0); } - let mut s = samples_ms.to_vec(); - s.sort_by(|a, b| a.partial_cmp(b).unwrap()); + s.sort_by(f64::total_cmp); let pick = |q: f64| { // Nearest-rank: ceil(q * n) - 1, clamped. let rank = ((q * s.len() as f64).ceil() as usize).clamp(1, s.len()) - 1; @@ -71,8 +82,16 @@ pub fn latency_percentiles_ms(samples_ms: &[f64]) -> (f64, f64) { /// False-positive and false-negative rates from a confusion count. #[must_use] pub fn confusion_rates(tp: u64, fp: u64, tn: u64, fn_: u64) -> (f64, f64) { - let fp_rate = if fp + tn == 0 { 0.0 } else { fp as f64 / (fp + tn) as f64 }; - let fn_rate = if fn_ + tp == 0 { 0.0 } else { fn_ as f64 / (fn_ + tp) as f64 }; + let fp_rate = if fp + tn == 0 { + 0.0 + } else { + fp as f64 / (fp + tn) as f64 + }; + let fn_rate = if fn_ + tp == 0 { + 0.0 + } else { + fn_ as f64 / (fn_ + tp) as f64 + }; (fp_rate, fn_rate) } @@ -164,7 +183,10 @@ impl AblationMetrics { fn_rate, latency_p50_ms: p50, latency_p95_ms: p95, - privacy_leakage: membership_inference_leakage(&run.member_scores, &run.nonmember_scores), + privacy_leakage: membership_inference_leakage( + &run.member_scores, + &run.nonmember_scores, + ), cross_room_degradation: (run.room_a_accuracy - run.room_b_accuracy).max(0.0), } } @@ -181,7 +203,9 @@ impl AblationReport { /// Build from a set of variant runs. #[must_use] pub fn from_runs(runs: &[VariantRun]) -> Self { - Self { rows: runs.iter().map(AblationMetrics::from_run).collect() } + Self { + rows: runs.iter().map(AblationMetrics::from_run).collect(), + } } /// Look up a variant's metrics. @@ -194,7 +218,8 @@ impl AblationReport { /// least `min_wins` of {presence accuracy ↑, localisation error ↓, p95 latency ↓}? #[must_use] pub fn csi_cir_beats_csi_only(&self, min_wins: usize) -> bool { - let (Some(a), Some(b)) = (self.get(FeatureSet::CsiOnly), self.get(FeatureSet::CsiCir)) else { + let (Some(a), Some(b)) = (self.get(FeatureSet::CsiOnly), self.get(FeatureSet::CsiCir)) + else { return false; }; let wins = [ @@ -249,6 +274,30 @@ mod tests { assert_eq!(latency_percentiles_ms(&[]), (0.0, 0.0)); } + // ADR-155 §Tier-2: a NaN in the latency samples must NOT panic the sort + // (the old `partial_cmp().unwrap()` did) and must yield a sane percentile + // computed over the finite values only. + #[test] + fn latency_percentiles_with_nan_does_not_panic() { + let s = vec![ + 10.0, + f64::NAN, + 20.0, + 30.0, + f64::INFINITY, + 40.0, + f64::NEG_INFINITY, + 50.0, + ]; + let (p50, p95) = latency_percentiles_ms(&s); + // Finite set is [10,20,30,40,50]; nearest-rank p50=30, p95=50. + assert!(p50.is_finite() && p95.is_finite()); + assert!((p50 - 30.0).abs() < 1e-9); + assert!((p95 - 50.0).abs() < 1e-9); + // All-NaN input degrades gracefully to (0, 0). + assert_eq!(latency_percentiles_ms(&[f64::NAN, f64::NAN]), (0.0, 0.0)); + } + #[test] fn confusion_rates_basic() { let (fp_rate, fn_rate) = confusion_rates(80, 10, 90, 20); diff --git a/v2/crates/wifi-densepose-train/src/config.rs b/v2/crates/wifi-densepose-train/src/config.rs index 7114c7f2..8d9dee8f 100644 --- a/v2/crates/wifi-densepose-train/src/config.rs +++ b/v2/crates/wifi-densepose-train/src/config.rs @@ -31,6 +31,43 @@ use std::path::{Path, PathBuf}; use crate::error::ConfigError; +// --------------------------------------------------------------------------- +// Allocation-guard upper bounds (ADR-155 §Tier-2) +// --------------------------------------------------------------------------- +// +// `validate()` historically only checked lower bounds, so a config with an +// absurd field (e.g. `window_frames = usize::MAX`) passed validation and only +// blew up later as an OOM / allocation-size overflow deep in the pipeline. +// These constants cap each dimensioning field at a value far above any real +// hardware configuration but well below the point where the product of +// dimensions overflows `usize` on a 64-bit allocation. They guard against +// allocation-overflow, not against "sensible" configs — every real preset +// stays orders of magnitude under these caps. + +/// Maximum temporal window length, in frames. Caps the time dimension of every +/// CSI window allocation. Real captures use ≤ a few thousand frames. +pub const MAX_WINDOW_FRAMES: usize = 100_000; + +/// Maximum subcarrier count (model or native). Real Wi-Fi captures top out in +/// the low hundreds; this leaves vast headroom while preventing overflow. +pub const MAX_SUBCARRIERS: usize = 100_000; + +/// Maximum backbone feature-map channel count. Even large vision backbones use +/// a few thousand channels. +pub const MAX_BACKBONE_CHANNELS: usize = 1_000_000; + +/// Maximum heatmap side length (H = W). Caps the square heatmap allocation. +pub const MAX_HEATMAP_SIZE: usize = 100_000; + +/// Maximum number of keypoints. COCO uses 17; this is a wide safety margin. +pub const MAX_KEYPOINTS: usize = 10_000; + +/// Maximum number of DensePose body-part classes. DensePose uses 24. +pub const MAX_BODY_PARTS: usize = 10_000; + +/// Maximum mini-batch size. Guards the batch dimension of every allocation. +pub const MAX_BATCH_SIZE: usize = 1_000_000; + // --------------------------------------------------------------------------- // TrainingConfig // --------------------------------------------------------------------------- @@ -317,17 +354,36 @@ impl TrainingConfig { /// increasing. /// - `save_top_k` must be at least 1. /// - `val_every_epochs` must be at least 1. + /// - Dimensioning fields (`window_frames`, subcarrier counts, + /// `backbone_channels`, `heatmap_size`, `num_keypoints`, + /// `num_body_parts`, `batch_size`) must not exceed their + /// allocation-guard upper bounds (see `MAX_*` constants), so an absurd + /// value is rejected here rather than causing an OOM / allocation + /// overflow later in the pipeline. + /// - `gpu_device_id` must be non-negative. pub fn validate(&self) -> Result<(), ConfigError> { // Subcarrier counts if self.num_subcarriers == 0 { return Err(ConfigError::invalid_value("num_subcarriers", "must be > 0")); } + if self.num_subcarriers > MAX_SUBCARRIERS { + return Err(ConfigError::invalid_value( + "num_subcarriers", + format!("must be <= {MAX_SUBCARRIERS} (allocation guard)"), + )); + } if self.native_subcarriers == 0 { return Err(ConfigError::invalid_value( "native_subcarriers", "must be > 0", )); } + if self.native_subcarriers > MAX_SUBCARRIERS { + return Err(ConfigError::invalid_value( + "native_subcarriers", + format!("must be <= {MAX_SUBCARRIERS} (allocation guard)"), + )); + } // Antenna counts if self.num_antennas_tx == 0 { @@ -341,30 +397,66 @@ impl TrainingConfig { if self.window_frames == 0 { return Err(ConfigError::invalid_value("window_frames", "must be > 0")); } + if self.window_frames > MAX_WINDOW_FRAMES { + return Err(ConfigError::invalid_value( + "window_frames", + format!("must be <= {MAX_WINDOW_FRAMES} (allocation guard)"), + )); + } // Heatmap if self.heatmap_size == 0 { return Err(ConfigError::invalid_value("heatmap_size", "must be > 0")); } + if self.heatmap_size > MAX_HEATMAP_SIZE { + return Err(ConfigError::invalid_value( + "heatmap_size", + format!("must be <= {MAX_HEATMAP_SIZE} (allocation guard)"), + )); + } // Model dims if self.num_keypoints == 0 { return Err(ConfigError::invalid_value("num_keypoints", "must be > 0")); } + if self.num_keypoints > MAX_KEYPOINTS { + return Err(ConfigError::invalid_value( + "num_keypoints", + format!("must be <= {MAX_KEYPOINTS} (allocation guard)"), + )); + } if self.num_body_parts == 0 { return Err(ConfigError::invalid_value("num_body_parts", "must be > 0")); } + if self.num_body_parts > MAX_BODY_PARTS { + return Err(ConfigError::invalid_value( + "num_body_parts", + format!("must be <= {MAX_BODY_PARTS} (allocation guard)"), + )); + } if self.backbone_channels == 0 { return Err(ConfigError::invalid_value( "backbone_channels", "must be > 0", )); } + if self.backbone_channels > MAX_BACKBONE_CHANNELS { + return Err(ConfigError::invalid_value( + "backbone_channels", + format!("must be <= {MAX_BACKBONE_CHANNELS} (allocation guard)"), + )); + } // Optimisation if self.batch_size == 0 { return Err(ConfigError::invalid_value("batch_size", "must be > 0")); } + if self.batch_size > MAX_BATCH_SIZE { + return Err(ConfigError::invalid_value( + "batch_size", + format!("must be <= {MAX_BATCH_SIZE} (allocation guard)"), + )); + } if self.learning_rate <= 0.0 { return Err(ConfigError::invalid_value("learning_rate", "must be > 0.0")); } @@ -443,6 +535,11 @@ impl TrainingConfig { return Err(ConfigError::invalid_value("save_top_k", "must be > 0")); } + // Device: a CUDA device index can never be negative. + if self.gpu_device_id < 0 { + return Err(ConfigError::invalid_value("gpu_device_id", "must be >= 0")); + } + Ok(()) } } @@ -555,6 +652,96 @@ mod tests { assert!(!cfg2.needs_subcarrier_interp()); } + // ADR-155 §Tier-2: every preset constructor must still validate after the + // upper-bound (allocation-guard) checks were added. + #[test] + fn presets_still_validate() { + TrainingConfig::default().validate().expect("default"); + TrainingConfig::mmfi().validate().expect("mmfi"); + TrainingConfig::ht40_192().validate().expect("ht40_192"); + TrainingConfig::multiband_168() + .validate() + .expect("multiband_168"); + TrainingConfig::for_subcarriers(168, 56) + .validate() + .expect("for_subcarriers"); + } + + // ADR-155 §Tier-2: oversized dimensioning fields (config-OOM class) must be + // rejected, not passed through to an allocation that overflows / OOMs. + #[test] + fn oversized_window_frames_is_invalid() { + let cfg = TrainingConfig { + window_frames: MAX_WINDOW_FRAMES + 1, + ..TrainingConfig::default() + }; + assert!(cfg.validate().is_err()); + } + + #[test] + fn oversized_subcarriers_are_invalid() { + let cfg = TrainingConfig { + num_subcarriers: MAX_SUBCARRIERS + 1, + ..TrainingConfig::default() + }; + assert!(cfg.validate().is_err()); + let cfg = TrainingConfig { + native_subcarriers: MAX_SUBCARRIERS + 1, + ..TrainingConfig::default() + }; + assert!(cfg.validate().is_err()); + } + + #[test] + fn oversized_backbone_channels_is_invalid() { + let cfg = TrainingConfig { + backbone_channels: MAX_BACKBONE_CHANNELS + 1, + ..TrainingConfig::default() + }; + assert!(cfg.validate().is_err()); + } + + #[test] + fn oversized_heatmap_size_is_invalid() { + let cfg = TrainingConfig { + heatmap_size: MAX_HEATMAP_SIZE + 1, + ..TrainingConfig::default() + }; + assert!(cfg.validate().is_err()); + } + + #[test] + fn oversized_keypoints_and_body_parts_are_invalid() { + let cfg = TrainingConfig { + num_keypoints: MAX_KEYPOINTS + 1, + ..TrainingConfig::default() + }; + assert!(cfg.validate().is_err()); + let cfg = TrainingConfig { + num_body_parts: MAX_BODY_PARTS + 1, + ..TrainingConfig::default() + }; + assert!(cfg.validate().is_err()); + } + + #[test] + fn oversized_batch_size_is_invalid() { + let cfg = TrainingConfig { + batch_size: MAX_BATCH_SIZE + 1, + ..TrainingConfig::default() + }; + assert!(cfg.validate().is_err()); + } + + #[test] + fn negative_gpu_device_id_is_invalid() { + let cfg = TrainingConfig { + gpu_device_id: -1, + ..TrainingConfig::default() + }; + assert!(cfg.validate().is_err()); + } + #[test] fn config_fields_have_expected_defaults() { let cfg = TrainingConfig::default(); diff --git a/v2/crates/wifi-densepose-train/src/ruview_metrics.rs b/v2/crates/wifi-densepose-train/src/ruview_metrics.rs index 5a320357..a79f3a09 100644 --- a/v2/crates/wifi-densepose-train/src/ruview_metrics.rs +++ b/v2/crates/wifi-densepose-train/src/ruview_metrics.rs @@ -177,8 +177,13 @@ pub fn evaluate_joint_error( } } - // OKS for this frame. - let s = scale.get(i).copied().unwrap_or(1.0); + // OKS for this frame. ADR-155 §Tier-1.1/§Tier-2: never fall back to + // s=1.0 on normalized [0,1] coordinates — that makes every distance ≈0 + // and OKS ≈1.0 for any pose (the "fake Gold tier" bug). When no valid + // per-frame scale is supplied we derive it from the GT pose extent + // (`safe_diag`), exactly as the canonical OKS does. + let supplied = scale.get(i).copied().unwrap_or(0.0); + let s = if supplied > 0.0 { supplied } else { safe_diag }; let oks_frame = compute_single_oks(&pred_kpts[i], >_kpts[i], &visibility[i], s); oks_sum += oks_frame as f64; } @@ -627,10 +632,18 @@ fn compute_bbox_diag(kp: &Array2, vis: &Array1) -> f32 { } fn compute_single_oks(pred: &Array2, gt: &Array2, vis: &Array1, s: f32) -> f32 { + // ADR-155 §Tier-2: a non-positive scale would divide by ≈0 (Inf/NaN OKS) — + // and on normalized coords s=1.0 was the fake-perfect bug. Reject it. + if !(s > 0.0) { + return 0.0; + } let s_sq = s * s; + // ADR-155 §Tier-2: bound the loop to the actual array extents so adversarial + // / short inputs (< 17 rows, mismatched vis length) cannot panic on `[j]`. + let n = pred.shape()[0].min(gt.shape()[0]).min(vis.len()).min(17); let mut num = 0.0_f32; let mut den = 0.0_f32; - for j in 0..17 { + for j in 0..n { if vis[j] < 0.5 { continue; } @@ -746,6 +759,59 @@ mod tests { (pred, gt, vis) } + #[test] + fn oks_rejects_nonpositive_scale() { + // ADR-155 §Tier-2: s<=0 must return 0.0, never Inf/NaN. + let (pred, gt, vis) = make_perfect_kpts(); + assert_eq!(compute_single_oks(&pred, >, &vis, 0.0), 0.0); + assert_eq!(compute_single_oks(&pred, >, &vis, -1.0), 0.0); + assert!(compute_single_oks(&pred, >, &vis, 0.5).is_finite()); + } + + #[test] + fn oks_does_not_panic_on_short_arrays() { + // ADR-155 §Tier-2: fewer than 17 rows / mismatched vis must not panic. + let pred = Array2::::zeros((5, 2)); + let gt = Array2::::zeros((5, 2)); + let vis = Array1::::ones(5); + let oks = compute_single_oks(&pred, >, &vis, 0.5); + assert!(oks.is_finite()); + } + + #[test] + fn oks_not_perfect_for_wrong_pose_with_derived_scale() { + // ADR-155 §Tier-1.1/§Tier-2: a clearly wrong pose on normalized coords, + // evaluated with no supplied scale (derived from GT extent), must NOT + // look near-perfect — the old s=1.0 fallback would have returned ≈1.0. + let gt = Array2::from_shape_fn( + (17, 2), + |(j, d)| { + if d == 0 { + 0.4 + j as f32 * 0.01 + } else { + 0.5 + } + }, + ); + let mut pred = gt.clone(); + for j in 0..17 { + pred[[j, 1]] += 0.3; // shift every joint far in y + } + let vis = Array1::::ones(17); + let result = evaluate_joint_error( + &[pred], + &[gt], + &[vis], + &[], // no supplied scale ⇒ derive from GT extent + &JointErrorThresholds::default(), + ); + assert!( + result.oks < 0.5, + "wrong pose must not yield near-perfect OKS, got {}", + result.oks + ); + } + #[test] fn joint_error_perfect_predictions_pass() { let (pred, gt, vis) = make_perfect_kpts(); diff --git a/v2/crates/wifi-densepose-train/src/subcarrier.rs b/v2/crates/wifi-densepose-train/src/subcarrier.rs index 9e91aa62..0c2674b6 100644 --- a/v2/crates/wifi-densepose-train/src/subcarrier.rs +++ b/v2/crates/wifi-densepose-train/src/subcarrier.rs @@ -39,6 +39,11 @@ use ruvector_solver::types::CsrMatrix; /// # Panics /// /// Panics if `target_sc == 0` or the input has no subcarrier dimension. +/// +/// Non-contiguous inputs (e.g. a transposed or strided view) are handled +/// gracefully: the subcarrier lane is copied into a contiguous scratch buffer +/// when the underlying storage is not contiguous, so this function never +/// panics on layout (ADR-155 §Tier-2). pub fn interpolate_subcarriers(arr: &Array4, target_sc: usize) -> Array4 { assert!(target_sc > 0, "target_sc must be > 0"); @@ -54,16 +59,23 @@ pub fn interpolate_subcarriers(arr: &Array4, target_sc: usize) -> Array4 = Vec::new(); + for t in 0..n_t { for tx in 0..n_tx { for rx in 0..n_rx { let src = arr.slice(s![t, tx, rx, ..]); - let src_slice = src.as_slice().unwrap_or_else(|| { - // Fallback: copy to a contiguous slice - // (this path is hit when the array has a non-contiguous layout) - // In practice ndarray arrays sliced along last dim are contiguous. - panic!("Subcarrier slice is not contiguous"); - }); + // Prefer the contiguous fast path; fall back to an owned copy + // for non-contiguous layouts instead of panicking. + let src_slice: &[f32] = match src.as_slice() { + Some(s) => s, + None => { + scratch.clear(); + scratch.extend(src.iter().copied()); + &scratch + } + }; for (k, &(i0, i1, w)) in weights.iter().enumerate() { let v = src_slice[i0] * (1.0 - w) + src_slice[i1] * w; @@ -420,6 +432,35 @@ mod tests { assert_eq!(out.shape(), &[4, 1, 3, 56]); } + // ADR-155 §Tier-2: a non-contiguous input (subcarrier axis strided after an + // axis permutation) must NOT panic — the old `.as_slice().unwrap_or_else(|| + // panic!(...))` path crashed on any non-contiguous layout. + #[test] + fn non_contiguous_input_does_not_panic() { + // Build a [t, sc, tx, rx] array, then permute so subcarriers land in the + // last axis. The resulting owned Array4 has non-standard strides, so its + // last-axis lanes are non-contiguous in memory. + let base = + Array4::::from_shape_fn((4, 8, 3, 3), |(t, sc, tx, rx)| (t + sc + tx + rx) as f32); + // permuted_axes consumes the owned array and returns an owned Array4 + // with swapped strides: logical shape [t, tx, rx, sc], sc axis strided. + let strided: Array4 = base.permuted_axes([0, 2, 3, 1]); + // Sanity: a last-axis lane really is non-contiguous. + assert!(strided.slice(s![0, 0, 0, ..]).as_slice().is_none()); + + let out = interpolate_subcarriers(&strided, 4); + assert_eq!(out.shape(), &[4, 3, 3, 4]); + // Endpoints preserved exactly even via the fallback copy path. + for tx in 0..3 { + for rx in 0..3 { + let first = strided[[0, tx, rx, 0]]; + let last = strided[[0, tx, rx, 7]]; + assert_abs_diff_eq!(out[[0, tx, rx, 0]], first, epsilon = 1e-5); + assert_abs_diff_eq!(out[[0, tx, rx, 3]], last, epsilon = 1e-5); + } + } + } + #[test] fn sparse_interpolation_identity() { // For same source and target count, should return same array