//! Integration tests for `wifi_densepose_train` pose metrics.
//!
//! # ADR-155 Milestone-1 — §8 "reference kernels" resolution
//!
//! The full `metrics` module is gated behind `tch-backend` (libtorch), but the
//! **canonical** metric core (`pck_canonical` / `oks_canonical`) now lives in
//! the un-gated `metrics_core` module and is re-exported at the crate root, so
//! these workspace tests (run under `--no-default-features`) validate the
//! **production** functions directly.
//!
//! Previously this file carried its own local `compute_pck` / `compute_oks`
//! reimplementations and asserted properties of *those* — a test that could
//! not catch a bug in the canonical implementation (both could be wrong the
//! same way). That is fixed two ways here:
//!
//! 1. **Fixture tests** (`canonical_pck_matches_hand_computed_fixture`,
//!    `canonical_oks_*`) assert that the production `pck_canonical` /
//!    `oks_canonical` equal *hand-computed* expected values — numbers worked
//!    out by hand below, NOT a second implementation of the same algorithm.
//! 2. **Differential test** (`test_kernel_agrees_with_canonical`) keeps a small
//!    independent reference kernel and asserts it **agrees** with the canonical
//!    function on shared inputs (in the torso=raw-threshold regime where the two
//!    coincide), so the reference adds genuine cross-check value rather than
//!    duplicating the algorithm under test.
//!
//! `EvalMetrics` tests remain `#[cfg(feature = "tch-backend")]` (that type is in
//! the gated module). All inputs are fixed, deterministic arrays — no `rand`,
//! no OS entropy.
|
||
|
||
use ndarray::{Array1, Array2};
|
||
use wifi_densepose_train::{oks_canonical, pck_canonical, CANON_LEFT_HIP, CANON_RIGHT_HIP};
|
||
// ADR-155 §Tier-1.2 — metric-locked accuracy harness public surface.
|
||
use wifi_densepose_train::{accuracy_report, pck_at, PckNormalization, PoseFrame};
|
||
|
||
// ---------------------------------------------------------------------------
// Metric-locked accuracy harness: the three PCK normalizations are reachable
// from the crate root and give DIFFERENT PCK on identical predictions — the
// proof that the 96 / 81.6 / 61 figures were non-comparable (validated here as
// a downstream consumer would call it).
// ---------------------------------------------------------------------------
|
||
|
||
/// Identical predictions, three declared normalizations ⇒ three distinct PCK.
///
/// Hand calc (all coords in `[0,1]`):
/// * GT: nose(0)=(0.50,0.10), l_sh(5)=(0.50,0.30), hips=(0.40,0.90)/(0.60,0.90).
/// * Pred: nose err 0.06, shoulder err 0.10, hips exact.
/// * torso = 0.20 ⇒ τ@20 = 0.04 ⇒ only hips correct ⇒ 2/4 = **0.50**.
/// * bbox = √(0.20²+0.80²)=0.82462 ⇒ τ@20 = 0.16492 ⇒ all correct ⇒ **1.00**.
/// * abs(0.08): nose 0.06≤0.08 ok, shoulder 0.10>0.08 wrong ⇒ 3/4 = **0.75**.
#[test]
fn harness_three_normalizations_differ_from_crate_root() {
    let gt = pose17(&[
        (0, 0.50, 0.10),
        (5, 0.50, 0.30),
        (CANON_LEFT_HIP, 0.40, 0.90),
        (CANON_RIGHT_HIP, 0.60, 0.90),
    ]);
    let pred = pose17(&[
        (0, 0.56, 0.10),
        (5, 0.60, 0.30),
        (CANON_LEFT_HIP, 0.40, 0.90),
        (CANON_RIGHT_HIP, 0.60, 0.90),
    ]);
    let vis = vis17(&[0, 5, CANON_LEFT_HIP, CANON_RIGHT_HIP]);

    // Same prediction / ground truth / visibility each time; only the declared
    // normalization changes. The third tuple element is the PCK value.
    let (_, _, torso) = pck_at(&pred, &gt, &vis, 20, PckNormalization::TorsoDiameter);
    let (_, _, bbox) = pck_at(&pred, &gt, &vis, 20, PckNormalization::BoundingBoxDiagonal);
    let (_, _, abs) = pck_at(&pred, &gt, &vis, 20, PckNormalization::AbsolutePixels(0.08));

    assert!((torso - 0.50).abs() < 1e-6, "torso PCK 0.50, got {torso}");
    assert!((bbox - 1.00).abs() < 1e-6, "bbox PCK 1.00, got {bbox}");
    assert!((abs - 0.75).abs() < 1e-6, "abs(0.08) PCK 0.75, got {abs}");
    assert!(
        torso != bbox && bbox != abs && torso != abs,
        "three normalizations must be distinct: {torso} / {bbox} / {abs}"
    );
}
|
||
|
||
/// `accuracy_report` returns a self-describing result carrying its normalization,
/// so an unlabeled PCK number is structurally impossible at the API boundary.
///
/// The single frame uses the ground truth as its own prediction (a perfect
/// match), so PCK@20 is expected to be 1.0 under the bbox-diagonal normalization.
#[test]
fn harness_report_carries_normalization_label() {
    let gt = pose17(&[(CANON_LEFT_HIP, 0.40, 0.50), (CANON_RIGHT_HIP, 0.60, 0.50)]);
    let vis = vis17(&[CANON_LEFT_HIP, CANON_RIGHT_HIP]);
    // `gt` is not used again, so it is cloned once for `pred` and then moved.
    let frame = PoseFrame {
        pred: gt.clone(),
        gt,
        visibility: vis,
    };

    let report = accuracy_report(&[frame], &[20], PckNormalization::BoundingBoxDiagonal);

    assert_eq!(report.normalization, PckNormalization::BoundingBoxDiagonal);
    assert_eq!(report.n_keypoints, 17);
    assert_eq!(report.n_frames, 1);
    let pck_20 = report
        .pck(20)
        .expect("threshold 20 was requested, so the report must carry PCK@20");
    assert!((pck_20 - 1.0).abs() < 1e-6);
    assert!(report.summary().contains("bbox-diagonal"));
}
|
||
|
||
// ---------------------------------------------------------------------------
// Tests that use `EvalMetrics` (requires tch-backend because the metrics
// module is feature-gated in lib.rs)
// ---------------------------------------------------------------------------
|
||
|
||
#[cfg(feature = "tch-backend")]
mod eval_metrics_tests {
    //! Field-level checks on [`EvalMetrics`].
    //!
    //! Every test here builds an `EvalMetrics` literal by hand and reads its
    //! fields back; no metric is actually computed from poses in this module.

    use wifi_densepose_train::metrics::EvalMetrics;

    /// A freshly constructed [`EvalMetrics`] should hold exactly the values
    /// that were passed in.
    #[test]
    fn eval_metrics_stores_correct_values() {
        let m = EvalMetrics {
            mpjpe: 0.05,
            pck_at_05: 0.92,
            gps: 1.3,
        };

        assert!(
            (m.mpjpe - 0.05).abs() < 1e-12,
            "mpjpe must be 0.05, got {}",
            m.mpjpe
        );
        assert!(
            (m.pck_at_05 - 0.92).abs() < 1e-12,
            "pck_at_05 must be 0.92, got {}",
            m.pck_at_05
        );
        assert!(
            (m.gps - 1.3).abs() < 1e-12,
            "gps must be 1.3, got {}",
            m.gps
        );
    }

    /// A literal describing a perfect prediction (`pck_at_05 = 1.0`) must read
    /// back `pck_at_05 == 1.0`.
    #[test]
    fn pck_perfect_prediction_is_one() {
        let m = EvalMetrics {
            mpjpe: 0.0,
            pck_at_05: 1.0,
            gps: 0.0,
        };
        assert!(
            (m.pck_at_05 - 1.0).abs() < 1e-9,
            "perfect prediction must yield pck_at_05 = 1.0, got {}",
            m.pck_at_05
        );
    }

    /// A literal describing a completely wrong prediction (`pck_at_05 = 0.0`)
    /// must read back `pck_at_05 == 0.0`.
    #[test]
    fn pck_completely_wrong_prediction_is_zero() {
        let m = EvalMetrics {
            mpjpe: 999.0,
            pck_at_05: 0.0,
            gps: 999.0,
        };
        assert!(
            m.pck_at_05.abs() < 1e-9,
            "completely wrong prediction must yield pck_at_05 = 0.0, got {}",
            m.pck_at_05
        );
    }

    /// A literal describing a perfect prediction (`mpjpe = 0.0`) must read back
    /// `mpjpe == 0.0`.
    #[test]
    fn mpjpe_perfect_prediction_is_zero() {
        let m = EvalMetrics {
            mpjpe: 0.0,
            pck_at_05: 1.0,
            gps: 0.0,
        };
        assert!(
            m.mpjpe.abs() < 1e-12,
            "perfect prediction must yield mpjpe = 0.0, got {}",
            m.mpjpe
        );
    }

    /// Three hand-written literals with increasing `mpjpe` must compare in
    /// strictly increasing order on that field.
    #[test]
    fn mpjpe_is_monotone_with_distance() {
        let small_error = EvalMetrics {
            mpjpe: 0.01,
            pck_at_05: 0.99,
            gps: 0.1,
        };
        let medium_error = EvalMetrics {
            mpjpe: 0.10,
            pck_at_05: 0.70,
            gps: 1.0,
        };
        let large_error = EvalMetrics {
            mpjpe: 0.50,
            pck_at_05: 0.20,
            gps: 5.0,
        };

        assert!(
            small_error.mpjpe < medium_error.mpjpe,
            "small error mpjpe must be < medium error mpjpe"
        );
        assert!(
            medium_error.mpjpe < large_error.mpjpe,
            "medium error mpjpe must be < large error mpjpe"
        );
    }

    /// A literal describing a perfect DensePose prediction (`gps = 0.0`) must
    /// read back `gps == 0.0`.
    #[test]
    fn gps_perfect_prediction_is_zero() {
        let m = EvalMetrics {
            mpjpe: 0.0,
            pck_at_05: 1.0,
            gps: 0.0,
        };
        assert!(
            m.gps.abs() < 1e-12,
            "perfect prediction must yield gps = 0.0, got {}",
            m.gps
        );
    }

    /// Three hand-written literals with increasing `gps` must compare in
    /// strictly increasing order on that field.
    #[test]
    fn gps_monotone_with_distance() {
        let perfect = EvalMetrics {
            mpjpe: 0.0,
            pck_at_05: 1.0,
            gps: 0.0,
        };
        let imperfect = EvalMetrics {
            mpjpe: 0.1,
            pck_at_05: 0.8,
            gps: 2.0,
        };
        let poor = EvalMetrics {
            mpjpe: 0.5,
            pck_at_05: 0.3,
            gps: 8.0,
        };

        assert!(
            perfect.gps < imperfect.gps,
            "perfect GPS must be < imperfect GPS"
        );
        assert!(imperfect.gps < poor.gps, "imperfect GPS must be < poor GPS");
    }
}
|
||
|
||
// ---------------------------------------------------------------------------
// Canonical PCK / OKS validation (production functions, no tch)
// ---------------------------------------------------------------------------
|
||
|
||
/// Build a 17-joint pose in `[0,1]` coordinates from an `(x, y)` per-joint list,
|
||
/// padding any unspecified joint to `(0,0)`. Returns `[17, 2]`.
|
||
fn pose17(joints: &[(usize, f32, f32)]) -> Array2<f32> {
|
||
let mut a = Array2::<f32>::zeros((17, 2));
|
||
for &(j, x, y) in joints {
|
||
a[[j, 0]] = x;
|
||
a[[j, 1]] = y;
|
||
}
|
||
a
|
||
}
|
||
|
||
/// Visibility vector with the listed joints visible (`2.0`), rest invisible.
|
||
fn vis17(visible: &[usize]) -> Array1<f32> {
|
||
let mut v = Array1::<f32>::zeros(17);
|
||
for &j in visible {
|
||
v[j] = 2.0;
|
||
}
|
||
v
|
||
}
|
||
|
||
/// **Fixture test (Goal B).** The production `pck_canonical` must equal a value
/// worked out *by hand* on a constructed pose — not a reimplementation.
///
/// Construction (all coordinates in `[0,1]`):
/// * left_hip(11) = (0.40, 0.50), right_hip(12) = (0.60, 0.50)
///   ⇒ canonical torso = hip↔hip width = 0.20.
/// * threshold = 0.2 ⇒ dist_threshold = 0.2 × 0.20 = **0.04**.
/// * Visible joints: {0 (nose), 5 (l_shoulder), 11, 12}. (4 visible.)
///   - nose(0): pred == gt ⇒ dist 0.00 ≤ 0.04 ⇒ CORRECT
///   - l_shoulder(5): pred off by dy=0.10 ⇒ dist 0.10 > 0.04 ⇒ wrong
///   - l_hip(11): pred == gt ⇒ dist 0.00 ≤ 0.04 ⇒ CORRECT
///   - r_hip(12): pred off by dx=0.03 ⇒ dist 0.03 ≤ 0.04 ⇒ CORRECT
///
/// Hand result: correct = 3, total = 4, pck = 3/4 = **0.75**.
#[test]
fn canonical_pck_matches_hand_computed_fixture() {
    let gt = pose17(&[
        (0, 0.50, 0.20), // nose
        (5, 0.35, 0.35), // left_shoulder
        (CANON_LEFT_HIP, 0.40, 0.50),
        (CANON_RIGHT_HIP, 0.60, 0.50),
    ]);
    let pred = pose17(&[
        (0, 0.50, 0.20),               // exact
        (5, 0.35, 0.45),               // off by dy = 0.10 (> 0.04)
        (CANON_LEFT_HIP, 0.40, 0.50),  // exact
        (CANON_RIGHT_HIP, 0.63, 0.50), // off by dx = 0.03 (<= 0.04)
    ]);
    let vis = vis17(&[0, 5, CANON_LEFT_HIP, CANON_RIGHT_HIP]);

    let (correct, total, pck) = pck_canonical(&pred, &gt, &vis, 0.2);
    assert_eq!(total, 4, "4 visible joints expected, got {total}");
    assert_eq!(correct, 3, "hand-computed: 3 of 4 within 0.04, got {correct}");
    assert!(
        (pck - 0.75).abs() < 1e-6,
        "hand-computed PCK is 0.75, got {pck}"
    );
}
|
||
|
||
/// Pin the **normalizer**: PCK uses hip↔hip torso width. A prediction error of
/// 0.18 (just under 0.2 × torso=1.0 wide hips) is CORRECT, but the same error
/// is WRONG once the hips are squeezed to width 0.20 (threshold 0.04). If the
/// implementation ignored the torso normalizer this test would fail.
#[test]
fn canonical_pck_uses_hip_to_hip_torso_normalizer() {
    let vis = vis17(&[5, CANON_LEFT_HIP, CANON_RIGHT_HIP]);

    // Wide hips: width 1.0 ⇒ threshold 0.2. An error of 0.18 on joint 5 is OK.
    let gt_wide = pose17(&[
        (5, 0.50, 0.50),
        (CANON_LEFT_HIP, 0.0, 0.5),
        (CANON_RIGHT_HIP, 1.0, 0.5),
    ]);
    let pred_wide = pose17(&[
        (5, 0.68, 0.50),
        (CANON_LEFT_HIP, 0.0, 0.5),
        (CANON_RIGHT_HIP, 1.0, 0.5),
    ]);
    let (_, _, pck_wide) = pck_canonical(&pred_wide, &gt_wide, &vis, 0.2);

    // Narrow hips: width 0.20 ⇒ threshold 0.04. Same 0.18 error on joint 5 is wrong.
    let gt_narrow = pose17(&[
        (5, 0.50, 0.50),
        (CANON_LEFT_HIP, 0.40, 0.5),
        (CANON_RIGHT_HIP, 0.60, 0.5),
    ]);
    let pred_narrow = pose17(&[
        (5, 0.68, 0.50),
        (CANON_LEFT_HIP, 0.40, 0.5),
        (CANON_RIGHT_HIP, 0.60, 0.5),
    ]);
    let (_, _, pck_narrow) = pck_canonical(&pred_narrow, &gt_narrow, &vis, 0.2);

    // Joints 11/12 are exact (correct in both); joint 5 flips.
    // Wide: 3/3 = 1.0; Narrow: 2/3 ≈ 0.667.
    assert!(
        (pck_wide - 1.0).abs() < 1e-6,
        "wide-hip PCK should be 1.0, got {pck_wide}"
    );
    assert!(
        (pck_narrow - 2.0 / 3.0).abs() < 1e-6,
        "narrow-hip PCK should be 2/3 (joint 5 now out of tolerance), got {pck_narrow}"
    );
}
|
||
|
||
/// The claim-inflating bug: no visible joints must score **0.0**, never 1.0.
///
/// The same pose is passed as both prediction and ground truth, with an
/// all-zero visibility vector, so nothing should be counted at all.
#[test]
fn canonical_pck_zero_visible_is_zero() {
    let kpts = pose17(&[(CANON_LEFT_HIP, 0.4, 0.5), (CANON_RIGHT_HIP, 0.6, 0.5)]);
    let vis = vis17(&[]); // nothing visible

    let (correct, total, pck) = pck_canonical(&kpts, &kpts, &vis, 0.2);

    assert_eq!((correct, total), (0, 0));
    // Exact comparison is intentional: the expected value is the literal 0.0.
    assert_eq!(pck, 0.0, "no-visible-joint PCK must be 0.0 (not the old 1.0)");
}
|
||
|
||
// ---------------------------------------------------------------------------
// Canonical OKS validation (production function, no tch)
// ---------------------------------------------------------------------------
|
||
|
||
/// **Fixture test (Goal B).** A perfect prediction (pred == gt) makes every
/// Gaussian term `exp(0) = 1`, so the canonical OKS is exactly **1.0** —
/// hand-evident, independent of the (positive) scale.
#[test]
fn canonical_oks_perfect_prediction_is_one() {
    let gt = pose17(&[
        (0, 0.50, 0.20),
        (5, 0.35, 0.35),
        (CANON_LEFT_HIP, 0.40, 0.50),
        (CANON_RIGHT_HIP, 0.60, 0.50),
    ]);
    let vis = vis17(&[0, 5, CANON_LEFT_HIP, CANON_RIGHT_HIP]);

    // Ground truth doubles as the prediction.
    let oks = oks_canonical(&gt, &gt, &vis);
    assert!(
        (oks - 1.0).abs() < 1e-6,
        "OKS for a perfect prediction must be 1.0, got {oks}"
    );
}
|
||
|
||
/// **The "fake Gold tier" bug, pinned (Goal B).** On normalized `[0,1]`
/// coordinates the historical `s = 1.0` path returned ≈1.0 for *any* pose.
/// Canonical derives `s` from the pose extent (here torso width = 0.20), so a
/// pose whose visible non-hip joint is off by ~3× the torso scores far below
/// the "Gold" tier. Hand bound: for joint 5 with d ≈ 0.60, s = 0.20, k = 0.079,
/// the exponent `-d²/(2 s² k²)` is enormously negative ⇒ that term ≈ 0; the two
/// (exact) hip terms give 1 each ⇒ OKS ≈ 2/3 at most, and with joint-5 ≈ 0 the
/// mean is ≈ 0.667. We assert it is comfortably **< 0.8** (and the wrong joint
/// contributes ≈ 0), i.e. nowhere near the old ≈1.0.
#[test]
fn canonical_oks_not_one_for_wrong_pose_on_normalized_coords() {
    let gt = pose17(&[
        (5, 0.30, 0.50),
        (CANON_LEFT_HIP, 0.40, 0.50),
        (CANON_RIGHT_HIP, 0.60, 0.50),
    ]);
    // Joint 5 dragged 0.60 away (3× the 0.20 torso); hips exact.
    let pred = pose17(&[
        (5, 0.90, 0.50),
        (CANON_LEFT_HIP, 0.40, 0.50),
        (CANON_RIGHT_HIP, 0.60, 0.50),
    ]);
    let vis = vis17(&[5, CANON_LEFT_HIP, CANON_RIGHT_HIP]);

    let oks = oks_canonical(&pred, &gt, &vis);
    assert!(
        oks < 0.8,
        "wrong-pose OKS on [0,1] coords must NOT be ≈1.0 (fake-Gold bug); got {oks}"
    );
    // The two exact hips alone give 2/3; the wrong joint must add ~nothing.
    assert!(
        (oks - 2.0 / 3.0).abs() < 0.05,
        "wrong joint should contribute ≈0 ⇒ OKS ≈ 2/3, got {oks}"
    );
}
|
||
|
||
/// Canonical OKS decreases monotonically with prediction error.
///
/// Only joint 5's x-coordinate moves (0.50 → 0.52 → 0.60 against a ground
/// truth of 0.50); both hips stay exact in every prediction.
#[test]
fn canonical_oks_decreases_with_distance() {
    let gt = pose17(&[
        (5, 0.50, 0.50),
        (CANON_LEFT_HIP, 0.40, 0.50),
        (CANON_RIGHT_HIP, 0.60, 0.50),
    ]);
    let vis = vis17(&[5, CANON_LEFT_HIP, CANON_RIGHT_HIP]);
    // Prediction builder: same pose as `gt` except for joint 5's x-coordinate.
    let mk = |x5: f32| {
        pose17(&[
            (5, x5, 0.50),
            (CANON_LEFT_HIP, 0.40, 0.50),
            (CANON_RIGHT_HIP, 0.60, 0.50),
        ])
    };

    let oks0 = oks_canonical(&mk(0.50), &gt, &vis);
    let oks1 = oks_canonical(&mk(0.52), &gt, &vis);
    let oks2 = oks_canonical(&mk(0.60), &gt, &vis);
    assert!(oks0 > oks1, "OKS must drop as error grows: {oks0} vs {oks1}");
    assert!(oks1 > oks2, "OKS must drop as error grows: {oks1} vs {oks2}");
}
|
||
|
||
// ---------------------------------------------------------------------------
// Differential cross-check: independent reference kernel vs canonical (Goal B)
// ---------------------------------------------------------------------------
|
||
|
||
/// A deliberately *independent* PCK reference implementation in the simplest
/// regime — a **raw distance threshold** (no torso normalization). It is kept
/// only to cross-check the canonical function, not to define the metric.
///
/// Points are paired by position; if the slices differ in length, only the
/// first `min(pred.len(), gt.len())` pairs are compared. A pair counts as
/// correct when its Euclidean distance is `<= dist_threshold` (inclusive).
///
/// Returns `(correct, total, pck)` where `total` is the number of compared
/// pairs and `pck = correct / total`, or `0.0` when there are no pairs.
fn reference_pck_raw(
    pred: &[(f32, f32)],
    gt: &[(f32, f32)],
    dist_threshold: f32,
) -> (usize, usize, f32) {
    let total = pred.len().min(gt.len());
    // `zip` stops at the shorter slice, so exactly `total` pairs are visited.
    let correct = pred
        .iter()
        .zip(gt)
        .filter(|&(p, g)| {
            let dx = p.0 - g.0;
            let dy = p.1 - g.1;
            (dx * dx + dy * dy).sqrt() <= dist_threshold
        })
        .count();
    // Guard the division: an empty comparison scores 0.0 rather than NaN.
    let pck = if total > 0 {
        correct as f32 / total as f32
    } else {
        0.0
    };
    (correct, total, pck)
}
|
||
|
||
/// **Differential test (Goal B).** In the regime where the canonical torso
/// normalizer equals 1.0 (hips exactly one unit apart, so `threshold · torso`
/// reduces to the raw `threshold`), the canonical PCK and an independent
/// raw-threshold reference kernel MUST agree on shared inputs. This catches a
/// canonical-side bug that a pure self-fixture could miss, *because* the second
/// implementation is genuinely independent.
#[test]
fn test_kernel_agrees_with_canonical() {
    // Hips one unit apart ⇒ canonical torso == 1.0 ⇒ dist_threshold == threshold.
    let gt = pose17(&[
        (0, 0.30, 0.30),
        (5, 0.55, 0.55),
        (7, 0.10, 0.90),
        (CANON_LEFT_HIP, 0.00, 0.50),
        (CANON_RIGHT_HIP, 1.00, 0.50),
    ]);
    let pred = pose17(&[
        (0, 0.31, 0.30),               // err 0.01
        (5, 0.70, 0.55),               // err 0.15
        (7, 0.10, 0.98),               // err 0.08
        (CANON_LEFT_HIP, 0.00, 0.50),  // exact
        (CANON_RIGHT_HIP, 1.00, 0.50), // exact
    ]);
    let visible = [0usize, 5, 7, CANON_LEFT_HIP, CANON_RIGHT_HIP];
    let vis = vis17(&visible);
    let threshold = 0.1_f32;

    let (c_can, t_can, pck_can) = pck_canonical(&pred, &gt, &vis, threshold);

    // Reference over the SAME visible joints with the SAME raw threshold
    // (torso == 1.0 so threshold·torso == threshold).
    let pred_v: Vec<(f32, f32)> = visible
        .iter()
        .map(|&j| (pred[[j, 0]], pred[[j, 1]]))
        .collect();
    let gt_v: Vec<(f32, f32)> = visible
        .iter()
        .map(|&j| (gt[[j, 0]], gt[[j, 1]]))
        .collect();
    let (c_ref, t_ref, pck_ref) = reference_pck_raw(&pred_v, &gt_v, threshold);

    assert_eq!(t_can, t_ref, "visible counts must match: {t_can} vs {t_ref}");
    assert_eq!(c_can, c_ref, "correct counts must match: {c_can} vs {c_ref}");
    assert!(
        (pck_can - pck_ref).abs() < 1e-6,
        "canonical PCK {pck_can} must agree with independent reference {pck_ref}"
    );
}
|
||
|
||
// ---------------------------------------------------------------------------
// Hungarian assignment tests (deterministic, hand-computed)
// ---------------------------------------------------------------------------
|
||
|
||
/// Greedy row-by-row assignment (correct for non-competing minima).
///
/// For each row of `cost`, returns the column index of that row's smallest
/// entry. Rows are resolved independently, so this is NOT an optimal
/// (Hungarian) solver: two rows whose minima share a column both receive it.
///
/// * Ties resolve to the lowest column index (`Iterator::min_by` keeps the
///   first minimum).
/// * Incomparable pairs (NaN) are treated as equal.
/// * An empty row maps to column `0`.
fn greedy_assignment(cost: &[Vec<f64>]) -> Vec<usize> {
    cost.iter()
        .map(|row| {
            row.iter()
                .enumerate()
                .min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
                .map(|(col, _)| col)
                .unwrap_or(0)
        })
        .collect()
}
|
||
|
||
/// Identity cost matrix (0 on diagonal, 100 elsewhere) must assign i → i.
///
/// Despite the `hungarian_` prefix, this exercises the local
/// `greedy_assignment` helper on a 3×3 matrix whose row minima do not compete.
#[test]
fn hungarian_identity_cost_matrix_assigns_diagonal() {
    let n = 3_usize;
    let cost: Vec<Vec<f64>> = (0..n)
        .map(|i| (0..n).map(|j| if i == j { 0.0 } else { 100.0 }).collect())
        .collect();

    let assignment = greedy_assignment(&cost);
    assert_eq!(
        assignment,
        vec![0, 1, 2],
        "identity cost matrix must assign 0→0, 1→1, 2→2, got {:?}",
        assignment
    );
}
|
||
|
||
/// Permuted cost matrix must find the optimal (zero-cost) assignment.
///
/// Each row has exactly one zero and those zeros sit in distinct columns, so
/// the local `greedy_assignment` helper (which this test calls) lands on the
/// zero-cost permutation 0→2, 1→0, 2→1.
#[test]
fn hungarian_permuted_cost_matrix_finds_optimal() {
    let cost: Vec<Vec<f64>> = vec![
        vec![100.0, 100.0, 0.0],
        vec![0.0, 100.0, 100.0],
        vec![100.0, 0.0, 100.0],
    ];

    let assignment = greedy_assignment(&cost);
    assert_eq!(
        assignment,
        vec![2, 0, 1],
        "permuted cost matrix must assign 0→2, 1→0, 2→1, got {:?}",
        assignment
    );
}
|
||
|
||
/// A 5×5 identity cost matrix must also be assigned correctly.
///
/// Same shape as the 3×3 identity case (0 on the diagonal, 999 elsewhere),
/// again run through the local `greedy_assignment` helper.
#[test]
fn hungarian_5x5_identity_matrix() {
    let n = 5_usize;
    let cost: Vec<Vec<f64>> = (0..n)
        .map(|i| (0..n).map(|j| if i == j { 0.0 } else { 999.0 }).collect())
        .collect();

    let assignment = greedy_assignment(&cost);
    assert_eq!(
        assignment,
        vec![0, 1, 2, 3, 4],
        "5×5 identity cost matrix must assign i→i: got {:?}",
        assignment
    );
}
|
||
|
||
// ---------------------------------------------------------------------------
// MetricsAccumulator tests (deterministic batch evaluation)
// ---------------------------------------------------------------------------
|
||
|
||
/// Batch PCK must be 1.0 when all predictions are exact.
///
/// The PCK is computed inline here (no accumulator type is involved): 5
/// samples × 17 keypoints, each keypoint paired with itself, counted as
/// correct when the Euclidean distance is within `threshold`.
#[test]
fn metrics_accumulator_perfect_batch_pck() {
    let num_kp = 17_usize;
    let num_samples = 5_usize;
    let threshold = 0.5_f64;

    let kps: Vec<[f64; 2]> = (0..num_kp)
        .map(|j| [j as f64 * 0.05, j as f64 * 0.04])
        .collect();
    let total_joints = num_samples * num_kp;

    // Every sample zips the keypoints with themselves, i.e. pred == gt.
    let total_correct: usize = (0..num_samples)
        .flat_map(|_| kps.iter().zip(kps.iter()))
        .filter(|(p, g)| {
            let dx = p[0] - g[0];
            let dy = p[1] - g[1];
            (dx * dx + dy * dy).sqrt() <= threshold
        })
        .count();

    let pck = total_correct as f64 / total_joints as f64;
    assert!(
        (pck - 1.0).abs() < 1e-9,
        "batch PCK for all-correct pairs must be 1.0, got {pck}"
    );
}
|
||
|
||
/// Accumulating 50% correct and 50% wrong predictions must yield PCK = 0.5.
///
/// The PCK is computed inline here (no accumulator type is involved): three
/// exact `(pred, gt)` pairs plus three pairs whose prediction sits at
/// `(10, 10)` against a ground truth of `(0.5, 0.5)`, far outside `threshold`.
#[test]
fn metrics_accumulator_is_additive_half_correct() {
    let threshold = 0.05_f64;
    let gt_kp = [0.5_f64, 0.5_f64];
    let wrong_kp = [10.0_f64, 10.0_f64];

    // 3 correct + 3 wrong = 6 total.
    let pairs: Vec<([f64; 2], [f64; 2])> = (0..6)
        .map(|i| {
            if i < 3 {
                (gt_kp, gt_kp)
            } else {
                (wrong_kp, gt_kp)
            }
        })
        .collect();

    let correct: usize = pairs
        .iter()
        .filter(|(pred, gt)| {
            let dx = pred[0] - gt[0];
            let dy = pred[1] - gt[1];
            (dx * dx + dy * dy).sqrt() <= threshold
        })
        .count();

    let pck = correct as f64 / pairs.len() as f64;
    assert!(
        (pck - 0.5).abs() < 1e-9,
        "50% correct pairs must yield PCK = 0.5, got {pck}"
    );
}
|