wifi-densepose/v2/crates/wifi-densepose-train/tests/test_metrics.rs

613 lines
23 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Integration tests for `wifi_densepose_train` pose metrics.
//!
//! # ADR-155 Milestone-1 — §8 "reference kernels" resolution
//!
//! The full `metrics` module is gated behind `tch-backend` (libtorch), but the
//! **canonical** metric core (`pck_canonical` / `oks_canonical`) now lives in
//! the un-gated `metrics_core` module and is re-exported at the crate root, so
//! these workspace tests (run under `--no-default-features`) validate the
//! **production** functions directly.
//!
//! Previously this file carried its own local `compute_pck` / `compute_oks`
//! reimplementations and asserted properties of *those* — a test that could
//! not catch a bug in the canonical implementation (both could be wrong the
//! same way). That is fixed two ways here:
//!
//! 1. **Fixture tests** (`canonical_pck_matches_hand_computed_fixture`,
//! `canonical_oks_*`) assert the production `pck_canonical` / `oks_canonical`
//! equal *hand-computed* expected values — numbers worked out by hand below,
//! NOT a second implementation of the same algorithm.
//! 2. **Differential test** (`test_kernel_agrees_with_canonical`) keeps a small
//! independent reference kernel and asserts it **agrees** with the canonical
//! function on shared inputs (in the torso=raw-threshold regime where the two
//! coincide), so the reference adds genuine cross-check value rather than
//! duplicating the algorithm under test.
//!
//! `EvalMetrics` tests remain `#[cfg(feature = "tch-backend")]` (that type is in
//! the gated module). All inputs are fixed, deterministic arrays — no `rand`,
//! no OS entropy.
use ndarray::{Array1, Array2};
use wifi_densepose_train::{oks_canonical, pck_canonical, CANON_LEFT_HIP, CANON_RIGHT_HIP};
// ADR-155 §Tier-1.2 — metric-locked accuracy harness public surface.
use wifi_densepose_train::{accuracy_report, pck_at, PckNormalization, PoseFrame};
// ---------------------------------------------------------------------------
// Metric-locked accuracy harness: the three PCK normalizations are reachable
// from the crate root and give DIFFERENT PCK on identical predictions — the
// proof that the 96 / 81.6 / 61 figures were non-comparable (validated here as
// a downstream consumer would call it).
// ---------------------------------------------------------------------------
/// One fixed prediction scored under three declared normalizations must give
/// three different PCK values.
///
/// Hand calculation (all coordinates in `[0,1]`):
/// * GT: nose(0) = (0.50, 0.10), l_shoulder(5) = (0.50, 0.30),
///   hips = (0.40, 0.90) / (0.60, 0.90).
/// * Pred: nose is off by 0.06, shoulder by 0.10, both hips are exact.
/// * torso width = 0.20 ⇒ τ@20 = 0.04 ⇒ only the hips pass ⇒ 2/4 = **0.50**.
/// * bbox diagonal = √(0.20² + 0.80²) = 0.82462 ⇒ τ@20 = 0.16492 ⇒ all pass ⇒ **1.00**.
/// * abs(0.08): nose 0.06 ≤ 0.08 passes, shoulder 0.10 > 0.08 fails ⇒ 3/4 = **0.75**.
#[test]
fn harness_three_normalizations_differ_from_crate_root() {
    let scored_joints = [0, 5, CANON_LEFT_HIP, CANON_RIGHT_HIP];
    let truth = pose17(&[
        (0, 0.50, 0.10),
        (5, 0.50, 0.30),
        (CANON_LEFT_HIP, 0.40, 0.90),
        (CANON_RIGHT_HIP, 0.60, 0.90),
    ]);
    let guess = pose17(&[
        (0, 0.56, 0.10),
        (5, 0.60, 0.30),
        (CANON_LEFT_HIP, 0.40, 0.90),
        (CANON_RIGHT_HIP, 0.60, 0.90),
    ]);
    let mask = vis17(&scored_joints);

    // Same prediction and same threshold argument (20) every time; only the
    // normalization differs between the three calls.
    let pck_under = |norm: PckNormalization| pck_at(&guess, &truth, &mask, 20, norm).2;
    let torso = pck_under(PckNormalization::TorsoDiameter);
    let bbox = pck_under(PckNormalization::BoundingBoxDiagonal);
    let abs = pck_under(PckNormalization::AbsolutePixels(0.08));

    assert!((torso - 0.50).abs() < 1e-6, "torso PCK 0.50, got {torso}");
    assert!((bbox - 1.00).abs() < 1e-6, "bbox PCK 1.00, got {bbox}");
    assert!((abs - 0.75).abs() < 1e-6, "abs(0.08) PCK 0.75, got {abs}");

    let all_distinct = torso != bbox && bbox != abs && torso != abs;
    assert!(
        all_distinct,
        "three normalizations must be distinct: {torso} / {bbox} / {abs}"
    );
}
/// The value returned by `accuracy_report` carries the normalization it was
/// computed under, alongside the keypoint/frame counts and a textual summary,
/// so a PCK figure read from the report always comes with its label.
#[test]
fn harness_report_carries_normalization_label() {
    let hips = [CANON_LEFT_HIP, CANON_RIGHT_HIP];
    let pose = pose17(&[(hips[0], 0.40, 0.50), (hips[1], 0.60, 0.50)]);

    // Prediction and ground truth are the same pose, so PCK@20 must be 1.0.
    let frames = [PoseFrame {
        pred: pose.clone(),
        gt: pose,
        visibility: vis17(&hips),
    }];
    let report = accuracy_report(&frames, &[20], PckNormalization::BoundingBoxDiagonal);

    assert_eq!(report.normalization, PckNormalization::BoundingBoxDiagonal);
    assert_eq!(report.n_keypoints, 17);
    assert_eq!(report.n_frames, 1);

    let pck_at_20 = report.pck(20).unwrap();
    assert!((pck_at_20 - 1.0).abs() < 1e-6);
    assert!(report.summary().contains("bbox-diagonal"));
}
// ---------------------------------------------------------------------------
// Tests that use `EvalMetrics` (requires tch-backend because the metrics
// module is feature-gated in lib.rs)
// ---------------------------------------------------------------------------
#[cfg(feature = "tch-backend")]
mod eval_metrics_tests {
    //! Field-level checks on hand-built [`EvalMetrics`] values.
    //!
    //! Every test here sets the struct fields directly and reads them back;
    //! no metric computation is invoked.

    use wifi_densepose_train::metrics::EvalMetrics;

    /// Fields passed to the [`EvalMetrics`] literal are read back unchanged.
    #[test]
    fn eval_metrics_stores_correct_values() {
        let stored = EvalMetrics {
            gps: 1.3,
            pck_at_05: 0.92,
            mpjpe: 0.05,
        };

        let mpjpe_drift = (stored.mpjpe - 0.05).abs();
        let pck_drift = (stored.pck_at_05 - 0.92).abs();
        let gps_drift = (stored.gps - 1.3).abs();

        assert!(
            mpjpe_drift < 1e-12,
            "mpjpe must be 0.05, got {}",
            stored.mpjpe
        );
        assert!(
            pck_drift < 1e-12,
            "pck_at_05 must be 0.92, got {}",
            stored.pck_at_05
        );
        assert!(gps_drift < 1e-12, "gps must be 1.3, got {}", stored.gps);
    }

    /// A hand-built "perfect prediction" record reports `pck_at_05 == 1.0`.
    #[test]
    fn pck_perfect_prediction_is_one() {
        let perfect = EvalMetrics {
            gps: 0.0,
            pck_at_05: 1.0,
            mpjpe: 0.0,
        };
        let shortfall = (perfect.pck_at_05 - 1.0).abs();
        assert!(
            shortfall < 1e-9,
            "perfect prediction must yield pck_at_05 = 1.0, got {}",
            perfect.pck_at_05
        );
    }

    /// A hand-built "completely wrong" record reports `pck_at_05 == 0.0`.
    #[test]
    fn pck_completely_wrong_prediction_is_zero() {
        let hopeless = EvalMetrics {
            gps: 999.0,
            pck_at_05: 0.0,
            mpjpe: 999.0,
        };
        let magnitude = hopeless.pck_at_05.abs();
        assert!(
            magnitude < 1e-9,
            "completely wrong prediction must yield pck_at_05 = 0.0, got {}",
            hopeless.pck_at_05
        );
    }

    /// A hand-built "perfect prediction" record reports `mpjpe == 0.0`.
    #[test]
    fn mpjpe_perfect_prediction_is_zero() {
        let perfect = EvalMetrics {
            gps: 0.0,
            pck_at_05: 1.0,
            mpjpe: 0.0,
        };
        let magnitude = perfect.mpjpe.abs();
        assert!(
            magnitude < 1e-12,
            "perfect prediction must yield mpjpe = 0.0, got {}",
            perfect.mpjpe
        );
    }

    /// Three hand-built records with growing error have strictly growing
    /// `mpjpe`.
    #[test]
    fn mpjpe_is_monotone_with_distance() {
        let small_error = EvalMetrics {
            gps: 0.1,
            pck_at_05: 0.99,
            mpjpe: 0.01,
        };
        let medium_error = EvalMetrics {
            gps: 1.0,
            pck_at_05: 0.70,
            mpjpe: 0.10,
        };
        let large_error = EvalMetrics {
            gps: 5.0,
            pck_at_05: 0.20,
            mpjpe: 0.50,
        };

        assert!(
            medium_error.mpjpe > small_error.mpjpe,
            "small error mpjpe must be < medium error mpjpe"
        );
        assert!(
            large_error.mpjpe > medium_error.mpjpe,
            "medium error mpjpe must be < large error mpjpe"
        );
    }

    /// A hand-built "perfect prediction" record reports `gps == 0.0`.
    #[test]
    fn gps_perfect_prediction_is_zero() {
        let perfect = EvalMetrics {
            gps: 0.0,
            pck_at_05: 1.0,
            mpjpe: 0.0,
        };
        let magnitude = perfect.gps.abs();
        assert!(
            magnitude < 1e-12,
            "perfect prediction must yield gps = 0.0, got {}",
            perfect.gps
        );
    }

    /// Three hand-built records of degrading quality have strictly growing
    /// `gps`.
    #[test]
    fn gps_monotone_with_distance() {
        let perfect = EvalMetrics {
            gps: 0.0,
            pck_at_05: 1.0,
            mpjpe: 0.0,
        };
        let imperfect = EvalMetrics {
            gps: 2.0,
            pck_at_05: 0.8,
            mpjpe: 0.1,
        };
        let poor = EvalMetrics {
            gps: 8.0,
            pck_at_05: 0.3,
            mpjpe: 0.5,
        };

        assert!(
            imperfect.gps > perfect.gps,
            "perfect GPS must be < imperfect GPS"
        );
        assert!(poor.gps > imperfect.gps, "imperfect GPS must be < poor GPS");
    }
}
// ---------------------------------------------------------------------------
// Canonical PCK / OKS validation (production functions, no tch)
// ---------------------------------------------------------------------------
/// Build a `[17, 2]` pose array from `(joint_index, x, y)` triples.
///
/// Column 0 holds `x` and column 1 holds `y`. Joints that are not listed keep
/// the zero fill `(0, 0)`; if a joint is listed twice the later triple wins.
/// A `joint_index` of 17 or more is out of bounds for the array.
fn pose17(joints: &[(usize, f32, f32)]) -> Array2<f32> {
    let mut pose: Array2<f32> = Array2::zeros((17, 2));
    for (joint, x, y) in joints.iter().copied() {
        pose[[joint, 0]] = x;
        pose[[joint, 1]] = y;
    }
    pose
}
/// Build a length-17 visibility vector: listed joints are set to `2.0`
/// (visible), every other entry stays `0.0`.
fn vis17(visible: &[usize]) -> Array1<f32> {
    let mut mask: Array1<f32> = Array1::zeros(17);
    for joint in visible.iter().copied() {
        mask[joint] = 2.0;
    }
    mask
}
/// **Fixture test (Goal B).** `pck_canonical` must reproduce a result derived
/// by hand for a constructed pose, rather than by a second implementation.
///
/// Setup (all coordinates in `[0,1]`):
/// * left_hip(11) = (0.40, 0.50), right_hip(12) = (0.60, 0.50)
///   ⇒ canonical torso = hip↔hip width = 0.20.
/// * threshold = 0.2 ⇒ dist_threshold = 0.2 × 0.20 = **0.04**.
/// * Four visible joints: 0 (nose), 5 (l_shoulder), 11, 12.
///   - nose(0): pred == gt ⇒ dist 0.00 ≤ 0.04 ⇒ correct
///   - l_shoulder(5): dy = 0.10 ⇒ dist 0.10 > 0.04 ⇒ wrong
///   - l_hip(11): pred == gt ⇒ dist 0.00 ≤ 0.04 ⇒ correct
///   - r_hip(12): dx = 0.03 ⇒ dist 0.03 ≤ 0.04 ⇒ correct
///
/// Hand result: correct = 3, total = 4, PCK = 3/4 = **0.75**.
#[test]
fn canonical_pck_matches_hand_computed_fixture() {
    // One row per visible joint: (joint, ground-truth xy, predicted xy).
    let fixture: [(usize, (f32, f32), (f32, f32)); 4] = [
        (0, (0.50, 0.20), (0.50, 0.20)),               // nose: exact
        (5, (0.35, 0.35), (0.35, 0.45)),               // l_shoulder: dy = 0.10 (> 0.04)
        (CANON_LEFT_HIP, (0.40, 0.50), (0.40, 0.50)),  // exact
        (CANON_RIGHT_HIP, (0.60, 0.50), (0.63, 0.50)), // dx = 0.03 (<= 0.04)
    ];

    let gt_rows: Vec<(usize, f32, f32)> =
        fixture.iter().map(|&(j, (x, y), _)| (j, x, y)).collect();
    let pred_rows: Vec<(usize, f32, f32)> =
        fixture.iter().map(|&(j, _, (x, y))| (j, x, y)).collect();
    let joints: Vec<usize> = fixture.iter().map(|&(j, _, _)| j).collect();

    let truth = pose17(&gt_rows);
    let guess = pose17(&pred_rows);
    let mask = vis17(&joints);

    let (correct, total, pck) = pck_canonical(&guess, &truth, &mask, 0.2);

    assert_eq!(total, 4, "4 visible joints expected, got {total}");
    assert_eq!(correct, 3, "hand-computed: 3 of 4 within 0.04, got {correct}");
    let deviation = (pck - 0.75).abs();
    assert!(deviation < 1e-6, "hand-computed PCK is 0.75, got {pck}");
}
/// Pins the **normalizer**: the PCK tolerance scales with hip↔hip width.
///
/// Joint 5 carries the same 0.18 error in both scenarios. With hips 1.0 apart
/// the tolerance is 0.2 × 1.0 = 0.2, so the joint is accepted; with hips 0.20
/// apart the tolerance shrinks to 0.04 and the joint is rejected. An
/// implementation that ignored the torso normalizer would score both the same.
#[test]
fn canonical_pck_uses_hip_to_hip_torso_normalizer() {
    let vis = vis17(&[5, CANON_LEFT_HIP, CANON_RIGHT_HIP]);

    // Score the fixed joint-5 error (0.50 → 0.68) for a given pair of hip x
    // positions. The hips themselves are always predicted exactly.
    let pck_with_hips = |left_x: f32, right_x: f32| {
        let hips = [
            (CANON_LEFT_HIP, left_x, 0.5),
            (CANON_RIGHT_HIP, right_x, 0.5),
        ];
        let truth = pose17(&[(5, 0.50, 0.50), hips[0], hips[1]]);
        let guess = pose17(&[(5, 0.68, 0.50), hips[0], hips[1]]);
        pck_canonical(&guess, &truth, &vis, 0.2).2
    };

    // Wide hips: width 1.0 ⇒ tolerance 0.2 ⇒ all three joints pass (3/3).
    let pck_wide = pck_with_hips(0.0, 1.0);
    // Narrow hips: width 0.20 ⇒ tolerance 0.04 ⇒ joint 5 fails (2/3).
    let pck_narrow = pck_with_hips(0.40, 0.60);

    assert!(
        (pck_wide - 1.0).abs() < 1e-6,
        "wide-hip PCK should be 1.0, got {pck_wide}"
    );
    assert!(
        (pck_narrow - 2.0 / 3.0).abs() < 1e-6,
        "narrow-hip PCK should be 2/3 (joint 5 now out of tolerance), got {pck_narrow}"
    );
}
/// Guards against the claim-inflating bug: when no joint is visible the PCK
/// must be **0.0** with zero counts, never 1.0.
#[test]
fn canonical_pck_zero_visible_is_zero() {
    let nothing_visible = vis17(&[]);
    let pose = pose17(&[(CANON_LEFT_HIP, 0.4, 0.5), (CANON_RIGHT_HIP, 0.6, 0.5)]);

    // Prediction and ground truth are identical; only the empty mask matters.
    let (hits, counted, pck) = pck_canonical(&pose, &pose, &nothing_visible, 0.2);

    assert_eq!((hits, counted), (0, 0));
    assert_eq!(pck, 0.0, "no-visible-joint PCK must be 0.0 (not the old 1.0)");
}
// ---------------------------------------------------------------------------
// Canonical OKS validation (production function, no tch)
// ---------------------------------------------------------------------------
/// **Fixture test (Goal B).** With pred == gt every Gaussian term is
/// `exp(0) = 1`, so the canonical OKS must be exactly **1.0** regardless of
/// the (positive) scale — a result that is evident by hand.
#[test]
fn canonical_oks_perfect_prediction_is_one() {
    let visible = [0, 5, CANON_LEFT_HIP, CANON_RIGHT_HIP];
    let pose = pose17(&[
        (0, 0.50, 0.20),
        (5, 0.35, 0.35),
        (CANON_LEFT_HIP, 0.40, 0.50),
        (CANON_RIGHT_HIP, 0.60, 0.50),
    ]);
    let mask = vis17(&visible);

    // The same pose is passed as both prediction and ground truth.
    let oks = oks_canonical(&pose, &pose, &mask);

    let shortfall = (oks - 1.0).abs();
    assert!(
        shortfall < 1e-6,
        "OKS for a perfect prediction must be 1.0, got {oks}"
    );
}
/// **Pins the "fake Gold tier" bug (Goal B).** On normalized `[0,1]`
/// coordinates the historical `s = 1.0` path returned ≈1.0 for *any* pose.
/// The canonical function derives `s` from the pose extent (torso width 0.20
/// here), so a pose whose visible non-hip joint is ~3× the torso away from
/// ground truth must score well below the "Gold" tier.
///
/// Hand bound: for joint 5 with d ≈ 0.60, s = 0.20, k = 0.079 the exponent
/// `-d²/(2 s² k²)` is hugely negative, so that term is ≈ 0. The two exact hip
/// terms are 1 each, giving a mean of ≈ 2/3 ≈ 0.667. The test asserts the
/// score is **< 0.8** and within 0.05 of 2/3.
#[test]
fn canonical_oks_not_one_for_wrong_pose_on_normalized_coords() {
    // Hips are fixed and exact in both poses; only joint 5's x varies.
    let with_joint5_at = |x: f32| {
        pose17(&[
            (5, x, 0.50),
            (CANON_LEFT_HIP, 0.40, 0.50),
            (CANON_RIGHT_HIP, 0.60, 0.50),
        ])
    };
    let truth = with_joint5_at(0.30);
    // Joint 5 dragged 0.60 away (3× the 0.20 torso).
    let guess = with_joint5_at(0.90);
    let mask = vis17(&[5, CANON_LEFT_HIP, CANON_RIGHT_HIP]);

    let oks = oks_canonical(&guess, &truth, &mask);

    assert!(
        oks < 0.8,
        "wrong-pose OKS on [0,1] coords must NOT be ≈1.0 (fake-Gold bug); got {oks}"
    );
    // The exact hips alone contribute 2/3; the displaced joint adds ~nothing.
    let gap_from_two_thirds = (oks - 2.0 / 3.0).abs();
    assert!(
        gap_from_two_thirds < 0.05,
        "wrong joint should contribute ≈0 ⇒ OKS ≈ 2/3, got {oks}"
    );
}
/// Canonical OKS must fall strictly as joint 5 is moved further from its
/// ground-truth position (errors of 0.00, 0.02 and 0.10 along x).
#[test]
fn canonical_oks_decreases_with_distance() {
    let vis = vis17(&[5, CANON_LEFT_HIP, CANON_RIGHT_HIP]);
    // Pose with exact hips and joint 5 placed at the given x (y fixed at 0.50).
    let pose_with_joint5 = |x5: f32| {
        pose17(&[
            (5, x5, 0.50),
            (CANON_LEFT_HIP, 0.40, 0.50),
            (CANON_RIGHT_HIP, 0.60, 0.50),
        ])
    };
    let gt = pose_with_joint5(0.50);

    let [oks0, oks1, oks2] =
        [0.50_f32, 0.52, 0.60].map(|x5| oks_canonical(&pose_with_joint5(x5), &gt, &vis));

    assert!(oks1 < oks0, "OKS must drop as error grows: {oks0} vs {oks1}");
    assert!(oks2 < oks1, "OKS must drop as error grows: {oks1} vs {oks2}");
}
// ---------------------------------------------------------------------------
// Differential cross-check: independent reference kernel vs canonical (Goal B)
// ---------------------------------------------------------------------------
/// Independent PCK reference kernel for the simplest regime: a **raw distance
/// threshold** with no torso normalization. It exists only to cross-check the
/// canonical function, not to define the metric.
///
/// `pred` and `gt` are paired element-by-element; if the slices differ in
/// length only the first `min(pred.len(), gt.len())` pairs are scored. A pair
/// counts as correct when its Euclidean distance is `<= dist_threshold`.
///
/// Returns `(correct, scored, pck)` where `pck = correct / scored`, or `0.0`
/// when nothing was scored.
fn reference_pck_raw(
    pred: &[(f32, f32)],
    gt: &[(f32, f32)],
    dist_threshold: f32,
) -> (usize, usize, f32) {
    let n = pred.len().min(gt.len());
    // `zip` stops at the shorter slice, so exactly `n` pairs are visited.
    let correct = pred
        .iter()
        .zip(gt)
        .filter(|(p, g)| {
            let dx = p.0 - g.0;
            let dy = p.1 - g.1;
            (dx * dx + dy * dy).sqrt() <= dist_threshold
        })
        .count();
    let pck = if n > 0 { correct as f32 / n as f32 } else { 0.0 };
    (correct, n, pck)
}
/// **Differential test (Goal B).** When the hips are exactly one unit apart
/// the canonical torso normalizer is 1.0, so `threshold · torso` collapses to
/// the raw `threshold`. In that regime `pck_canonical` and the independent
/// raw-threshold kernel `reference_pck_raw` must report the same counts and
/// the same PCK on shared inputs. Because the second implementation is
/// separate code, this can expose a canonical-side bug that a self-fixture
/// alone might miss.
#[test]
fn test_kernel_agrees_with_canonical() {
    let visible = [0usize, 5, 7, CANON_LEFT_HIP, CANON_RIGHT_HIP];
    let threshold = 0.1_f32;

    // Hips at x = 0.0 and x = 1.0 ⇒ torso width 1.0 ⇒ dist_threshold == threshold.
    let gt = pose17(&[
        (0, 0.30, 0.30),
        (5, 0.55, 0.55),
        (7, 0.10, 0.90),
        (CANON_LEFT_HIP, 0.00, 0.50),
        (CANON_RIGHT_HIP, 1.00, 0.50),
    ]);
    let pred = pose17(&[
        (0, 0.31, 0.30),               // err 0.01
        (5, 0.70, 0.55),               // err 0.15
        (7, 0.10, 0.98),               // err 0.08
        (CANON_LEFT_HIP, 0.00, 0.50),  // exact
        (CANON_RIGHT_HIP, 1.00, 0.50), // exact
    ]);
    let vis = vis17(&visible);

    let (c_can, t_can, pck_can) = pck_canonical(&pred, &gt, &vis, threshold);

    // Feed the reference kernel the SAME visible joints and the SAME raw
    // threshold, extracted as plain `(x, y)` pairs.
    let xy_of_visible = |pose: &Array2<f32>| -> Vec<(f32, f32)> {
        visible
            .iter()
            .map(|&j| (pose[[j, 0]], pose[[j, 1]]))
            .collect()
    };
    let (c_ref, t_ref, pck_ref) =
        reference_pck_raw(&xy_of_visible(&pred), &xy_of_visible(&gt), threshold);

    assert_eq!(t_can, t_ref, "visible counts must match: {t_can} vs {t_ref}");
    assert_eq!(c_can, c_ref, "correct counts must match: {c_can} vs {c_ref}");
    let disagreement = (pck_can - pck_ref).abs();
    assert!(
        disagreement < 1e-6,
        "canonical PCK {pck_can} must agree with independent reference {pck_ref}"
    );
}
// ---------------------------------------------------------------------------
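// Assignment tests on Hungarian-style cost matrices (deterministic,
// hand-computed). NOTE: these run the local `greedy_assignment` stand-in,
// not a production Hungarian solver.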
// ---------------------------------------------------------------------------
/// Greedy per-row assignment: each row is mapped to the column holding its
/// smallest cost, independently of every other row.
///
/// Ties go to the lowest column index, and an empty row maps to column `0`.
/// Because rows never compete for a column, the result matches an optimal
/// assignment only when the row minima fall in distinct columns.
fn greedy_assignment(cost: &[Vec<f64>]) -> Vec<usize> {
    let mut picks = Vec::with_capacity(cost.len());
    for row in cost {
        // Running minimum as (column, value); a later entry replaces it only
        // when strictly smaller, so the first minimum is kept.
        let mut best: Option<(usize, f64)> = None;
        for (col, &value) in row.iter().enumerate() {
            let replace = match best {
                None => true,
                Some((_, lowest)) => value < lowest,
            };
            if replace {
                best = Some((col, value));
            }
        }
        picks.push(best.map_or(0, |(col, _)| col));
    }
    picks
}
/// A 3×3 cost matrix with 0 on the diagonal and 100 elsewhere must be
/// assigned i → i (checked through the local `greedy_assignment` helper).
#[test]
fn hungarian_identity_cost_matrix_assigns_diagonal() {
    let n = 3_usize;

    // Start from an all-100 matrix and zero out the diagonal.
    let mut cost = vec![vec![100.0_f64; n]; n];
    for (i, row) in cost.iter_mut().enumerate() {
        row[i] = 0.0;
    }
    let diagonal: Vec<usize> = (0..n).collect();

    let picked = greedy_assignment(&cost);
    assert_eq!(
        picked,
        diagonal,
        "identity cost matrix must assign 0→0, 1→1, 2→2, got {:?}",
        picked
    );
}
/// A 3×3 matrix whose zero-cost cells form the permutation 0→2, 1→0, 2→1 must
/// yield exactly that assignment (checked through the local
/// `greedy_assignment` helper).
#[test]
fn hungarian_permuted_cost_matrix_finds_optimal() {
    // Row i has its single zero in column `zero_cols[i]`, 100 everywhere else.
    let zero_cols = [2_usize, 0, 1];
    let cost: Vec<Vec<f64>> = zero_cols
        .iter()
        .map(|&zero_col| {
            let mut row = vec![100.0_f64; 3];
            row[zero_col] = 0.0;
            row
        })
        .collect();

    let picked = greedy_assignment(&cost);
    assert_eq!(
        picked,
        zero_cols.to_vec(),
        "permuted cost matrix must assign 0→2, 1→0, 2→1, got {:?}",
        picked
    );
}
/// A 5×5 cost matrix with 0 on the diagonal and 999 elsewhere must be
/// assigned i → i (checked through the local `greedy_assignment` helper).
#[test]
fn hungarian_5x5_identity_matrix() {
    let n = 5_usize;

    // Start from an all-999 matrix and zero out the diagonal.
    let mut cost = vec![vec![999.0_f64; n]; n];
    for (i, row) in cost.iter_mut().enumerate() {
        row[i] = 0.0;
    }
    let diagonal: Vec<usize> = (0..n).collect();

    let picked = greedy_assignment(&cost);
    assert_eq!(
        picked,
        diagonal,
        "5×5 identity cost matrix must assign i→i: got {:?}",
        picked
    );
}
// ---------------------------------------------------------------------------
// Batch-PCK arithmetic in the style of a MetricsAccumulator (deterministic).
// NOTE: the tallies are computed inline; no accumulator type is instantiated.
// ---------------------------------------------------------------------------
/// Tallying 5 samples × 17 keypoints in which every prediction equals its
/// ground truth must give a batch PCK of exactly 1.0.
///
/// The tally is computed inline in this test (distance ≤ 0.5 counts as
/// correct); it does not call into the crate.
#[test]
fn metrics_accumulator_perfect_batch_pck() {
    const NUM_KP: usize = 17;
    const NUM_SAMPLES: usize = 5;
    const THRESHOLD: f64 = 0.5;

    // Deterministic keypoints along a line; reused as both pred and gt.
    let kps: Vec<[f64; 2]> = (0..NUM_KP)
        .map(|j| {
            let j = j as f64;
            [j * 0.05, j * 0.04]
        })
        .collect();

    let mut total_joints = 0_usize;
    let mut total_correct = 0_usize;
    for _ in 0..NUM_SAMPLES {
        for (p, g) in kps.iter().zip(&kps) {
            total_joints += 1;
            let (dx, dy) = (p[0] - g[0], p[1] - g[1]);
            if (dx * dx + dy * dy).sqrt() <= THRESHOLD {
                total_correct += 1;
            }
        }
    }

    let pck = total_correct as f64 / total_joints as f64;
    assert!(
        (pck - 1.0).abs() < 1e-9,
        "batch PCK for all-correct pairs must be 1.0, got {pck}"
    );
}
/// Tallying three exact pairs together with three far-off pairs must give a
/// PCK of 0.5.
///
/// The tally is computed inline in this test (distance ≤ 0.05 counts as
/// correct); it does not call into the crate.
#[test]
fn metrics_accumulator_is_additive_half_correct() {
    let threshold = 0.05_f64;
    let gt_kp = [0.5_f64, 0.5_f64];
    let wrong_kp = [10.0_f64, 10.0_f64];

    // 3 correct + 3 wrong = 6 total, each stored as (prediction, ground truth).
    let mut pairs: Vec<([f64; 2], [f64; 2])> = vec![(gt_kp, gt_kp); 3];
    pairs.extend(std::iter::repeat((wrong_kp, gt_kp)).take(3));

    let mut correct = 0_usize;
    for (pred, gt) in &pairs {
        let (dx, dy) = (pred[0] - gt[0], pred[1] - gt[1]);
        if (dx * dx + dy * dy).sqrt() <= threshold {
            correct += 1;
        }
    }

    let pck = correct as f64 / pairs.len() as f64;
    assert!(
        (pck - 0.5).abs() < 1e-9,
        "50% correct pairs must yield PCK = 0.5, got {pck}"
    );
}