fix(train): unify 7 divergent PCK/OKS into one canonical metric (ADR-155 §Tier-1.1)
Collapse the four PCK and three OKS implementations into a single source of
truth — pck_canonical (torso hip↔hip, COCO/ADR-152 convention validated at
~96% PCK@20 in benchmarks/wiflow-std) and oks_canonical (scale from GT pose
extent). MetricsAccumulator, compute_pck/_per_joint/_oks, aggregate_metrics and
the deprecated *_v2 path all route through them, so Trainer::evaluate() and the
bench definition agree.
Fixes two claim-inflating bugs, each pinned by a regression test:
- zero-visible-joint PCK was 1.0 (false-perfect) -> now 0.0
- OKS s=1.0 on normalized coords made OKS~=1.0 for any pose ("fake Gold tier")
-> scale now derived from the pose; a 3x-torso-wrong pose yields OKS<0.2
Divergent local kernels (training_bench raw-threshold, sensing-server
torso-height) annotated "DO NOT USE for reported metrics". Legitimately changed
test expectations (all-coincident "perfect" fixtures are correctly unscoreable;
all-invisible -> 0.0) updated with comments citing the finding.
Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
6511ca90fb
commit
50b657459f
|
|
@ -88,12 +88,24 @@ pub struct TrainingConfig {
|
|||
pub lora_profile: Option<String>,
|
||||
}
|
||||
|
||||
fn default_epochs() -> u32 { 100 }
|
||||
fn default_batch_size() -> u32 { 8 }
|
||||
fn default_learning_rate() -> f64 { 0.001 }
|
||||
fn default_weight_decay() -> f64 { 1e-4 }
|
||||
fn default_early_stopping_patience() -> u32 { 20 }
|
||||
fn default_warmup_epochs() -> u32 { 5 }
|
||||
fn default_epochs() -> u32 {
|
||||
100
|
||||
}
|
||||
fn default_batch_size() -> u32 {
|
||||
8
|
||||
}
|
||||
fn default_learning_rate() -> f64 {
|
||||
0.001
|
||||
}
|
||||
fn default_weight_decay() -> f64 {
|
||||
1e-4
|
||||
}
|
||||
fn default_early_stopping_patience() -> u32 {
|
||||
20
|
||||
}
|
||||
fn default_warmup_epochs() -> u32 {
|
||||
5
|
||||
}
|
||||
|
||||
impl Default for TrainingConfig {
|
||||
fn default() -> Self {
|
||||
|
|
@ -127,7 +139,9 @@ pub struct PretrainRequest {
|
|||
pub lr: f64,
|
||||
}
|
||||
|
||||
fn default_pretrain_epochs() -> u32 { 50 }
|
||||
fn default_pretrain_epochs() -> u32 {
|
||||
50
|
||||
}
|
||||
|
||||
/// Request body for `POST /api/v1/train/lora`.
|
||||
#[derive(Debug, Deserialize)]
|
||||
|
|
@ -141,8 +155,12 @@ pub struct LoraTrainRequest {
|
|||
pub epochs: u32,
|
||||
}
|
||||
|
||||
fn default_lora_rank() -> u8 { 8 }
|
||||
fn default_lora_epochs() -> u32 { 30 }
|
||||
fn default_lora_rank() -> u8 {
|
||||
8
|
||||
}
|
||||
fn default_lora_epochs() -> u32 {
|
||||
30
|
||||
}
|
||||
|
||||
/// Current training status (returned by `GET /api/v1/train/status`).
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
|
|
@ -360,7 +378,11 @@ fn extract_features_for_frame(
|
|||
let mut sum = 0.0f64;
|
||||
let mut sq_sum = 0.0f64;
|
||||
for w in window {
|
||||
let a = if k < w.subcarriers.len() { w.subcarriers[k] } else { 0.0 };
|
||||
let a = if k < w.subcarriers.len() {
|
||||
w.subcarriers[k]
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
sum += a;
|
||||
sq_sum += a * a;
|
||||
}
|
||||
|
|
@ -373,8 +395,16 @@ fn extract_features_for_frame(
|
|||
for k in 0..n_sub {
|
||||
let grad = match prev_frame {
|
||||
Some(prev) => {
|
||||
let cur = if k < frame.subcarriers.len() { frame.subcarriers[k] } else { 0.0 };
|
||||
let prv = if k < prev.subcarriers.len() { prev.subcarriers[k] } else { 0.0 };
|
||||
let cur = if k < frame.subcarriers.len() {
|
||||
frame.subcarriers[k]
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
let prv = if k < prev.subcarriers.len() {
|
||||
prev.subcarriers[k]
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
(cur - prv).abs()
|
||||
}
|
||||
None => 0.0,
|
||||
|
|
@ -426,8 +456,16 @@ fn extract_features_for_frame(
|
|||
if n_cmp > 0 {
|
||||
let diff: f64 = (0..n_cmp)
|
||||
.map(|k| {
|
||||
let c = if k < frame.subcarriers.len() { frame.subcarriers[k] } else { 0.0 };
|
||||
let p = if k < prev.subcarriers.len() { prev.subcarriers[k] } else { 0.0 };
|
||||
let c = if k < frame.subcarriers.len() {
|
||||
frame.subcarriers[k]
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
let p = if k < prev.subcarriers.len() {
|
||||
prev.subcarriers[k]
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
(c - p).powi(2)
|
||||
})
|
||||
.sum::<f64>()
|
||||
|
|
@ -492,8 +530,16 @@ fn compute_teacher_targets(frame: &RecordedFrame, prev_frame: Option<&RecordedFr
|
|||
if n_cmp > 0 {
|
||||
let diff: f64 = (0..n_cmp)
|
||||
.map(|k| {
|
||||
let c = if k < frame.subcarriers.len() { frame.subcarriers[k] } else { 0.0 };
|
||||
let p = if k < prev.subcarriers.len() { prev.subcarriers[k] } else { 0.0 };
|
||||
let c = if k < frame.subcarriers.len() {
|
||||
frame.subcarriers[k]
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
let p = if k < prev.subcarriers.len() {
|
||||
prev.subcarriers[k]
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
(c - p).powi(2)
|
||||
})
|
||||
.sum::<f64>()
|
||||
|
|
@ -503,7 +549,9 @@ fn compute_teacher_targets(frame: &RecordedFrame, prev_frame: Option<&RecordedFr
|
|||
0.0
|
||||
}
|
||||
}
|
||||
None => (variance / (mean_amp * mean_amp + 1e-9)).sqrt().clamp(0.0, 1.0),
|
||||
None => (variance / (mean_amp * mean_amp + 1e-9))
|
||||
.sqrt()
|
||||
.clamp(0.0, 1.0),
|
||||
};
|
||||
|
||||
let is_walking = motion_score > 0.55;
|
||||
|
|
@ -552,23 +600,23 @@ fn compute_teacher_targets(frame: &RecordedFrame, prev_frame: Option<&RecordedFr
|
|||
|
||||
// COCO 17-keypoint offsets from hip center.
|
||||
let kp_offsets: [(f64, f64); 17] = [
|
||||
( 0.0, -80.0), // 0 nose
|
||||
( -8.0, -88.0), // 1 left_eye
|
||||
( 8.0, -88.0), // 2 right_eye
|
||||
(-16.0, -82.0), // 3 left_ear
|
||||
( 16.0, -82.0), // 4 right_ear
|
||||
(-30.0, -50.0), // 5 left_shoulder
|
||||
( 30.0, -50.0), // 6 right_shoulder
|
||||
(-45.0, -15.0), // 7 left_elbow
|
||||
( 45.0, -15.0), // 8 right_elbow
|
||||
(-50.0, 20.0), // 9 left_wrist
|
||||
( 50.0, 20.0), // 10 right_wrist
|
||||
(-20.0, 20.0), // 11 left_hip
|
||||
( 20.0, 20.0), // 12 right_hip
|
||||
(-22.0, 70.0), // 13 left_knee
|
||||
( 22.0, 70.0), // 14 right_knee
|
||||
(-24.0, 120.0), // 15 left_ankle
|
||||
( 24.0, 120.0), // 16 right_ankle
|
||||
(0.0, -80.0), // 0 nose
|
||||
(-8.0, -88.0), // 1 left_eye
|
||||
(8.0, -88.0), // 2 right_eye
|
||||
(-16.0, -82.0), // 3 left_ear
|
||||
(16.0, -82.0), // 4 right_ear
|
||||
(-30.0, -50.0), // 5 left_shoulder
|
||||
(30.0, -50.0), // 6 right_shoulder
|
||||
(-45.0, -15.0), // 7 left_elbow
|
||||
(45.0, -15.0), // 8 right_elbow
|
||||
(-50.0, 20.0), // 9 left_wrist
|
||||
(50.0, 20.0), // 10 right_wrist
|
||||
(-20.0, 20.0), // 11 left_hip
|
||||
(20.0, 20.0), // 12 right_hip
|
||||
(-22.0, 70.0), // 13 left_knee
|
||||
(22.0, 70.0), // 14 right_knee
|
||||
(-24.0, 120.0), // 15 left_ankle
|
||||
(24.0, 120.0), // 16 right_ankle
|
||||
];
|
||||
|
||||
const TORSO_KP: [usize; 4] = [5, 6, 11, 12];
|
||||
|
|
@ -654,7 +702,11 @@ fn extract_features_and_targets(
|
|||
|
||||
for (i, frame) in frames.iter().enumerate() {
|
||||
// Build sliding window of up to VARIANCE_WINDOW preceding frames.
|
||||
let start = if i >= VARIANCE_WINDOW { i - VARIANCE_WINDOW } else { 0 };
|
||||
let start = if i >= VARIANCE_WINDOW {
|
||||
i - VARIANCE_WINDOW
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let window: Vec<&RecordedFrame> = frames[start..i].iter().collect();
|
||||
let prev = if i > 0 { Some(&frames[i - 1]) } else { None };
|
||||
|
||||
|
|
@ -689,7 +741,11 @@ fn extract_features_and_targets(
|
|||
.map(|j| {
|
||||
let var = (sq_mean[j] - mean[j] * mean[j]).max(0.0);
|
||||
let s = var.sqrt();
|
||||
if s < 1e-9 { 1.0 } else { s } // avoid division by zero
|
||||
if s < 1e-9 {
|
||||
1.0
|
||||
} else {
|
||||
s
|
||||
} // avoid division by zero
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
|
@ -737,6 +793,14 @@ fn compute_mse(predictions: &[Vec<f64>], targets: &[Vec<f64>]) -> f64 {
|
|||
///
|
||||
/// Torso height is estimated as the distance between nose (kp 0) and the midpoint
|
||||
/// of the two hips (kps 11, 12).
|
||||
///
|
||||
/// NOTE (ADR-155 §Tier-1.1, DEFERRED backlog item): this is a *separate*,
|
||||
/// torso-HEIGHT-normalized implementation distinct from the canonical hip↔hip
|
||||
/// `wifi_densepose_train::metrics::pck_canonical`. It drives the live server's
|
||||
/// in-loop progress display and is NOT the reported-accuracy metric. Unifying
|
||||
/// it with the canonical definition is tracked as a deferred ADR-155 backlog
|
||||
/// item — left unchanged here to avoid destabilising the running training
|
||||
/// service and to keep this milestone scoped to the train/nn subsystem.
|
||||
fn compute_pck(predictions: &[Vec<f64>], targets: &[Vec<f64>], threshold_ratio: f64) -> f64 {
|
||||
if predictions.is_empty() {
|
||||
return 0.0;
|
||||
|
|
@ -814,9 +878,13 @@ fn deterministic_shuffle(n: usize, seed: u64) -> Vec<usize> {
|
|||
return indices;
|
||||
}
|
||||
// Fisher-Yates with LCG.
|
||||
let mut rng = seed.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
|
||||
let mut rng = seed
|
||||
.wrapping_mul(6364136223846793005)
|
||||
.wrapping_add(1442695040888963407);
|
||||
for i in (1..n).rev() {
|
||||
rng = rng.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
|
||||
rng = rng
|
||||
.wrapping_mul(6364136223846793005)
|
||||
.wrapping_add(1442695040888963407);
|
||||
let j = (rng >> 33) as usize % (i + 1);
|
||||
indices.swap(i, j);
|
||||
}
|
||||
|
|
@ -856,8 +924,13 @@ async fn real_training_loop(
|
|||
|
||||
{
|
||||
let progress = TrainingProgress {
|
||||
epoch: 0, batch: 0, total_batches: 0,
|
||||
train_loss: 0.0, val_pck: 0.0, val_oks: 0.0, lr: 0.0,
|
||||
epoch: 0,
|
||||
batch: 0,
|
||||
total_batches: 0,
|
||||
train_loss: 0.0,
|
||||
val_pck: 0.0,
|
||||
val_oks: 0.0,
|
||||
lr: 0.0,
|
||||
phase: "loading_data".to_string(),
|
||||
};
|
||||
if let Ok(json) = serde_json::to_string(&progress) {
|
||||
|
|
@ -877,8 +950,13 @@ async fn real_training_loop(
|
|||
frames.len()
|
||||
);
|
||||
let fail = TrainingProgress {
|
||||
epoch: 0, batch: 0, total_batches: 0,
|
||||
train_loss: 0.0, val_pck: 0.0, val_oks: 0.0, lr: 0.0,
|
||||
epoch: 0,
|
||||
batch: 0,
|
||||
total_batches: 0,
|
||||
train_loss: 0.0,
|
||||
val_pck: 0.0,
|
||||
val_oks: 0.0,
|
||||
lr: 0.0,
|
||||
phase: "failed_insufficient_data".to_string(),
|
||||
};
|
||||
if let Ok(json) = serde_json::to_string(&fail) {
|
||||
|
|
@ -897,8 +975,13 @@ async fn real_training_loop(
|
|||
|
||||
{
|
||||
let progress = TrainingProgress {
|
||||
epoch: 0, batch: 0, total_batches: 0,
|
||||
train_loss: 0.0, val_pck: 0.0, val_oks: 0.0, lr: 0.0,
|
||||
epoch: 0,
|
||||
batch: 0,
|
||||
total_batches: 0,
|
||||
train_loss: 0.0,
|
||||
val_pck: 0.0,
|
||||
val_oks: 0.0,
|
||||
lr: 0.0,
|
||||
phase: "extracting_features".to_string(),
|
||||
};
|
||||
if let Ok(json) = serde_json::to_string(&progress) {
|
||||
|
|
@ -1148,9 +1231,7 @@ async fn real_training_loop(
|
|||
|
||||
// Early stopping.
|
||||
if patience_remaining == 0 {
|
||||
info!(
|
||||
"Early stopping at epoch {epoch} (best={best_epoch}, PCK={best_pck:.4})"
|
||||
);
|
||||
info!("Early stopping at epoch {epoch} (best={best_epoch}, PCK={best_pck:.4})");
|
||||
let stop_progress = TrainingProgress {
|
||||
epoch,
|
||||
batch: total_batches,
|
||||
|
|
@ -1420,8 +1501,8 @@ pub fn infer_pose_from_model(
|
|||
}
|
||||
|
||||
// Confidence based on feature quality: mean absolute value of normalized features.
|
||||
let feat_magnitude: f64 = features.iter().map(|v| v.abs()).sum::<f64>()
|
||||
/ features.len().max(1) as f64;
|
||||
let feat_magnitude: f64 =
|
||||
features.iter().map(|v| v.abs()).sum::<f64>() / features.len().max(1) as f64;
|
||||
coords[3] = (1.0 / (1.0 + (-feat_magnitude + 1.0).exp())).clamp(0.1, 0.99);
|
||||
|
||||
keypoints.push(coords);
|
||||
|
|
@ -1484,8 +1565,7 @@ async fn start_training(
|
|||
|
||||
let state_clone = state.clone();
|
||||
let handle = tokio::spawn(async move {
|
||||
real_training_loop(state_clone, progress_tx, config, dataset_ids, "supervised")
|
||||
.await;
|
||||
real_training_loop(state_clone, progress_tx, config, dataset_ids, "supervised").await;
|
||||
});
|
||||
|
||||
{
|
||||
|
|
@ -1571,8 +1651,7 @@ async fn start_pretrain(
|
|||
let state_clone = state.clone();
|
||||
let dataset_ids = body.dataset_ids.clone();
|
||||
let handle = tokio::spawn(async move {
|
||||
real_training_loop(state_clone, progress_tx, config, dataset_ids, "pretrain")
|
||||
.await;
|
||||
real_training_loop(state_clone, progress_tx, config, dataset_ids, "pretrain").await;
|
||||
});
|
||||
|
||||
{
|
||||
|
|
@ -1632,8 +1711,7 @@ async fn start_lora_training(
|
|||
let state_clone = state.clone();
|
||||
let dataset_ids = body.dataset_ids.clone();
|
||||
let handle = tokio::spawn(async move {
|
||||
real_training_loop(state_clone, progress_tx, config, dataset_ids, "lora")
|
||||
.await;
|
||||
real_training_loop(state_clone, progress_tx, config, dataset_ids, "lora").await;
|
||||
});
|
||||
|
||||
{
|
||||
|
|
@ -1677,9 +1755,7 @@ async fn handle_train_ws_client(mut socket: WebSocket, state: AppState) {
|
|||
"type": "status",
|
||||
"data": serde_json::from_str::<serde_json::Value>(&json).unwrap_or_default(),
|
||||
});
|
||||
let _ = socket
|
||||
.send(Message::Text(msg.to_string().into()))
|
||||
.await;
|
||||
let _ = socket.send(Message::Text(msg.to_string().into())).await;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1888,13 +1964,16 @@ mod tests {
|
|||
fn pck_perfect_prediction() {
|
||||
// Build targets where torso height is large so threshold is generous.
|
||||
let mut tgt = vec![0.0; N_TARGETS];
|
||||
tgt[1] = 0.0; // nose y
|
||||
tgt[1] = 0.0; // nose y
|
||||
tgt[34] = 100.0; // left hip y
|
||||
tgt[37] = 100.0; // right hip y
|
||||
let preds = vec![tgt.clone()];
|
||||
let targets = vec![tgt];
|
||||
let pck = compute_pck(&preds, &targets, 0.2);
|
||||
assert!((pck - 1.0).abs() < 1e-9, "Perfect prediction should give PCK=1.0");
|
||||
assert!(
|
||||
(pck - 1.0).abs() < 1e-9,
|
||||
"Perfect prediction should give PCK=1.0"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -149,7 +149,16 @@ fn bench_config_validate(c: &mut Criterion) {
|
|||
// PCK computation benchmark (pure Rust, no tch dependency)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Inline PCK@threshold computation for a single (pred, gt) sample.
|
||||
/// Inline raw-threshold PCK for a single (pred, gt) sample — **BENCH FIXTURE
|
||||
/// ONLY**.
|
||||
///
|
||||
/// DO NOT USE for reported metrics (ADR-155 §Tier-1.1). This is a deliberately
|
||||
/// trivial `dist ≤ threshold` kernel chosen to exercise the hot loop without a
|
||||
/// torso-normalization step; it is NOT the canonical metric. The single source
|
||||
/// of truth for any reported PCK is
|
||||
/// `wifi_densepose_train::metrics::pck_canonical` (torso-normalized, COCO
|
||||
/// convention). This local copy exists only so the bench can run without the
|
||||
/// tch-gated `metrics` module.
|
||||
#[inline(always)]
|
||||
fn compute_pck(pred: &[[f32; 2]], gt: &[[f32; 2]], threshold: f32) -> f32 {
|
||||
let n = pred.len();
|
||||
|
|
|
|||
|
|
@ -1,16 +1,40 @@
|
|||
//! Evaluation metrics for WiFi-DensePose training.
|
||||
//!
|
||||
//! This module provides:
|
||||
//! # CANONICAL METRIC (ADR-155 §Tier-1.1 — single source of truth)
|
||||
//!
|
||||
//! - **PCK\@0.2** (Percentage of Correct Keypoints): a keypoint is considered
|
||||
//! correct when its Euclidean distance from the ground truth is within 20%
|
||||
//! of the person bounding-box diagonal.
|
||||
//! - **OKS** (Object Keypoint Similarity): the COCO-style metric that uses a
|
||||
//! per-joint exponential kernel with sigmas from the COCO annotation
|
||||
//! guidelines.
|
||||
//! As of ADR-155 there is exactly **one** definition of PCK and one of OKS
|
||||
//! that may be used for any *reported / claimed* number. They live in the
|
||||
//! [`canonical`] region of this module:
|
||||
//!
|
||||
//! Results are accumulated over mini-batches via [`MetricsAccumulator`] and
|
||||
//! finalized into a [`MetricsResult`] at the end of a validation epoch.
|
||||
//! - [`pck_canonical`] — **PCK\@k, torso-normalized.** A keypoint `j` is
|
||||
//! correct iff `‖pred_j − gt_j‖₂ ≤ k · torso`, where
|
||||
//! `torso = ‖left_hip(11) − right_hip(12)‖₂` in the *same* coordinate space
|
||||
//! as the keypoints. This matches the COCO / ADR-152 convention validated in
|
||||
//! `benchmarks/wiflow-std/RESULTS.md` (the ~96% PCK@20 reproduction). When
|
||||
//! the two hip joints are not both visible we fall back to the diagonal of
|
||||
//! the visible-keypoint bounding box (a stable, scale-aware normalizer).
|
||||
//! **Zero visible joints ⇒ PCK = 0.0** (no evidence of correctness — the
|
||||
//! opposite of the historical `MetricsAccumulator` bug that scored it 1.0).
|
||||
//!
|
||||
//! - [`oks_canonical`] — **OKS, COCO-style kernel.** The object scale `s` is
|
||||
//! [`canonical_torso_size`] of the GT pose (hip↔hip distance, else the
|
||||
//! visible-keypoint bbox diagonal) *in the keypoint coordinate space*, so
|
||||
//! `s²` stands in for the COCO object area.
|
||||
//! Passing `s = 1.0` on normalized [0,1] coordinates is **forbidden** — every
|
||||
//! error is then small relative to the scale, so OKS ≈ 1.0 for any pose
|
||||
//! ("fake Gold tier"); that historical
|
||||
//! bug is fixed here by always deriving `s` from the actual pose extent and
|
||||
//! returning 0.0 when no positive scale is available.
|
||||
//!
|
||||
//! `Trainer::evaluate`, `eval.rs`, `proof.rs`, the WiFlow-STD bench and
|
||||
//! `ruview_metrics` all route through these two functions.
|
||||
//!
|
||||
//! ## Deprecated / non-canonical (DO NOT USE for reported metrics)
|
||||
//!
|
||||
//! The following predate the unification and are retained only for internal
|
||||
//! callers / back-compat; each is annotated `#[deprecated]` and forwards to the
|
||||
//! canonical implementation where behaviour-compatible:
|
||||
//!
|
||||
//! - [`compute_pck_v2`] / [`compute_oks_v2`] / [`MetricsAccumulatorV2`]
|
||||
//! (hip↔hip torso but pixel-space, scale-from-area — folded into canonical).
|
||||
//! - `ruview_metrics`' bbox-diagonal PCK + its private OKS.
|
||||
//!
|
||||
//! # No mock data
|
||||
//!
|
||||
|
|
@ -51,6 +75,150 @@ pub const COCO_KP_SIGMAS: [f32; 17] = [
|
|||
0.089, // 16 right_ankle
|
||||
];
|
||||
|
||||
// ===========================================================================
|
||||
// CANONICAL METRIC — single source of truth (ADR-155 §Tier-1.1)
|
||||
// ===========================================================================
|
||||
|
||||
/// COCO joint index of the left hip.
|
||||
pub const CANON_LEFT_HIP: usize = 11;
|
||||
/// COCO joint index of the right hip.
|
||||
pub const CANON_RIGHT_HIP: usize = 12;
|
||||
|
||||
/// Canonical torso normalizer used by [`pck_canonical`].
|
||||
///
|
||||
/// Returns `‖left_hip − right_hip‖₂` (COCO joints 11↔12) when both hips are
|
||||
/// visible; otherwise the diagonal of the visible-keypoint bounding box. The
|
||||
/// distance is computed in whatever coordinate space `kpts` is expressed in
|
||||
/// (the canonical PCK requires pred and gt to share that space).
|
||||
///
|
||||
/// Returns `None` when there is no positive-extent reference available (no
|
||||
/// visible hips *and* a degenerate/empty visible bbox), signalling the caller
|
||||
/// that the sample cannot be scored.
|
||||
pub fn canonical_torso_size(gt_kpts: &Array2<f32>, visibility: &Array1<f32>) -> Option<f32> {
|
||||
let n = gt_kpts.shape()[0].min(visibility.len());
|
||||
if CANON_LEFT_HIP < n
|
||||
&& CANON_RIGHT_HIP < n
|
||||
&& visibility[CANON_LEFT_HIP] >= 0.5
|
||||
&& visibility[CANON_RIGHT_HIP] >= 0.5
|
||||
{
|
||||
let dx = gt_kpts[[CANON_LEFT_HIP, 0]] - gt_kpts[[CANON_RIGHT_HIP, 0]];
|
||||
let dy = gt_kpts[[CANON_LEFT_HIP, 1]] - gt_kpts[[CANON_RIGHT_HIP, 1]];
|
||||
let torso = (dx * dx + dy * dy).sqrt();
|
||||
if torso > 1e-6 {
|
||||
return Some(torso);
|
||||
}
|
||||
}
|
||||
// Fallback: bounding-box diagonal of visible keypoints.
|
||||
let diag = bounding_box_diagonal(gt_kpts, visibility, n);
|
||||
if diag > 1e-6 {
|
||||
Some(diag)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// **CANONICAL PCK\@`threshold`** — the single definition used for every
|
||||
/// reported number (ADR-155 §Tier-1.1).
|
||||
///
|
||||
/// A keypoint `j` with `visibility[j] >= 0.5` is *correct* iff
|
||||
/// `‖pred_j − gt_j‖₂ ≤ threshold · torso`, where `torso` is
|
||||
/// [`canonical_torso_size`] in the keypoint coordinate space.
|
||||
///
|
||||
/// # Returns
|
||||
/// `(correct, total, pck)` where `pck ∈ [0,1]`. **`(0, 0, 0.0)` when no
|
||||
/// keypoint is visible or the torso reference is degenerate** — a sample with
|
||||
/// no measurable evidence scores 0, never 1 (closes the
|
||||
/// `MetricsAccumulator` false-perfect bug).
|
||||
pub fn pck_canonical(
|
||||
pred_kpts: &Array2<f32>,
|
||||
gt_kpts: &Array2<f32>,
|
||||
visibility: &Array1<f32>,
|
||||
threshold: f32,
|
||||
) -> (usize, usize, f32) {
|
||||
let n = pred_kpts.shape()[0]
|
||||
.min(gt_kpts.shape()[0])
|
||||
.min(visibility.len());
|
||||
let torso = match canonical_torso_size(gt_kpts, visibility) {
|
||||
Some(t) => t,
|
||||
// No measurable reference scale ⇒ cannot score ⇒ 0.0 (NOT trivially 1.0).
|
||||
None => return (0, 0, 0.0),
|
||||
};
|
||||
let dist_threshold = threshold * torso;
|
||||
|
||||
let mut correct = 0usize;
|
||||
let mut total = 0usize;
|
||||
for j in 0..n {
|
||||
if visibility[j] < 0.5 {
|
||||
continue;
|
||||
}
|
||||
total += 1;
|
||||
let dx = pred_kpts[[j, 0]] - gt_kpts[[j, 0]];
|
||||
let dy = pred_kpts[[j, 1]] - gt_kpts[[j, 1]];
|
||||
if (dx * dx + dy * dy).sqrt() <= dist_threshold {
|
||||
correct += 1;
|
||||
}
|
||||
}
|
||||
let pck = if total > 0 {
|
||||
correct as f32 / total as f32
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
(correct, total, pck)
|
||||
}
|
||||
|
||||
/// **CANONICAL OKS** — COCO Object Keypoint Similarity (ADR-155 §Tier-1.1).
|
||||
///
|
||||
/// `OKS = Σⱼ exp(−dⱼ² / (2 s² kⱼ²)) · δ(vⱼ≥0.5) / Σⱼ δ(vⱼ≥0.5)` with
|
||||
/// `s = sqrt(area)` derived from the **GT keypoint bounding box in the
|
||||
/// keypoint coordinate space** (via [`canonical_torso_size`]² as a robust,
|
||||
/// always-positive proxy for area when an explicit bbox is unavailable).
|
||||
///
|
||||
/// Passing normalized [0,1] coordinates is fine *because the scale is derived
|
||||
/// from the pose itself* — there is no `s = 1.0` escape hatch that would make
|
||||
/// OKS ≈ 1.0 for any pose (the historical "fake Gold tier" bug).
|
||||
///
|
||||
/// Returns 0.0 when no keypoints are visible or the scale is degenerate.
|
||||
pub fn oks_canonical(
|
||||
pred_kpts: &Array2<f32>,
|
||||
gt_kpts: &Array2<f32>,
|
||||
visibility: &Array1<f32>,
|
||||
) -> f32 {
|
||||
let n = pred_kpts.shape()[0]
|
||||
.min(gt_kpts.shape()[0])
|
||||
.min(visibility.len());
|
||||
// Scale: area ≈ torso². Derived from the actual pose, never a fixed 1.0.
|
||||
let s = match canonical_torso_size(gt_kpts, visibility) {
|
||||
Some(t) => t,
|
||||
None => return 0.0,
|
||||
};
|
||||
let s_sq = s * s;
|
||||
if s_sq <= 0.0 {
|
||||
return 0.0;
|
||||
}
|
||||
let mut num = 0.0f32;
|
||||
let mut den = 0.0f32;
|
||||
for j in 0..n {
|
||||
if visibility[j] < 0.5 {
|
||||
continue;
|
||||
}
|
||||
den += 1.0;
|
||||
let dx = pred_kpts[[j, 0]] - gt_kpts[[j, 0]];
|
||||
let dy = pred_kpts[[j, 1]] - gt_kpts[[j, 1]];
|
||||
let d_sq = dx * dx + dy * dy;
|
||||
let k = if j < COCO_KP_SIGMAS.len() {
|
||||
COCO_KP_SIGMAS[j]
|
||||
} else {
|
||||
0.07
|
||||
};
|
||||
num += (-d_sq / (2.0 * s_sq * k * k)).exp();
|
||||
}
|
||||
if den > 0.0 {
|
||||
num / den
|
||||
} else {
|
||||
0.0
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// MetricsResult
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -174,74 +342,27 @@ impl MetricsAccumulator {
|
|||
|
||||
/// Update the accumulator with one sample's predictions.
|
||||
///
|
||||
/// Routes through the **canonical** [`pck_canonical`] / [`oks_canonical`]
|
||||
/// definitions (ADR-155 §Tier-1.1) so the trainer's reported numbers are
|
||||
/// identical to `eval.rs`, `proof.rs` and the WiFlow-STD bench.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// - `pred_kp`: `[17, 2]` – predicted keypoint (x, y) in `[0, 1]`.
|
||||
/// - `gt_kp`: `[17, 2]` – ground-truth keypoint (x, y) in `[0, 1]`.
|
||||
/// - `visibility`: `[17]` – 0 = invisible, 1/2 = visible.
|
||||
///
|
||||
/// Keypoints with `visibility == 0` are skipped.
|
||||
/// Keypoints with `visibility == 0` are skipped. A sample with no visible
|
||||
/// joints (or a degenerate torso reference) contributes PCK=0 / OKS=0 — it
|
||||
/// is **not** counted as trivially correct (closes the historical
|
||||
/// false-perfect bug).
|
||||
pub fn update(&mut self, pred_kp: &Array2<f32>, gt_kp: &Array2<f32>, visibility: &Array1<f32>) {
|
||||
let num_joints = pred_kp.shape()[0]
|
||||
.min(gt_kp.shape()[0])
|
||||
.min(visibility.len());
|
||||
let (_, visible_count, sample_pck) =
|
||||
pck_canonical(pred_kp, gt_kp, visibility, self.pck_threshold);
|
||||
let sample_oks = oks_canonical(pred_kp, gt_kp, visibility);
|
||||
|
||||
// Compute bounding-box diagonal from visible ground-truth keypoints.
|
||||
let bbox_diag = bounding_box_diagonal(gt_kp, visibility, num_joints);
|
||||
// Guard against degenerate (point) bounding boxes.
|
||||
let safe_diag = bbox_diag.max(1e-3);
|
||||
|
||||
let mut pck_correct = 0usize;
|
||||
let mut visible_count = 0usize;
|
||||
let mut oks_num = 0.0f64;
|
||||
let mut oks_den = 0.0f64;
|
||||
|
||||
for j in 0..num_joints {
|
||||
if visibility[j] < 0.5 {
|
||||
// Invisible joint: skip.
|
||||
continue;
|
||||
}
|
||||
visible_count += 1;
|
||||
|
||||
let dx = pred_kp[[j, 0]] - gt_kp[[j, 0]];
|
||||
let dy = pred_kp[[j, 1]] - gt_kp[[j, 1]];
|
||||
let dist = (dx * dx + dy * dy).sqrt();
|
||||
|
||||
// PCK: correct if within threshold × diagonal.
|
||||
if dist <= self.pck_threshold * safe_diag {
|
||||
pck_correct += 1;
|
||||
}
|
||||
|
||||
// OKS contribution for this joint.
|
||||
let sigma = if j < COCO_KP_SIGMAS.len() {
|
||||
COCO_KP_SIGMAS[j]
|
||||
} else {
|
||||
0.07 // fallback sigma for non-standard joints
|
||||
};
|
||||
// Normalise distance by (2 × sigma)² × (area = diagonal²).
|
||||
let two_sigma_sq = 2.0 * (sigma as f64) * (sigma as f64);
|
||||
let area = (safe_diag as f64) * (safe_diag as f64);
|
||||
let exp_arg = -(dist as f64 * dist as f64) / (two_sigma_sq * area + 1e-10);
|
||||
oks_num += exp_arg.exp();
|
||||
oks_den += 1.0;
|
||||
}
|
||||
|
||||
// Per-sample PCK (fraction of visible joints that were correct).
|
||||
let sample_pck = if visible_count > 0 {
|
||||
pck_correct as f64 / visible_count as f64
|
||||
} else {
|
||||
1.0 // No visible joints: trivially correct (no evidence of error).
|
||||
};
|
||||
|
||||
// Per-sample OKS.
|
||||
let sample_oks = if oks_den > 0.0 {
|
||||
oks_num / oks_den
|
||||
} else {
|
||||
1.0
|
||||
};
|
||||
|
||||
self.pck_sum += sample_pck;
|
||||
self.oks_sum += sample_oks;
|
||||
self.pck_sum += sample_pck as f64;
|
||||
self.oks_sum += sample_oks as f64;
|
||||
self.num_keypoints += visible_count;
|
||||
self.num_samples += 1;
|
||||
}
|
||||
|
|
@ -317,32 +438,13 @@ fn bounding_box_diagonal(kp: &Array2<f32>, visibility: &Array1<f32>, num_joints:
|
|||
// Per-sample PCK and OKS free functions (required by the training evaluator)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Keypoint indices for torso-diameter PCK normalisation (COCO ordering).
|
||||
const IDX_LEFT_HIP: usize = 11;
|
||||
const IDX_RIGHT_SHOULDER: usize = 6;
|
||||
|
||||
/// Compute the torso diameter for PCK normalisation.
|
||||
///
|
||||
/// Torso diameter = ||left_hip − right_shoulder||₂ in normalised [0,1] space.
|
||||
/// Returns 0.0 when either landmark is invisible, indicating the caller
|
||||
/// should fall back to a unit normaliser.
|
||||
fn torso_diameter_pck(gt_kpts: &Array2<f32>, visibility: &Array1<f32>) -> f32 {
|
||||
if visibility[IDX_LEFT_HIP] < 0.5 || visibility[IDX_RIGHT_SHOULDER] < 0.5 {
|
||||
return 0.0;
|
||||
}
|
||||
let dx = gt_kpts[[IDX_LEFT_HIP, 0]] - gt_kpts[[IDX_RIGHT_SHOULDER, 0]];
|
||||
let dy = gt_kpts[[IDX_LEFT_HIP, 1]] - gt_kpts[[IDX_RIGHT_SHOULDER, 1]];
|
||||
(dx * dx + dy * dy).sqrt()
|
||||
}
|
||||
|
||||
/// Compute PCK (Percentage of Correct Keypoints) for a single frame.
|
||||
///
|
||||
/// A keypoint `j` is "correct" when its Euclidean distance to the ground
|
||||
/// truth is within `threshold × torso_diameter` (left_hip ↔ right_shoulder).
|
||||
/// When the torso reference joints are not visible the threshold is applied
|
||||
/// directly in normalised [0,1] coordinate space (unit normaliser).
|
||||
///
|
||||
/// Only keypoints with `visibility[j] > 0` contribute to the count.
|
||||
/// Thin wrapper over the **canonical** [`pck_canonical`] (ADR-155 §Tier-1.1):
|
||||
/// torso-normalized by hip↔hip with bbox-diagonal fallback, and `(0,0,0.0)`
|
||||
/// for a sample with no measurable evidence. Prior to ADR-155 this used a
|
||||
/// hip↔shoulder torso and a unit-normalizer fallback — both replaced here so
|
||||
/// every call site agrees on one definition.
|
||||
///
|
||||
/// # Returns
|
||||
/// `(correct_count, total_count, pck_value)` where `pck_value ∈ [0,1]`;
|
||||
|
|
@ -353,38 +455,14 @@ pub fn compute_pck(
|
|||
visibility: &Array1<f32>,
|
||||
threshold: f32,
|
||||
) -> (usize, usize, f32) {
|
||||
let torso = torso_diameter_pck(gt_kpts, visibility);
|
||||
let norm = if torso > 1e-6 { torso } else { 1.0_f32 };
|
||||
let dist_threshold = threshold * norm;
|
||||
|
||||
let mut correct = 0_usize;
|
||||
let mut total = 0_usize;
|
||||
|
||||
for j in 0..17 {
|
||||
if visibility[j] < 0.5 {
|
||||
continue;
|
||||
}
|
||||
total += 1;
|
||||
let dx = pred_kpts[[j, 0]] - gt_kpts[[j, 0]];
|
||||
let dy = pred_kpts[[j, 1]] - gt_kpts[[j, 1]];
|
||||
let dist = (dx * dx + dy * dy).sqrt();
|
||||
if dist <= dist_threshold {
|
||||
correct += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let pck = if total > 0 {
|
||||
correct as f32 / total as f32
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
(correct, total, pck)
|
||||
pck_canonical(pred_kpts, gt_kpts, visibility, threshold)
|
||||
}
|
||||
|
||||
/// Compute per-joint PCK over a batch of frames.
|
||||
///
|
||||
/// Returns `[f32; 17]` where entry `j` is the fraction of frames in which
|
||||
/// joint `j` was both visible and correctly predicted at the given threshold.
|
||||
/// Uses the canonical torso normalizer ([`canonical_torso_size`]).
|
||||
pub fn compute_per_joint_pck(
|
||||
pred_batch: &[Array2<f32>],
|
||||
gt_batch: &[Array2<f32>],
|
||||
|
|
@ -398,9 +476,11 @@ pub fn compute_per_joint_pck(
|
|||
let mut total = [0_usize; 17];
|
||||
|
||||
for (pred, (gt, vis)) in pred_batch.iter().zip(gt_batch.iter().zip(vis_batch.iter())) {
|
||||
let torso = torso_diameter_pck(gt, vis);
|
||||
let norm = if torso > 1e-6 { torso } else { 1.0_f32 };
|
||||
let dist_thr = threshold * norm;
|
||||
// Canonical normalizer; skip frames with no measurable reference.
|
||||
let dist_thr = match canonical_torso_size(gt, vis) {
|
||||
Some(t) => threshold * t,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
for j in 0..17 {
|
||||
if vis[j] < 0.5 {
|
||||
|
|
@ -429,45 +509,21 @@ pub fn compute_per_joint_pck(
|
|||
|
||||
/// Compute Object Keypoint Similarity (OKS) for a single person.
|
||||
///
|
||||
/// COCO OKS formula:
|
||||
/// Thin wrapper over the **canonical** [`oks_canonical`] (ADR-155 §Tier-1.1).
|
||||
///
|
||||
/// ```text
|
||||
/// OKS = Σᵢ exp(-dᵢ² / (2·s²·kᵢ²)) · δ(vᵢ>0) / Σᵢ δ(vᵢ>0)
|
||||
/// ```
|
||||
///
|
||||
/// - `dᵢ` – Euclidean distance between predicted and GT keypoint `i`
|
||||
/// - `s` – object scale (`object_scale`; pass `1.0` when bbox is unknown)
|
||||
/// - `kᵢ` – per-joint sigma from [`COCO_KP_SIGMAS`]
|
||||
///
|
||||
/// Returns `0.0` when no keypoints are visible.
|
||||
/// The legacy `object_scale` parameter is **ignored**: passing `1.0` on
|
||||
/// normalized [0,1] coordinates was the "fake Gold tier" bug (every distance
|
||||
/// ≈ 0 ⇒ OKS ≈ 1.0 for any pose). The scale is now always derived from the GT
|
||||
/// pose extent, so the result is honest regardless of what scale a caller
|
||||
/// would have passed. The argument is retained only for signature
|
||||
/// compatibility and will be removed in a future cleanup.
|
||||
pub fn compute_oks(
|
||||
pred_kpts: &Array2<f32>,
|
||||
gt_kpts: &Array2<f32>,
|
||||
visibility: &Array1<f32>,
|
||||
object_scale: f32,
|
||||
_object_scale: f32,
|
||||
) -> f32 {
|
||||
let s_sq = object_scale * object_scale;
|
||||
let mut numerator = 0.0_f32;
|
||||
let mut denominator = 0.0_f32;
|
||||
|
||||
for j in 0..17 {
|
||||
if visibility[j] < 0.5 {
|
||||
continue;
|
||||
}
|
||||
denominator += 1.0;
|
||||
let dx = pred_kpts[[j, 0]] - gt_kpts[[j, 0]];
|
||||
let dy = pred_kpts[[j, 1]] - gt_kpts[[j, 1]];
|
||||
let d_sq = dx * dx + dy * dy;
|
||||
let k = COCO_KP_SIGMAS[j];
|
||||
let exp_arg = -d_sq / (2.0 * s_sq * k * k);
|
||||
numerator += exp_arg.exp();
|
||||
}
|
||||
|
||||
if denominator > 0.0 {
|
||||
numerator / denominator
|
||||
} else {
|
||||
0.0
|
||||
}
|
||||
oks_canonical(pred_kpts, gt_kpts, visibility)
|
||||
}
|
||||
|
||||
/// Aggregate result type returned by [`aggregate_metrics`].
|
||||
|
|
@ -886,9 +942,9 @@ pub fn find_augmenting_path(
|
|||
/// l_ankle, r_ankle.
|
||||
pub const COCO_KPT_SIGMAS: [f32; 17] = COCO_KP_SIGMAS;
|
||||
|
||||
/// COCO joint indices for hip-to-hip torso size used by PCK.
|
||||
const KPT_LEFT_HIP: usize = 11;
|
||||
const KPT_RIGHT_HIP: usize = 12;
|
||||
// (hip indices for the canonical normalizer live as CANON_LEFT_HIP /
|
||||
// CANON_RIGHT_HIP near the top of this module; the old per-region duplicates
|
||||
// were removed when the V2 path was folded into the canonical metric.)
|
||||
|
||||
// ── Spec MetricsResult ──────────────────────────────────────────────────────
|
||||
|
||||
|
|
@ -932,52 +988,41 @@ pub struct MetricsResultDetailed {
|
|||
/// * `image_size` — `(width, height)` in pixels
|
||||
///
|
||||
/// Returns `(overall_pck, per_joint_pck)`.
|
||||
#[deprecated(
|
||||
since = "ADR-155",
|
||||
note = "DO NOT USE for reported metrics — use pck_canonical. Retained for \
|
||||
back-compat; now forwards to the canonical definition (image_size \
|
||||
is ignored because canonical PCK is a scale-invariant ratio)."
|
||||
)]
|
||||
pub fn compute_pck_v2(
|
||||
pred_kpts: ArrayView2<f32>,
|
||||
gt_kpts: ArrayView2<f32>,
|
||||
visibility: ArrayView1<f32>,
|
||||
threshold: f32,
|
||||
image_size: (usize, usize),
|
||||
_image_size: (usize, usize),
|
||||
) -> (f32, [f32; 17]) {
|
||||
let (w, h) = image_size;
|
||||
let (wf, hf) = (w as f32, h as f32);
|
||||
|
||||
let lh_vis = visibility[KPT_LEFT_HIP] > 0.0;
|
||||
let rh_vis = visibility[KPT_RIGHT_HIP] > 0.0;
|
||||
|
||||
let torso_size = if lh_vis && rh_vis {
|
||||
let dx = (gt_kpts[[KPT_LEFT_HIP, 0]] - gt_kpts[[KPT_RIGHT_HIP, 0]]) * wf;
|
||||
let dy = (gt_kpts[[KPT_LEFT_HIP, 1]] - gt_kpts[[KPT_RIGHT_HIP, 1]]) * hf;
|
||||
(dx * dx + dy * dy).sqrt()
|
||||
} else {
|
||||
0.1 * (wf * wf + hf * hf).sqrt()
|
||||
};
|
||||
|
||||
let max_dist = threshold * torso_size;
|
||||
// Canonical PCK is a ratio (dist/torso) so the pixel scaling in the old
|
||||
// implementation cancelled out; route through the single source of truth.
|
||||
let pred = pred_kpts.to_owned();
|
||||
let gt = gt_kpts.to_owned();
|
||||
let vis = visibility.to_owned();
|
||||
let torso = canonical_torso_size(&gt, &vis);
|
||||
|
||||
let mut per_joint_pck = [0.0f32; 17];
|
||||
let mut total_visible = 0u32;
|
||||
let mut total_correct = 0u32;
|
||||
|
||||
for j in 0..17 {
|
||||
if visibility[j] <= 0.0 {
|
||||
continue;
|
||||
}
|
||||
total_visible += 1;
|
||||
let dx = (pred_kpts[[j, 0]] - gt_kpts[[j, 0]]) * wf;
|
||||
let dy = (pred_kpts[[j, 1]] - gt_kpts[[j, 1]]) * hf;
|
||||
if (dx * dx + dy * dy).sqrt() <= max_dist {
|
||||
total_correct += 1;
|
||||
per_joint_pck[j] = 1.0;
|
||||
let (_, _, overall) = pck_canonical(&pred, &gt, &vis, threshold);
|
||||
if let Some(t) = torso {
|
||||
let max_dist = threshold * t;
|
||||
for j in 0..17 {
|
||||
if vis[j] < 0.5 {
|
||||
continue;
|
||||
}
|
||||
let dx = pred[[j, 0]] - gt[[j, 0]];
|
||||
let dy = pred[[j, 1]] - gt[[j, 1]];
|
||||
if (dx * dx + dy * dy).sqrt() <= max_dist {
|
||||
per_joint_pck[j] = 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let overall = if total_visible == 0 {
|
||||
0.0
|
||||
} else {
|
||||
total_correct as f32 / total_visible as f32
|
||||
};
|
||||
|
||||
(overall, per_joint_pck)
|
||||
}
|
||||
|
||||
|
|
@ -991,6 +1036,14 @@ pub fn compute_pck_v2(
|
|||
/// [`COCO_KPT_SIGMAS`].
|
||||
///
|
||||
/// Returns 0.0 when no keypoints are visible or `area == 0`.
|
||||
#[deprecated(
|
||||
since = "ADR-155",
|
||||
note = "DO NOT USE for reported metrics — use oks_canonical. Retained for \
|
||||
back-compat. When `area <= 0` it still returns 0.0; otherwise it \
|
||||
uses the caller-supplied `area` as before so explicit-area callers \
|
||||
are unchanged, but new code should call oks_canonical which derives \
|
||||
scale from the pose and cannot be spoofed with area=1.0."
|
||||
)]
|
||||
pub fn compute_oks_v2(
|
||||
pred_kpts: ArrayView2<f32>,
|
||||
gt_kpts: ArrayView2<f32>,
|
||||
|
|
@ -1219,17 +1272,28 @@ impl MetricsAccumulatorV2 {
|
|||
pred: ArrayView2<f32>,
|
||||
gt: ArrayView2<f32>,
|
||||
vis: ArrayView1<f32>,
|
||||
image_size: (usize, usize),
|
||||
_image_size: (usize, usize),
|
||||
) {
|
||||
let (_, per_joint) = compute_pck_v2(pred, gt, vis, 0.2, image_size);
|
||||
// Route through the canonical metric (ADR-155 §Tier-1.1). `image_size`
|
||||
// is unused because canonical PCK is a scale-invariant ratio and OKS
|
||||
// derives its scale from the pose.
|
||||
let pred_o = pred.to_owned();
|
||||
let gt_o = gt.to_owned();
|
||||
let vis_o = vis.to_owned();
|
||||
let torso = canonical_torso_size(&gt_o, &vis_o);
|
||||
for j in 0..17 {
|
||||
if vis[j] > 0.0 {
|
||||
self.total_visible[j] += 1.0;
|
||||
self.total_correct[j] += per_joint[j];
|
||||
if let Some(t) = torso {
|
||||
let dx = pred[[j, 0]] - gt[[j, 0]];
|
||||
let dy = pred[[j, 1]] - gt[[j, 1]];
|
||||
if (dx * dx + dy * dy).sqrt() <= 0.2 * t {
|
||||
self.total_correct[j] += 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
let area = kpt_bbox_area_v2(gt, vis, image_size);
|
||||
self.total_oks += compute_oks_v2(pred, gt, vis, area);
|
||||
self.total_oks += oks_canonical(&pred_o, &gt_o, &vis_o);
|
||||
self.num_samples += 1;
|
||||
}
|
||||
|
||||
|
|
@ -1267,30 +1331,9 @@ impl Default for MetricsAccumulatorV2 {
|
|||
}
|
||||
}
|
||||
|
||||
/// Estimate bounding-box area (pixels²) from visible GT keypoints.
|
||||
fn kpt_bbox_area_v2(gt: ArrayView2<f32>, vis: ArrayView1<f32>, image_size: (usize, usize)) -> f32 {
|
||||
let (w, h) = image_size;
|
||||
let (wf, hf) = (w as f32, h as f32);
|
||||
let mut x_min = f32::INFINITY;
|
||||
let mut x_max = f32::NEG_INFINITY;
|
||||
let mut y_min = f32::INFINITY;
|
||||
let mut y_max = f32::NEG_INFINITY;
|
||||
for j in 0..17 {
|
||||
if vis[j] <= 0.0 {
|
||||
continue;
|
||||
}
|
||||
let x = gt[[j, 0]] * wf;
|
||||
let y = gt[[j, 1]] * hf;
|
||||
x_min = x_min.min(x);
|
||||
x_max = x_max.max(x);
|
||||
y_min = y_min.min(y);
|
||||
y_max = y_max.max(y);
|
||||
}
|
||||
if x_min.is_infinite() {
|
||||
return 0.01 * wf * hf;
|
||||
}
|
||||
(x_max - x_min).max(1.0) * (y_max - y_min).max(1.0)
|
||||
}
|
||||
// kpt_bbox_area_v2 was removed in ADR-155: the V2 accumulator now derives its
|
||||
// OKS scale from the canonical pose extent (oks_canonical), so a separate
|
||||
// image-size-dependent area estimate is no longer needed.
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
|
|
@ -1333,15 +1376,19 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn all_invisible_gives_trivial_pck() {
|
||||
fn all_invisible_gives_zero_pck() {
|
||||
// ADR-155 §Tier-1.1: a sample with NO visible joints has no measurable
|
||||
// evidence of correctness ⇒ PCK = 0.0. (Previously this returned 1.0 —
|
||||
// the MetricsAccumulator false-perfect bug that let an empty/garbage
|
||||
// prediction inflate the reported metric.)
|
||||
let mut acc = MetricsAccumulator::default_threshold();
|
||||
let pred = Array2::zeros((17, 2));
|
||||
let gt = Array2::zeros((17, 2));
|
||||
let vis = Array1::zeros(17);
|
||||
acc.update(&pred, &gt, &vis);
|
||||
let result = acc.finalize().unwrap();
|
||||
// No visible joints → trivially "perfect" (no errors to measure)
|
||||
assert_abs_diff_eq!(result.pck, 1.0_f32, epsilon = 1e-5);
|
||||
assert_abs_diff_eq!(result.pck, 0.0_f32, epsilon = 1e-5);
|
||||
assert_abs_diff_eq!(result.oks, 0.0_f32, epsilon = 1e-5);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -1422,12 +1469,19 @@ mod tests {
|
|||
Array1::ones(17)
|
||||
}
|
||||
|
||||
// A pose centred at (x, y) but with a NON-DEGENERATE torso: the two hips
|
||||
// (joints 11, 12) are offset so that the canonical hip↔hip normalizer is
|
||||
// positive (ADR-155 §Tier-1.1 — a zero-extent pose is correctly
|
||||
// unscoreable, so test fixtures must give the pose a real scale).
|
||||
fn uniform_kpts_17(x: f32, y: f32) -> Array2<f32> {
|
||||
let mut arr = Array2::zeros((17, 2));
|
||||
for j in 0..17 {
|
||||
arr[[j, 0]] = x;
|
||||
arr[[j, 1]] = y;
|
||||
}
|
||||
// Give the torso a 0.1-wide hip span so torso_size > 0.
|
||||
arr[[CANON_LEFT_HIP, 0]] = x - 0.05;
|
||||
arr[[CANON_RIGHT_HIP, 0]] = x + 0.05;
|
||||
arr
|
||||
}
|
||||
|
||||
|
|
@ -1584,13 +1638,16 @@ mod tests {
|
|||
|
||||
// ── Spec-required API tests ───────────────────────────────────────────────
|
||||
|
||||
// Non-degenerate all-visible pose for the V2 spec tests: hips offset so the
|
||||
// canonical normalizer is positive (ADR-155 §Tier-1.1).
|
||||
fn spec_pose_17() -> Array2<f32> {
|
||||
uniform_kpts_17(0.5, 0.5)
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(deprecated)] // compute_pck_v2 forwards to pck_canonical (ADR-155).
|
||||
fn spec_pck_v2_perfect() {
|
||||
let mut kpts = Array2::<f32>::zeros((17, 2));
|
||||
for j in 0..17 {
|
||||
kpts[[j, 0]] = 0.5;
|
||||
kpts[[j, 1]] = 0.5;
|
||||
}
|
||||
let kpts = spec_pose_17();
|
||||
let vis = Array1::ones(17_usize);
|
||||
let (pck, per_joint) =
|
||||
compute_pck_v2(kpts.view(), kpts.view(), vis.view(), 0.2, (256, 256));
|
||||
|
|
@ -1601,6 +1658,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[allow(deprecated)]
|
||||
fn spec_pck_v2_no_visible() {
|
||||
let kpts = Array2::<f32>::zeros((17, 2));
|
||||
let vis = Array1::zeros(17_usize);
|
||||
|
|
@ -1610,21 +1668,22 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn spec_oks_v2_perfect() {
|
||||
let mut kpts = Array2::<f32>::zeros((17, 2));
|
||||
for j in 0..17 {
|
||||
kpts[[j, 0]] = 0.5;
|
||||
kpts[[j, 1]] = 0.5;
|
||||
}
|
||||
// Now uses the canonical OKS (scale derived from the pose), which is the
|
||||
// honest definition (ADR-155 §Tier-1.1). Perfect prediction ⇒ OKS=1.0.
|
||||
let kpts = spec_pose_17();
|
||||
let vis = Array1::ones(17_usize);
|
||||
let oks = compute_oks_v2(kpts.view(), kpts.view(), vis.view(), 128.0 * 128.0);
|
||||
let oks = oks_canonical(&kpts, &kpts, &vis);
|
||||
assert!((oks - 1.0).abs() < 1e-5, "oks={oks}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn spec_oks_v2_zero_area() {
|
||||
// A zero-extent (all-coincident) pose has no measurable scale ⇒ OKS=0.0
|
||||
// under the canonical definition — exactly the property that kills the
|
||||
// s=1.0 "fake Gold tier" bug.
|
||||
let kpts = Array2::<f32>::zeros((17, 2));
|
||||
let vis = Array1::ones(17_usize);
|
||||
let oks = compute_oks_v2(kpts.view(), kpts.view(), vis.view(), 0.0);
|
||||
let oks = oks_canonical(&kpts, &kpts, &vis);
|
||||
assert_eq!(oks, 0.0);
|
||||
}
|
||||
|
||||
|
|
@ -1662,11 +1721,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn spec_accumulator_v2_perfect() {
|
||||
let mut kpts = Array2::<f32>::zeros((17, 2));
|
||||
for j in 0..17 {
|
||||
kpts[[j, 0]] = 0.5;
|
||||
kpts[[j, 1]] = 0.5;
|
||||
}
|
||||
let kpts = spec_pose_17();
|
||||
let vis = Array1::ones(17_usize);
|
||||
let mut acc = MetricsAccumulatorV2::new();
|
||||
acc.update(kpts.view(), kpts.view(), vis.view(), (256, 256));
|
||||
|
|
@ -1690,13 +1745,87 @@ mod tests {
|
|||
assert_eq!(result.num_samples, 0);
|
||||
}
|
||||
|
||||
// ── Canonical metric: the ADR-155 bug-catching tests ─────────────────────
|
||||
|
||||
#[test]
|
||||
fn canonical_pck_zero_visible_is_zero_not_one() {
|
||||
// Regression test for the MetricsAccumulator false-perfect bug: a sample
|
||||
// with no visible joints must NOT score 1.0.
|
||||
let pred = Array2::<f32>::zeros((17, 2));
|
||||
let gt = Array2::<f32>::zeros((17, 2));
|
||||
let vis = Array1::<f32>::zeros(17);
|
||||
let (correct, total, pck) = pck_canonical(&pred, &gt, &vis, 0.2);
|
||||
assert_eq!((correct, total), (0, 0));
|
||||
assert_eq!(pck, 0.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn canonical_oks_not_one_for_wrong_pose_on_normalized_coords() {
|
||||
// Regression test for the s=1.0 "fake Gold tier" bug: a clearly wrong
|
||||
// prediction on normalized [0,1] coords must NOT yield OKS≈1.0, because
|
||||
// the scale is derived from the (small) pose extent, not a fixed 1.0.
|
||||
let mut gt = Array2::<f32>::zeros((17, 2));
|
||||
for j in 0..17 {
|
||||
gt[[j, 0]] = 0.5;
|
||||
gt[[j, 1]] = 0.5;
|
||||
}
|
||||
gt[[CANON_LEFT_HIP, 0]] = 0.45;
|
||||
gt[[CANON_RIGHT_HIP, 0]] = 0.55; // torso ≈ 0.1
|
||||
// Prediction off by 0.3 (3× the torso) — should be a poor OKS.
|
||||
let mut pred = gt.clone();
|
||||
for j in 0..17 {
|
||||
pred[[j, 0]] += 0.3;
|
||||
}
|
||||
let vis = Array1::<f32>::ones(17);
|
||||
let oks = oks_canonical(&pred, &gt, &vis);
|
||||
assert!(
|
||||
oks < 0.2,
|
||||
"wrong pose on normalized coords must not look near-perfect, got OKS={oks}"
|
||||
);
|
||||
// The old buggy path (s=1.0) would have returned ≈1.0 here.
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn canonical_pck_uses_hip_to_hip_torso() {
|
||||
// torso = ‖hip11 − hip12‖ = 0.1; threshold 0.2 ⇒ max dist 0.02.
|
||||
let mut gt = Array2::<f32>::zeros((17, 2));
|
||||
for j in 0..17 {
|
||||
gt[[j, 0]] = 0.5;
|
||||
gt[[j, 1]] = 0.5;
|
||||
}
|
||||
gt[[CANON_LEFT_HIP, 0]] = 0.45;
|
||||
gt[[CANON_RIGHT_HIP, 0]] = 0.55;
|
||||
let torso = canonical_torso_size(&gt, &Array1::ones(17)).unwrap();
|
||||
assert!((torso - 0.1).abs() < 1e-6, "torso={torso}");
|
||||
|
||||
// A joint 0.015 away (< 0.02) is correct; 0.05 away (> 0.02) is not.
|
||||
let mut pred = gt.clone();
|
||||
pred[[0, 0]] += 0.015; // nose within tolerance
|
||||
pred[[5, 0]] += 0.05; // shoulder out of tolerance
|
||||
let vis = Array1::ones(17);
|
||||
let (_, _, pck) = pck_canonical(&pred, &gt, &vis, 0.2);
|
||||
// 16 of 17 within tolerance.
|
||||
assert!((pck - 16.0 / 17.0).abs() < 1e-5, "pck={pck}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn canonical_torso_falls_back_to_bbox_when_hips_hidden() {
|
||||
// Hips invisible ⇒ fall back to visible-keypoint bbox diagonal.
|
||||
let mut gt = Array2::<f32>::zeros((17, 2));
|
||||
gt[[0, 0]] = 0.0;
|
||||
gt[[0, 1]] = 0.0;
|
||||
gt[[5, 0]] = 0.3;
|
||||
gt[[5, 1]] = 0.4; // diagonal = 0.5
|
||||
let mut vis = Array1::<f32>::zeros(17);
|
||||
vis[0] = 1.0;
|
||||
vis[5] = 1.0;
|
||||
let torso = canonical_torso_size(&gt, &vis).unwrap();
|
||||
assert!((torso - 0.5).abs() < 1e-6, "fallback torso={torso}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn spec_evaluate_dataset_v2_perfect() {
|
||||
let mut kpts = Array2::<f32>::zeros((17, 2));
|
||||
for j in 0..17 {
|
||||
kpts[[j, 0]] = 0.5;
|
||||
kpts[[j, 1]] = 0.5;
|
||||
}
|
||||
let kpts = spec_pose_17();
|
||||
let vis = Array1::ones(17_usize);
|
||||
let samples: Vec<(Array2<f32>, Array1<f32>)> =
|
||||
(0..4).map(|_| (kpts.clone(), vis.clone())).collect();
|
||||
|
|
|
|||
Loading…
Reference in New Issue