From 84e2c920fd106fc212b3ea5cf84047eae61208f1 Mon Sep 17 00:00:00 2001 From: ruv Date: Thu, 11 Jun 2026 19:57:16 -0400 Subject: [PATCH] =?UTF-8?q?fix(train):=20proof=20margin=20+=20committed-ha?= =?UTF-8?q?sh=20requirement=20(ADR-155=20=C2=A7Tier-1.4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The deterministic proof self-certified: PASS on any loss decrease (incl. 1e-9 noise) and a missing expected hash defaulted to PASS. - MIN_LOSS_DECREASE=1e-4: a run counts as learning only above float noise; a noise-only pipeline now FAILS. - is_pass() requires hash_matches==Some(true); no-hash -> SKIP (exit 2), never PASS. verify-training fails fast on a sub-margin loss before the hash compare, so a missing baseline cannot mask a non-learning pipeline. Documented honestly: the proof certifies reproducibility/determinism on a synthetic dataset, NOT that real data produced the weights nor that any accuracy claim is met. Tests: no_committed_hash_is_skip_not_pass, submargin_loss_change_fails_even_without_hash, committed_matching_hash_with_real_decrease_passes. Co-Authored-By: claude-flow --- .../src/bin/verify_training.rs | 31 ++++- v2/crates/wifi-densepose-train/src/proof.rs | 125 ++++++++++++++++-- 2 files changed, 141 insertions(+), 15 deletions(-) diff --git a/v2/crates/wifi-densepose-train/src/bin/verify_training.rs b/v2/crates/wifi-densepose-train/src/bin/verify_training.rs index 64613e46..5f3564d0 100644 --- a/v2/crates/wifi-densepose-train/src/bin/verify_training.rs +++ b/v2/crates/wifi-densepose-train/src/bin/verify_training.rs @@ -12,9 +12,12 @@ //! //! | Code | Meaning | //! |------|---------| -//! | 0 | PASS — hash matches AND loss decreased | -//! | 1 | FAIL — hash mismatch OR loss did not decrease | -//! | 2 | SKIP — no expected hash file found; run `--generate-hash` first | +//! | 0 | PASS — committed hash matches AND loss decreased ≥ margin | +//! 
| 1 | FAIL — hash mismatch OR loss did not decrease by the margin | +//! | 2 | SKIP — loss decreased but no committed hash to compare against | +//! +//! Note (ADR-155 §Tier-1.4): a sub-margin loss change is a **FAIL**, never a +//! SKIP — a missing baseline can no longer mask a non-learning pipeline. //! //! # Usage //! @@ -156,12 +159,32 @@ fn main() { println!(" Initial loss: {:.6}", result.initial_loss); println!(" Final loss: {:.6}", result.final_loss); println!( - " Loss decreased: {} ({:.6} → {:.6})", + " Loss decreased: {} (Δ={:.6}, need ≥ {:.0e}) ({:.6} → {:.6})", if result.loss_decreased { "YES" } else { "NO" }, + result.loss_decrease, + proof::MIN_LOSS_DECREASE, result.initial_loss, result.final_loss ); + // ADR-155 §Tier-1.4: a sub-margin / non-decrease is a FAIL regardless of + // whether an expected hash exists — it can never be silently downgraded to + // SKIP. Fail fast before the hash comparison. + if !result.loss_decreased { + println!(); + println!("[VERDICT] FAIL"); + println!("{}", "=".repeat(72)); + println!( + " REASON: loss did not decrease by the required margin \ + (Δ={:.6} < {:.0e}).", + result.loss_decrease, + proof::MIN_LOSS_DECREASE + ); + println!(" The optimiser is not measurably learning on the fixed proof problem."); + println!("{}", "=".repeat(72)); + std::process::exit(1); + } + if args.verbose { println!(); println!(" Loss trajectory ({} steps):", result.steps_completed); diff --git a/v2/crates/wifi-densepose-train/src/proof.rs b/v2/crates/wifi-densepose-train/src/proof.rs index b6114e4a..0ae3837d 100644 --- a/v2/crates/wifi-densepose-train/src/proof.rs +++ b/v2/crates/wifi-densepose-train/src/proof.rs @@ -16,8 +16,29 @@ //! # Trust Kill Switch //! //! Run `verify-training` to execute this proof. Exit code 0 = PASS, -//! 1 = FAIL (loss did not decrease or hash mismatch), 2 = SKIP (no hash -//! file to compare against). +//! 1 = FAIL (loss did not decrease by the required margin or hash mismatch), +//! 
2 = SKIP (no committed hash file to compare against). +//! +//! # What this proves — and what it does NOT (ADR-155 §Tier-1.4) +//! +//! This proof certifies **reproducibility and determinism** of the training +//! pipeline: identical seeds ⇒ identical weights ⇒ identical hash, and the +//! optimiser measurably reduces the loss on a fixed synthetic problem. It does +//! **not** prove that the shipped model weights were produced from real MM-Fi +//! data, nor that any accuracy claim is met — it runs on a deterministic +//! synthetic dataset by construction. Accuracy claims are substantiated +//! separately (see `benchmarks/wiflow-std/RESULTS.md`). +//! +//! Two integrity hardenings were applied in ADR-155: +//! +//! 1. **Minimum-decrease margin.** A run only counts as "loss decreased" when +//! `initial − final ≥ `[`MIN_LOSS_DECREASE`]. Previously *any* decrease +//! (including 1e-9 float noise) passed, so a pipeline that does no real +//! learning could still self-certify. +//! 2. **No-hash is a SKIP, not a PASS.** [`ProofResult::is_pass`] now requires +//! a *committed* expected hash to match. An absent `expected_proof.sha256` +//! yields SKIP (exit 2), so a missing baseline can never be mistaken for a +//! green proof. use sha2::{Digest, Sha256}; use std::io::{Read, Write}; @@ -49,6 +70,15 @@ pub const PROOF_BATCH_SIZE: usize = 4; /// Number of synthetic samples in the proof dataset. pub const PROOF_DATASET_SIZE: usize = 200; +/// Minimum absolute loss decrease (initial − final) required for the proof to +/// count as "the optimiser is learning" (ADR-155 §Tier-1.4). +/// +/// Chosen well above f32/f64 round-off noise but far below the decrease a real +/// gradient step produces on this synthetic problem (observed Δ ≫ 1e-2 over +/// [`N_PROOF_STEPS`]). A run whose loss only wanders by float noise now FAILS +/// instead of self-certifying on a 1e-9 "decrease". 
+pub const MIN_LOSS_DECREASE: f64 = 1e-4; + /// Filename under `proof_dir` where the expected weight hash is stored. const EXPECTED_HASH_FILE: &str = "expected_proof.sha256"; @@ -63,8 +93,12 @@ pub struct ProofResult { pub initial_loss: f64, /// Training loss at the final step. pub final_loss: f64, - /// `true` when `final_loss < initial_loss`. + /// `true` when the loss decreased by at least [`MIN_LOSS_DECREASE`] + /// (`initial_loss − final_loss ≥ MIN_LOSS_DECREASE`). A sub-margin or + /// negative change is `false` — float noise no longer counts as learning. pub loss_decreased: bool, + /// Actual loss decrease `initial_loss − final_loss` (may be negative). + pub loss_decrease: f64, /// Loss at each of the [`N_PROOF_STEPS`] steps. pub loss_trajectory: Vec<f64>, /// SHA-256 hex digest of all model weight tensors. @@ -79,20 +113,28 @@ pub struct ProofResult { } impl ProofResult { - /// Returns `true` when the proof fully passes (loss decreased AND hash - /// matches, or hash is not yet stored). + /// Returns `true` only when the proof fully passes: the loss decreased by + /// at least [`MIN_LOSS_DECREASE`] **and** a committed expected hash exists + /// and matches (ADR-155 §Tier-1.4). + /// + /// A missing expected hash is **not** a pass — it is a [`Self::is_skip`]. + /// This prevents an absent baseline from being read as green. pub fn is_pass(&self) -> bool { - self.loss_decreased && self.hash_matches.unwrap_or(true) + self.loss_decreased && self.hash_matches == Some(true) } - /// Returns `true` when there is an expected hash and it does NOT match. + /// Returns `true` when the proof definitively fails: the loss did not + /// decrease by the required margin, or an expected hash exists and does + /// not match. pub fn is_fail(&self) -> bool { - self.loss_decreased == false || self.hash_matches == Some(false) + !self.loss_decreased || self.hash_matches == Some(false) } - /// Returns `true` when no expected hash file exists yet.
+ /// Returns `true` when no committed expected hash exists yet (cannot + /// confirm reproducibility ⇒ neither PASS nor FAIL). Note: a sub-margin + /// loss decrease is a FAIL, not a SKIP, even with no hash present. pub fn is_skip(&self) -> bool { - self.expected_hash.is_none() + self.expected_hash.is_none() && self.loss_decreased } } @@ -196,7 +238,9 @@ pub fn run_proof(proof_dir: &Path) -> Result= MIN_LOSS_DECREASE; // Compute model weight hash (uses varstore()). let model_hash = hash_model_weights(&model); @@ -212,6 +256,7 @@ pub fn run_proof(proof_dir: &Path) -> Result= MIN_LOSS_DECREASE, + loss_trajectory: vec![1.0, 1.0 - noise], + model_hash: "abc".into(), + expected_hash: None, + hash_matches: None, + steps_completed: 2, + }; + assert!( + !r.loss_decreased, + "sub-margin change must not count as decrease" + ); + assert!(r.is_fail(), "sub-margin change is a FAIL"); + assert!(!r.is_skip(), "sub-margin change is not a SKIP"); + assert!(!r.is_pass()); + } + + #[test] + fn committed_matching_hash_with_real_decrease_passes() { + let r = ProofResult { + initial_loss: 1.0, + final_loss: 0.5, + loss_decrease: 0.5, + loss_decreased: true, + loss_trajectory: vec![1.0, 0.5], + model_hash: "deadbeef".into(), + expected_hash: Some("deadbeef".into()), + hash_matches: Some(true), + steps_completed: 2, + }; + assert!(r.is_pass()); + assert!(!r.is_fail()); + assert!(!r.is_skip()); + } + #[test] fn generate_and_verify_hash_matches() { let tmp = tempdir().unwrap();