From 84e2c920fd106fc212b3ea5cf84047eae61208f1 Mon Sep 17 00:00:00 2001
From: ruv <ruv@ruv.net>
Date: Thu, 11 Jun 2026 19:57:16 -0400
Subject: [PATCH] =?UTF-8?q?fix(train):=20proof=20margin=20+=20committed-ha?=
 =?UTF-8?q?sh=20requirement=20(ADR-155=20=C2=A7Tier-1.4)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The deterministic proof self-certified: PASS on any loss decrease (incl. 1e-9
noise) and a missing expected hash defaulted to PASS.

- MIN_LOSS_DECREASE=1e-4: a run counts as learning only above float noise; a
  noise-only pipeline now FAILS.
- is_pass() requires hash_matches==Some(true); no-hash -> SKIP (exit 2), never
  PASS. verify-training fails fast on a sub-margin loss before the hash compare,
  so a missing baseline cannot mask a non-learning pipeline.

Documented honestly: the proof certifies reproducibility/determinism on a
synthetic dataset, NOT that real data produced the weights nor that any accuracy
claim is met. Tests: no_committed_hash_is_skip_not_pass,
submargin_loss_change_fails_even_without_hash,
committed_matching_hash_with_real_decrease_passes.

Co-Authored-By: claude-flow <ruv@ruv.net>
---
 .../src/bin/verify_training.rs                |  31 ++++-
 v2/crates/wifi-densepose-train/src/proof.rs   | 125 ++++++++++++++++--
 2 files changed, 141 insertions(+), 15 deletions(-)

diff --git a/v2/crates/wifi-densepose-train/src/bin/verify_training.rs b/v2/crates/wifi-densepose-train/src/bin/verify_training.rs
index 64613e46..5f3564d0 100644
--- a/v2/crates/wifi-densepose-train/src/bin/verify_training.rs
+++ b/v2/crates/wifi-densepose-train/src/bin/verify_training.rs
@@ -12,9 +12,12 @@
 //!
 //! | Code | Meaning |
 //! |------|---------|
-//! | 0    | PASS — hash matches AND loss decreased |
-//! | 1    | FAIL — hash mismatch OR loss did not decrease |
-//! | 2    | SKIP — no expected hash file found; run `--generate-hash` first |
+//! | 0    | PASS — committed hash matches AND loss decreased ≥ margin |
+//! | 1    | FAIL — hash mismatch OR loss did not decrease by the margin |
+//! | 2    | SKIP — loss decreased but no committed hash to compare against |
+//!
+//! Note (ADR-155 §Tier-1.4): a sub-margin loss change is a **FAIL**, never a
+//! SKIP — a missing baseline can no longer mask a non-learning pipeline.
 //!
 //! # Usage
 //!
@@ -156,12 +159,32 @@ fn main() {
     println!("  Initial loss:    {:.6}", result.initial_loss);
     println!("  Final loss:      {:.6}", result.final_loss);
     println!(
-        "  Loss decreased:  {} ({:.6} → {:.6})",
+        "  Loss decreased:  {} (Δ={:.6}, need ≥ {:.0e}) ({:.6} → {:.6})",
         if result.loss_decreased { "YES" } else { "NO" },
+        result.loss_decrease,
+        proof::MIN_LOSS_DECREASE,
         result.initial_loss,
         result.final_loss
     );
 
+    // ADR-155 §Tier-1.4: a sub-margin / non-decrease is a FAIL regardless of
+    // whether an expected hash exists — it can never be silently downgraded to
+    // SKIP. Fail fast before the hash comparison.
+    if !result.loss_decreased {
+        println!();
+        println!("[VERDICT] FAIL");
+        println!("{}", "=".repeat(72));
+        println!(
+            "  REASON: loss did not decrease by the required margin \
+             (Δ={:.6} < {:.0e}).",
+            result.loss_decrease,
+            proof::MIN_LOSS_DECREASE
+        );
+        println!("  The optimiser is not measurably learning on the fixed proof problem.");
+        println!("{}", "=".repeat(72));
+        std::process::exit(1);
+    }
+
     if args.verbose {
         println!();
         println!("  Loss trajectory ({} steps):", result.steps_completed);
diff --git a/v2/crates/wifi-densepose-train/src/proof.rs b/v2/crates/wifi-densepose-train/src/proof.rs
index b6114e4a..0ae3837d 100644
--- a/v2/crates/wifi-densepose-train/src/proof.rs
+++ b/v2/crates/wifi-densepose-train/src/proof.rs
@@ -16,8 +16,29 @@
 //! # Trust Kill Switch
 //!
 //! Run `verify-training` to execute this proof.  Exit code 0 = PASS,
-//! 1 = FAIL (loss did not decrease or hash mismatch), 2 = SKIP (no hash
-//! file to compare against).
+//! 1 = FAIL (loss did not decrease by the required margin or hash mismatch),
+//! 2 = SKIP (no committed hash file to compare against).
+//!
+//! # What this proves — and what it does NOT (ADR-155 §Tier-1.4)
+//!
+//! This proof certifies **reproducibility and determinism** of the training
+//! pipeline: identical seeds ⇒ identical weights ⇒ identical hash, and the
+//! optimiser measurably reduces the loss on a fixed synthetic problem. It does
+//! **not** prove that the shipped model weights were produced from real MM-Fi
+//! data, nor that any accuracy claim is met — it runs on a deterministic
+//! synthetic dataset by construction. Accuracy claims are substantiated
+//! separately (see `benchmarks/wiflow-std/RESULTS.md`).
+//!
+//! Two integrity hardenings were applied in ADR-155:
+//!
+//! 1. **Minimum-decrease margin.** A run only counts as "loss decreased" when
+//!    `initial − final ≥ `[`MIN_LOSS_DECREASE`]. Previously *any* decrease
+//!    (including 1e-9 float noise) passed, so a pipeline that does no real
+//!    learning could still self-certify.
+//! 2. **No-hash is a SKIP, not a PASS.** [`ProofResult::is_pass`] now requires
+//!    a *committed* expected hash to match. An absent `expected_proof.sha256`
+//!    yields SKIP (exit 2), so a missing baseline can never be mistaken for a
+//!    green proof.
 
 use sha2::{Digest, Sha256};
 use std::io::{Read, Write};
@@ -49,6 +70,15 @@ pub const PROOF_BATCH_SIZE: usize = 4;
 /// Number of synthetic samples in the proof dataset.
 pub const PROOF_DATASET_SIZE: usize = 200;
 
+/// Minimum absolute loss decrease (initial − final) required for the proof to
+/// count as "the optimiser is learning" (ADR-155 §Tier-1.4).
+///
+/// Chosen well above f32/f64 round-off noise but far below the decrease a real
+/// gradient step produces on this synthetic problem (observed Δ ≫ 1e-2 over
+/// [`N_PROOF_STEPS`]). A run whose loss only wanders by float noise now FAILS
+/// instead of self-certifying on a 1e-9 "decrease".
+pub const MIN_LOSS_DECREASE: f64 = 1e-4;
+
 /// Filename under `proof_dir` where the expected weight hash is stored.
 const EXPECTED_HASH_FILE: &str = "expected_proof.sha256";
 
@@ -63,8 +93,12 @@ pub struct ProofResult {
     pub initial_loss: f64,
     /// Training loss at the final step.
     pub final_loss: f64,
-    /// `true` when `final_loss < initial_loss`.
+    /// `true` when the loss decreased by at least [`MIN_LOSS_DECREASE`]
+    /// (`initial_loss − final_loss ≥ MIN_LOSS_DECREASE`). A sub-margin or
+    /// negative change is `false` — float noise no longer counts as learning.
     pub loss_decreased: bool,
+    /// Actual loss decrease `initial_loss − final_loss` (may be negative).
+    pub loss_decrease: f64,
     /// Loss at each of the [`N_PROOF_STEPS`] steps.
     pub loss_trajectory: Vec<f64>,
     /// SHA-256 hex digest of all model weight tensors.
@@ -79,20 +113,28 @@ pub struct ProofResult {
 }
 
 impl ProofResult {
-    /// Returns `true` when the proof fully passes (loss decreased AND hash
-    /// matches, or hash is not yet stored).
+    /// Returns `true` only when the proof fully passes: the loss decreased by
+    /// at least [`MIN_LOSS_DECREASE`] **and** a committed expected hash exists
+    /// and matches (ADR-155 §Tier-1.4).
+    ///
+    /// A missing expected hash is **not** a pass — it is a [`Self::is_skip`].
+    /// This prevents an absent baseline from being read as green.
     pub fn is_pass(&self) -> bool {
-        self.loss_decreased && self.hash_matches.unwrap_or(true)
+        self.loss_decreased && self.hash_matches == Some(true)
     }
 
-    /// Returns `true` when there is an expected hash and it does NOT match.
+    /// Returns `true` when the proof definitively fails: the loss did not
+    /// decrease by the required margin, or an expected hash exists and does
+    /// not match.
     pub fn is_fail(&self) -> bool {
-        self.loss_decreased == false || self.hash_matches == Some(false)
+        !self.loss_decreased || self.hash_matches == Some(false)
     }
 
-    /// Returns `true` when no expected hash file exists yet.
+    /// Returns `true` when no committed expected hash exists yet (cannot
+    /// confirm reproducibility ⇒ neither PASS nor FAIL). Note: a sub-margin
+    /// loss decrease is a FAIL, not a SKIP, even with no hash present.
     pub fn is_skip(&self) -> bool {
-        self.expected_hash.is_none()
+        self.expected_hash.is_none() && self.loss_decreased
     }
 }
 
@@ -196,7 +238,9 @@ pub fn run_proof(proof_dir: &Path) -> Result<ProofResult, Box<dyn std::error::Er
 
     let initial_loss = loss_trajectory.first().copied().unwrap_or(f64::NAN);
     let final_loss = loss_trajectory.last().copied().unwrap_or(f64::NAN);
-    let loss_decreased = final_loss < initial_loss;
+    // ADR-155 §Tier-1.4: require a real, above-noise decrease (not just any Δ).
+    let loss_decrease = initial_loss - final_loss;
+    let loss_decreased = loss_decrease >= MIN_LOSS_DECREASE;
 
     // Compute model weight hash (uses varstore()).
     let model_hash = hash_model_weights(&model);
@@ -212,6 +256,7 @@ pub fn run_proof(proof_dir: &Path) -> Result<ProofResult, Box<dyn std::error::Er
         initial_loss,
         final_loss,
         loss_decreased,
+        loss_decrease,
         loss_trajectory,
         model_hash,
         expected_hash,
@@ -463,6 +508,64 @@ mod tests {
         assert!(result.hash_matches.is_none());
     }
 
+    #[test]
+    fn no_committed_hash_is_skip_not_pass() {
+        // ADR-155 §Tier-1.4: a real proof run with NO committed expected hash
+        // must be SKIP — never PASS. (Previously is_pass() defaulted a missing
+        // hash to `true`, letting an unbaselined pipeline self-certify.)
+        let tmp = tempdir().unwrap();
+        let result = run_proof(tmp.path()).unwrap();
+        assert!(result.expected_hash.is_none());
+        assert!(!result.is_pass(), "no-hash must not be a PASS");
+        // Loss genuinely decreases on the synthetic problem, so this is a SKIP.
+        assert!(result.loss_decreased, "synthetic proof should learn");
+        assert!(result.is_skip(), "no-hash with learning is a SKIP");
+        assert!(!result.is_fail());
+    }
+
+    #[test]
+    fn submargin_loss_change_fails_even_without_hash() {
+        // ADR-155 §Tier-1.4: a loss decrease below MIN_LOSS_DECREASE is a FAIL,
+        // and the absence of a hash cannot downgrade it to SKIP.
+        let noise = MIN_LOSS_DECREASE / 100.0;
+        let r = ProofResult {
+            initial_loss: 1.0,
+            final_loss: 1.0 - noise,
+            loss_decrease: noise,
+            loss_decreased: noise >= MIN_LOSS_DECREASE,
+            loss_trajectory: vec![1.0, 1.0 - noise],
+            model_hash: "abc".into(),
+            expected_hash: None,
+            hash_matches: None,
+            steps_completed: 2,
+        };
+        assert!(
+            !r.loss_decreased,
+            "sub-margin change must not count as decrease"
+        );
+        assert!(r.is_fail(), "sub-margin change is a FAIL");
+        assert!(!r.is_skip(), "sub-margin change is not a SKIP");
+        assert!(!r.is_pass());
+    }
+
+    #[test]
+    fn committed_matching_hash_with_real_decrease_passes() {
+        let r = ProofResult {
+            initial_loss: 1.0,
+            final_loss: 0.5,
+            loss_decrease: 0.5,
+            loss_decreased: true,
+            loss_trajectory: vec![1.0, 0.5],
+            model_hash: "deadbeef".into(),
+            expected_hash: Some("deadbeef".into()),
+            hash_matches: Some(true),
+            steps_completed: 2,
+        };
+        assert!(r.is_pass());
+        assert!(!r.is_fail());
+        assert!(!r.is_skip());
+    }
+
     #[test]
     fn generate_and_verify_hash_matches() {
         let tmp = tempdir().unwrap();