fix(train): proof margin + committed-hash requirement (ADR-155 §Tier-1.4)
The deterministic proof self-certified: PASS on any loss decrease (incl. 1e-9 noise) and a missing expected hash defaulted to PASS. - MIN_LOSS_DECREASE=1e-4: a run counts as learning only above float noise; a noise-only pipeline now FAILS. - is_pass() requires hash_matches==Some(true); no-hash -> SKIP (exit 2), never PASS. verify-training fails fast on a sub-margin loss before the hash compare, so a missing baseline cannot mask a non-learning pipeline. Documented honestly: the proof certifies reproducibility/determinism on a synthetic dataset, NOT that real data produced the weights nor that any accuracy claim is met. Tests: no_committed_hash_is_skip_not_pass, submargin_loss_change_fails_even_without_hash, committed_matching_hash_with_real_decrease_passes. Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
7fb3e33557
commit
84e2c920fd
|
|
@ -12,9 +12,12 @@
|
|||
//!
|
||||
//! | Code | Meaning |
|
||||
//! |------|---------|
|
||||
//! | 0 | PASS — hash matches AND loss decreased |
|
||||
//! | 1 | FAIL — hash mismatch OR loss did not decrease |
|
||||
//! | 2 | SKIP — no expected hash file found; run `--generate-hash` first |
|
||||
//! | 0 | PASS — committed hash matches AND loss decreased ≥ margin |
|
||||
//! | 1 | FAIL — hash mismatch OR loss did not decrease by the margin |
|
||||
//! | 2 | SKIP — loss decreased but no committed hash to compare against |
|
||||
//!
|
||||
//! Note (ADR-155 §Tier-1.4): a sub-margin loss change is a **FAIL**, never a
|
||||
//! SKIP — a missing baseline can no longer mask a non-learning pipeline.
|
||||
//!
|
||||
//! # Usage
|
||||
//!
|
||||
|
|
@ -156,12 +159,32 @@ fn main() {
|
|||
println!(" Initial loss: {:.6}", result.initial_loss);
|
||||
println!(" Final loss: {:.6}", result.final_loss);
|
||||
println!(
|
||||
" Loss decreased: {} ({:.6} → {:.6})",
|
||||
" Loss decreased: {} (Δ={:.6}, need ≥ {:.0e}) ({:.6} → {:.6})",
|
||||
if result.loss_decreased { "YES" } else { "NO" },
|
||||
result.loss_decrease,
|
||||
proof::MIN_LOSS_DECREASE,
|
||||
result.initial_loss,
|
||||
result.final_loss
|
||||
);
|
||||
|
||||
// ADR-155 §Tier-1.4: a sub-margin / non-decrease is a FAIL regardless of
|
||||
// whether an expected hash exists — it can never be silently downgraded to
|
||||
// SKIP. Fail fast before the hash comparison.
|
||||
if !result.loss_decreased {
|
||||
println!();
|
||||
println!("[VERDICT] FAIL");
|
||||
println!("{}", "=".repeat(72));
|
||||
println!(
|
||||
" REASON: loss did not decrease by the required margin \
|
||||
(Δ={:.6} < {:.0e}).",
|
||||
result.loss_decrease,
|
||||
proof::MIN_LOSS_DECREASE
|
||||
);
|
||||
println!(" The optimiser is not measurably learning on the fixed proof problem.");
|
||||
println!("{}", "=".repeat(72));
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
if args.verbose {
|
||||
println!();
|
||||
println!(" Loss trajectory ({} steps):", result.steps_completed);
|
||||
|
|
|
|||
|
|
@ -16,8 +16,29 @@
|
|||
//! # Trust Kill Switch
|
||||
//!
|
||||
//! Run `verify-training` to execute this proof. Exit code 0 = PASS,
|
||||
//! 1 = FAIL (loss did not decrease or hash mismatch), 2 = SKIP (no hash
|
||||
//! file to compare against).
|
||||
//! 1 = FAIL (loss did not decrease by the required margin or hash mismatch),
|
||||
//! 2 = SKIP (no committed hash file to compare against).
|
||||
//!
|
||||
//! # What this proves — and what it does NOT (ADR-155 §Tier-1.4)
|
||||
//!
|
||||
//! This proof certifies **reproducibility and determinism** of the training
|
||||
//! pipeline: identical seeds ⇒ identical weights ⇒ identical hash, and the
|
||||
//! optimiser measurably reduces the loss on a fixed synthetic problem. It does
|
||||
//! **not** prove that the shipped model weights were produced from real MM-Fi
|
||||
//! data, nor that any accuracy claim is met — it runs on a deterministic
|
||||
//! synthetic dataset by construction. Accuracy claims are substantiated
|
||||
//! separately (see `benchmarks/wiflow-std/RESULTS.md`).
|
||||
//!
|
||||
//! Two integrity hardenings were applied in ADR-155:
|
||||
//!
|
||||
//! 1. **Minimum-decrease margin.** A run only counts as "loss decreased" when
|
||||
//! `initial − final ≥ `[`MIN_LOSS_DECREASE`]. Previously *any* decrease
|
||||
//! (including 1e-9 float noise) passed, so a pipeline that does no real
|
||||
//! learning could still self-certify.
|
||||
//! 2. **No-hash is a SKIP, not a PASS.** [`ProofResult::is_pass`] now requires
|
||||
//! a *committed* expected hash to match. An absent `expected_proof.sha256`
|
||||
//! yields SKIP (exit 2), so a missing baseline can never be mistaken for a
|
||||
//! green proof.
|
||||
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::io::{Read, Write};
|
||||
|
|
@ -49,6 +70,15 @@ pub const PROOF_BATCH_SIZE: usize = 4;
|
|||
/// Number of synthetic samples in the proof dataset.
|
||||
pub const PROOF_DATASET_SIZE: usize = 200;
|
||||
|
||||
/// Minimum absolute loss decrease (initial − final) required for the proof to
|
||||
/// count as "the optimiser is learning" (ADR-155 §Tier-1.4).
|
||||
///
|
||||
/// Chosen well above f32/f64 round-off noise but far below the decrease a real
|
||||
/// gradient step produces on this synthetic problem (observed Δ ≫ 1e-2 over
|
||||
/// [`N_PROOF_STEPS`]). A run whose loss only wanders by float noise now FAILS
|
||||
/// instead of self-certifying on a 1e-9 "decrease".
|
||||
pub const MIN_LOSS_DECREASE: f64 = 1e-4;
|
||||
|
||||
/// Filename under `proof_dir` where the expected weight hash is stored.
|
||||
const EXPECTED_HASH_FILE: &str = "expected_proof.sha256";
|
||||
|
||||
|
|
@ -63,8 +93,12 @@ pub struct ProofResult {
|
|||
pub initial_loss: f64,
|
||||
/// Training loss at the final step.
|
||||
pub final_loss: f64,
|
||||
/// `true` when `final_loss < initial_loss`.
|
||||
/// `true` when the loss decreased by at least [`MIN_LOSS_DECREASE`]
|
||||
/// (`initial_loss − final_loss ≥ MIN_LOSS_DECREASE`). A sub-margin or
|
||||
/// negative change is `false` — float noise no longer counts as learning.
|
||||
pub loss_decreased: bool,
|
||||
/// Actual loss decrease `initial_loss − final_loss` (may be negative).
|
||||
pub loss_decrease: f64,
|
||||
/// Loss at each of the [`N_PROOF_STEPS`] steps.
|
||||
pub loss_trajectory: Vec<f64>,
|
||||
/// SHA-256 hex digest of all model weight tensors.
|
||||
|
|
@ -79,20 +113,28 @@ pub struct ProofResult {
|
|||
}
|
||||
|
||||
impl ProofResult {
|
||||
/// Returns `true` when the proof fully passes (loss decreased AND hash
|
||||
/// matches, or hash is not yet stored).
|
||||
/// Returns `true` only when the proof fully passes: the loss decreased by
|
||||
/// at least [`MIN_LOSS_DECREASE`] **and** a committed expected hash exists
|
||||
/// and matches (ADR-155 §Tier-1.4).
|
||||
///
|
||||
/// A missing expected hash is **not** a pass — it is a [`Self::is_skip`].
|
||||
/// This prevents an absent baseline from being read as green.
|
||||
pub fn is_pass(&self) -> bool {
|
||||
self.loss_decreased && self.hash_matches.unwrap_or(true)
|
||||
self.loss_decreased && self.hash_matches == Some(true)
|
||||
}
|
||||
|
||||
/// Returns `true` when there is an expected hash and it does NOT match.
|
||||
/// Returns `true` when the proof definitively fails: the loss did not
|
||||
/// decrease by the required margin, or an expected hash exists and does
|
||||
/// not match.
|
||||
pub fn is_fail(&self) -> bool {
|
||||
self.loss_decreased == false || self.hash_matches == Some(false)
|
||||
!self.loss_decreased || self.hash_matches == Some(false)
|
||||
}
|
||||
|
||||
/// Returns `true` when no expected hash file exists yet.
|
||||
/// Returns `true` when no committed expected hash exists yet (cannot
|
||||
/// confirm reproducibility ⇒ neither PASS nor FAIL). Note: a sub-margin
|
||||
/// loss decrease is a FAIL, not a SKIP, even with no hash present.
|
||||
pub fn is_skip(&self) -> bool {
|
||||
self.expected_hash.is_none()
|
||||
self.expected_hash.is_none() && self.loss_decreased
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -196,7 +238,9 @@ pub fn run_proof(proof_dir: &Path) -> Result<ProofResult, Box<dyn std::error::Er
|
|||
|
||||
let initial_loss = loss_trajectory.first().copied().unwrap_or(f64::NAN);
|
||||
let final_loss = loss_trajectory.last().copied().unwrap_or(f64::NAN);
|
||||
let loss_decreased = final_loss < initial_loss;
|
||||
// ADR-155 §Tier-1.4: require a real, above-noise decrease (not just any Δ).
|
||||
let loss_decrease = initial_loss - final_loss;
|
||||
let loss_decreased = loss_decrease >= MIN_LOSS_DECREASE;
|
||||
|
||||
// Compute model weight hash (uses varstore()).
|
||||
let model_hash = hash_model_weights(&model);
|
||||
|
|
@ -212,6 +256,7 @@ pub fn run_proof(proof_dir: &Path) -> Result<ProofResult, Box<dyn std::error::Er
|
|||
initial_loss,
|
||||
final_loss,
|
||||
loss_decreased,
|
||||
loss_decrease,
|
||||
loss_trajectory,
|
||||
model_hash,
|
||||
expected_hash,
|
||||
|
|
@ -463,6 +508,64 @@ mod tests {
|
|||
assert!(result.hash_matches.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_committed_hash_is_skip_not_pass() {
|
||||
// ADR-155 §Tier-1.4: a real proof run with NO committed expected hash
|
||||
// must be SKIP — never PASS. (Previously is_pass() defaulted a missing
|
||||
// hash to `true`, letting an unbaselined pipeline self-certify.)
|
||||
let tmp = tempdir().unwrap();
|
||||
let result = run_proof(tmp.path()).unwrap();
|
||||
assert!(result.expected_hash.is_none());
|
||||
assert!(!result.is_pass(), "no-hash must not be a PASS");
|
||||
// Loss genuinely decreases on the synthetic problem, so this is a SKIP.
|
||||
assert!(result.loss_decreased, "synthetic proof should learn");
|
||||
assert!(result.is_skip(), "no-hash with learning is a SKIP");
|
||||
assert!(!result.is_fail());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn submargin_loss_change_fails_even_without_hash() {
|
||||
// ADR-155 §Tier-1.4: a loss decrease below MIN_LOSS_DECREASE is a FAIL,
|
||||
// and the absence of a hash cannot downgrade it to SKIP.
|
||||
let noise = MIN_LOSS_DECREASE / 100.0;
|
||||
let r = ProofResult {
|
||||
initial_loss: 1.0,
|
||||
final_loss: 1.0 - noise,
|
||||
loss_decrease: noise,
|
||||
loss_decreased: noise >= MIN_LOSS_DECREASE,
|
||||
loss_trajectory: vec![1.0, 1.0 - noise],
|
||||
model_hash: "abc".into(),
|
||||
expected_hash: None,
|
||||
hash_matches: None,
|
||||
steps_completed: 2,
|
||||
};
|
||||
assert!(
|
||||
!r.loss_decreased,
|
||||
"sub-margin change must not count as decrease"
|
||||
);
|
||||
assert!(r.is_fail(), "sub-margin change is a FAIL");
|
||||
assert!(!r.is_skip(), "sub-margin change is not a SKIP");
|
||||
assert!(!r.is_pass());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn committed_matching_hash_with_real_decrease_passes() {
|
||||
let r = ProofResult {
|
||||
initial_loss: 1.0,
|
||||
final_loss: 0.5,
|
||||
loss_decrease: 0.5,
|
||||
loss_decreased: true,
|
||||
loss_trajectory: vec![1.0, 0.5],
|
||||
model_hash: "deadbeef".into(),
|
||||
expected_hash: Some("deadbeef".into()),
|
||||
hash_matches: Some(true),
|
||||
steps_completed: 2,
|
||||
};
|
||||
assert!(r.is_pass());
|
||||
assert!(!r.is_fail());
|
||||
assert!(!r.is_skip());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn generate_and_verify_hash_matches() {
|
||||
let tmp = tempdir().unwrap();
|
||||
|
|
|
|||
Loading…
Reference in New Issue