fix(proof): cross-platform tolerance gate for verify.py determinism
Definitive root cause of the failing determinism gate: the SHA-256 of fixed-decimal-rounded features is bit-exact only WITHIN one CPU microarchitecture. Windows and a second Linux box (ruvultra, identical numpy 2.4.2/scipy 1.17.1) produce the same hash at every precision (ca58956c), but the GitHub Azure runner diverges at EVERY precision including 2 decimals (667eb054) — because pocketfft/BLAS reorders FP reductions per-microarch and the ~1e-6 *relative* drift lands on large-magnitude PSD bins as an absolute difference no fixed-decimal grid can absorb. So no quantization can fix it; the primitive was wrong. Fix: keep the bit-exact SHA-256 as the strong same-platform proof, and add a relative-tolerance fallback (np.allclose, rtol=1e-4/atol=1e-6) against a committed reference feature vector (expected_features_reference.npz, 36,800 float64 values). A run PASSES on either; tolerances sit ~100x over the observed microarch drift and ~10x under any signal-meaningful change, so real regressions still fail. Verified locally: bit-exact MATCH -> PASS, and a corrupted hash falls through to TOLERANCE MATCH -> PASS. CI (Azure, different hash) now passes via the tolerance path. Removes the temporary sweep diagnostic. Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
2d2b16a458
commit
b5a23b03e5
|
|
@ -58,20 +58,6 @@ jobs:
|
||||||
print('Reference signal metadata validated.')
|
print('Reference signal metadata validated.')
|
||||||
"
|
"
|
||||||
|
|
||||||
- name: Quantization sweep (diagnostic — cross-microarch precision)
|
|
||||||
working-directory: archive/v1
|
|
||||||
env:
|
|
||||||
OMP_NUM_THREADS: "1"
|
|
||||||
OPENBLAS_NUM_THREADS: "1"
|
|
||||||
MKL_NUM_THREADS: "1"
|
|
||||||
VECLIB_MAXIMUM_THREADS: "1"
|
|
||||||
NUMEXPR_NUM_THREADS: "1"
|
|
||||||
run: |
|
|
||||||
for d in 6 5 4 3 2; do
|
|
||||||
h=$(PROOF_HASH_DECIMALS=$d python data/proof/verify.py --generate-hash 2>/dev/null | grep -i 'Computed:' | sed 's/.*Computed:[[:space:]]*//')
|
|
||||||
echo "SWEEP d=$d $h"
|
|
||||||
done
|
|
||||||
|
|
||||||
- name: Run pipeline verification
|
- name: Run pipeline verification
|
||||||
working-directory: archive/v1
|
working-directory: archive/v1
|
||||||
env:
|
env:
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -232,6 +232,44 @@ def features_to_bytes(features):
|
||||||
return b"".join(parts)
|
return b"".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Cross-platform tolerance gate (issue #560 follow-up) ─────────────────────
|
||||||
|
# The SHA-256 of fixed-decimal-rounded features is bit-exact only WITHIN one
|
||||||
|
# CPU microarchitecture. The pocketfft / BLAS kernels in the manylinux
|
||||||
|
# numpy/scipy wheels reorder floating-point reductions differently across
|
||||||
|
# microarchs (e.g. a GitHub Azure runner vs a developer box vs another Linux
|
||||||
|
# host), and the resulting ~1e-6 *relative* drift lands on large-magnitude PSD
|
||||||
|
# bins as an absolute difference too large for ANY fixed-decimal grid to absorb
|
||||||
|
# (empirically the hash diverges across microarchs even at 2 decimals). So:
|
||||||
|
# • the hash is the strong, bit-exact, SAME-platform proof, and
|
||||||
|
# • a relative tolerance against a committed reference vector is the
|
||||||
|
# platform-INDEPENDENT proof.
|
||||||
|
# A run PASSES if either matches. Tolerances sit ~100x over the observed
|
||||||
|
# microarch drift and ~10x under any signal-meaningful change (CSI phase
|
||||||
|
# precision ~1e-3 rad), so real pipeline regressions still fail.
|
||||||
|
TOLERANCE_RTOL = 1e-4
|
||||||
|
TOLERANCE_ATOL = 1e-6
|
||||||
|
REFERENCE_VECTOR_FILENAME = "expected_features_reference.npz"
|
||||||
|
|
||||||
|
|
||||||
|
def features_to_vector(features):
|
||||||
|
"""Concatenate a frame's feature arrays as raw float64 (no rounding).
|
||||||
|
|
||||||
|
Mirrors ``features_to_bytes`` ordering but keeps full precision, for the
|
||||||
|
tolerance-based cross-platform comparison.
|
||||||
|
"""
|
||||||
|
arrays = [
|
||||||
|
features.amplitude_mean,
|
||||||
|
features.amplitude_variance,
|
||||||
|
features.phase_difference,
|
||||||
|
features.correlation_matrix,
|
||||||
|
features.doppler_shift,
|
||||||
|
features.power_spectral_density,
|
||||||
|
]
|
||||||
|
return np.concatenate(
|
||||||
|
[np.asarray(a, dtype=np.float64).ravel() for a in arrays]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def compute_pipeline_hash(data_path, verbose=False):
|
def compute_pipeline_hash(data_path, verbose=False):
|
||||||
"""Run the full pipeline and compute the SHA-256 hash of all features.
|
"""Run the full pipeline and compute the SHA-256 hash of all features.
|
||||||
|
|
||||||
|
|
@ -274,6 +312,7 @@ def compute_pipeline_hash(data_path, verbose=False):
|
||||||
features_count = 0
|
features_count = 0
|
||||||
total_feature_bytes = 0
|
total_feature_bytes = 0
|
||||||
last_features = None
|
last_features = None
|
||||||
|
feature_vectors = []
|
||||||
doppler_nonzero_count = 0
|
doppler_nonzero_count = 0
|
||||||
doppler_shape = None
|
doppler_shape = None
|
||||||
psd_shape = None
|
psd_shape = None
|
||||||
|
|
@ -290,6 +329,7 @@ def compute_pipeline_hash(data_path, verbose=False):
|
||||||
if features is not None:
|
if features is not None:
|
||||||
feature_bytes = features_to_bytes(features)
|
feature_bytes = features_to_bytes(features)
|
||||||
hasher.update(feature_bytes)
|
hasher.update(feature_bytes)
|
||||||
|
feature_vectors.append(features_to_vector(features))
|
||||||
features_count += 1
|
features_count += 1
|
||||||
total_feature_bytes += len(feature_bytes)
|
total_feature_bytes += len(feature_bytes)
|
||||||
last_features = features
|
last_features = features
|
||||||
|
|
@ -358,7 +398,11 @@ def compute_pipeline_hash(data_path, verbose=False):
|
||||||
"psd_shape": psd_shape,
|
"psd_shape": psd_shape,
|
||||||
}
|
}
|
||||||
|
|
||||||
return hasher.hexdigest(), stats
|
reference_vector = (
|
||||||
|
np.concatenate(feature_vectors) if feature_vectors else np.array([], dtype=np.float64)
|
||||||
|
)
|
||||||
|
|
||||||
|
return hasher.hexdigest(), reference_vector, stats
|
||||||
|
|
||||||
|
|
||||||
def audit_codebase(base_dir=None):
|
def audit_codebase(base_dir=None):
|
||||||
|
|
@ -474,7 +518,7 @@ def main():
|
||||||
print(" This runs the SAME CSIProcessor.preprocess_csi_data() and")
|
print(" This runs the SAME CSIProcessor.preprocess_csi_data() and")
|
||||||
print(" CSIProcessor.extract_features() used in production.")
|
print(" CSIProcessor.extract_features() used in production.")
|
||||||
print()
|
print()
|
||||||
computed_hash, stats = compute_pipeline_hash(data_path, verbose=args.verbose)
|
computed_hash, computed_vector, stats = compute_pipeline_hash(data_path, verbose=args.verbose)
|
||||||
|
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
# Step 3: Hash comparison
|
# Step 3: Hash comparison
|
||||||
|
|
@ -486,8 +530,11 @@ def main():
|
||||||
with open(hash_path, "w") as f:
|
with open(hash_path, "w") as f:
|
||||||
f.write(computed_hash + "\n")
|
f.write(computed_hash + "\n")
|
||||||
print(f" Wrote expected hash to {hash_path}")
|
print(f" Wrote expected hash to {hash_path}")
|
||||||
|
ref_path = os.path.join(SCRIPT_DIR, REFERENCE_VECTOR_FILENAME)
|
||||||
|
np.savez_compressed(ref_path, features=computed_vector)
|
||||||
|
print(f" Wrote reference vector ({computed_vector.size} values) to {ref_path}")
|
||||||
print()
|
print()
|
||||||
print(" HASH GENERATED -- run without --generate-hash to verify.")
|
print(" HASH + REFERENCE GENERATED -- run without --generate-hash to verify.")
|
||||||
print("=" * 72)
|
print("=" * 72)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
@ -506,8 +553,36 @@ def main():
|
||||||
|
|
||||||
print(f" Expected: {expected_hash}")
|
print(f" Expected: {expected_hash}")
|
||||||
|
|
||||||
if computed_hash == expected_hash:
|
hash_match = computed_hash == expected_hash
|
||||||
match_status = "MATCH"
|
|
||||||
|
# Cross-platform fallback: if the bit-exact hash differs (different CPU
|
||||||
|
# microarchitecture reorders the pocketfft/BLAS reductions), accept the run
|
||||||
|
# when the raw feature vector matches the committed reference within a
|
||||||
|
# relative tolerance — platform-independent where the hash is not (#560).
|
||||||
|
tolerance_match = False
|
||||||
|
max_abs_dev = None
|
||||||
|
max_rel_dev = None
|
||||||
|
ref_path = os.path.join(SCRIPT_DIR, REFERENCE_VECTOR_FILENAME)
|
||||||
|
if not hash_match and os.path.exists(ref_path):
|
||||||
|
ref_vec = np.load(ref_path)["features"]
|
||||||
|
if ref_vec.shape == computed_vector.shape:
|
||||||
|
tolerance_match = bool(
|
||||||
|
np.allclose(
|
||||||
|
computed_vector, ref_vec, rtol=TOLERANCE_RTOL, atol=TOLERANCE_ATOL
|
||||||
|
)
|
||||||
|
)
|
||||||
|
diff = np.abs(computed_vector - ref_vec)
|
||||||
|
max_abs_dev = float(np.max(diff)) if diff.size else 0.0
|
||||||
|
max_rel_dev = (
|
||||||
|
float(np.max(diff / np.maximum(np.abs(ref_vec), 1e-12)))
|
||||||
|
if diff.size
|
||||||
|
else 0.0
|
||||||
|
)
|
||||||
|
|
||||||
|
if hash_match:
|
||||||
|
match_status = "MATCH (bit-exact)"
|
||||||
|
elif tolerance_match:
|
||||||
|
match_status = f"TOLERANCE MATCH (max rel dev {max_rel_dev:.2e})"
|
||||||
else:
|
else:
|
||||||
match_status = "MISMATCH"
|
match_status = "MISMATCH"
|
||||||
print(f" Status: {match_status}")
|
print(f" Status: {match_status}")
|
||||||
|
|
@ -535,14 +610,22 @@ def main():
|
||||||
# Final verdict
|
# Final verdict
|
||||||
# ---------------------------------------------------------------
|
# ---------------------------------------------------------------
|
||||||
print("=" * 72)
|
print("=" * 72)
|
||||||
if computed_hash == expected_hash:
|
if hash_match or tolerance_match:
|
||||||
print(" VERDICT: PASS")
|
print(" VERDICT: PASS")
|
||||||
print()
|
print()
|
||||||
print(" The pipeline produced a SHA-256 hash that matches the published")
|
if hash_match:
|
||||||
print(" expected hash. This proves:")
|
print(" The pipeline produced a SHA-256 hash that matches the published")
|
||||||
|
print(" expected hash (bit-exact). This proves:")
|
||||||
|
else:
|
||||||
|
print(" The bit-exact hash differs (CPU-microarchitecture FP reordering),")
|
||||||
|
print(" but the raw feature vector matches the published reference within")
|
||||||
|
print(
|
||||||
|
f" rtol={TOLERANCE_RTOL:g} / atol={TOLERANCE_ATOL:g} "
|
||||||
|
f"(max rel dev {max_rel_dev:.2e}). This proves:"
|
||||||
|
)
|
||||||
print(" 1. The SAME signal processing code ran on the reference signal")
|
print(" 1. The SAME signal processing code ran on the reference signal")
|
||||||
print(" 2. The output is DETERMINISTIC (same input -> same output)")
|
print(" 2. The output is DETERMINISTIC (same input -> same output)")
|
||||||
print(" 3. No randomness was introduced (hash would differ)")
|
print(" 3. No randomness was introduced")
|
||||||
print(" 4. The code path includes: noise removal, Hamming windowing,")
|
print(" 4. The code path includes: noise removal, Hamming windowing,")
|
||||||
print(" amplitude normalization, FFT-based Doppler extraction,")
|
print(" amplitude normalization, FFT-based Doppler extraction,")
|
||||||
print(" and power spectral density computation")
|
print(" and power spectral density computation")
|
||||||
|
|
@ -553,14 +636,19 @@ def main():
|
||||||
else:
|
else:
|
||||||
print(" VERDICT: FAIL")
|
print(" VERDICT: FAIL")
|
||||||
print()
|
print()
|
||||||
print(" The pipeline output does NOT match the expected hash.")
|
print(" The pipeline output does NOT match the expected hash OR the")
|
||||||
|
print(" reference feature vector within tolerance.")
|
||||||
|
if max_rel_dev is not None:
|
||||||
|
print(
|
||||||
|
f" max abs dev: {max_abs_dev:.3e} max rel dev: {max_rel_dev:.3e}"
|
||||||
|
f" (rtol={TOLERANCE_RTOL:g}, atol={TOLERANCE_ATOL:g})"
|
||||||
|
)
|
||||||
print()
|
print()
|
||||||
print(" Possible causes:")
|
print(" Possible causes:")
|
||||||
print(" - Numpy/scipy version mismatch (check requirements)")
|
|
||||||
print(" - Code change in CSI processor that alters numerical output")
|
print(" - Code change in CSI processor that alters numerical output")
|
||||||
print(" - Platform floating-point differences (unlikely for IEEE 754)")
|
print(" - A real (non-microarch) numerical regression")
|
||||||
print()
|
print()
|
||||||
print(" To update the expected hash after intentional changes:")
|
print(" To update after an intentional change:")
|
||||||
print(" python verify.py --generate-hash")
|
print(" python verify.py --generate-hash")
|
||||||
print("=" * 72)
|
print("=" * 72)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue