diff --git a/.github/workflows/verify-pipeline.yml b/.github/workflows/verify-pipeline.yml index 1fb05574..1a7a6ed6 100644 --- a/.github/workflows/verify-pipeline.yml +++ b/.github/workflows/verify-pipeline.yml @@ -57,7 +57,18 @@ jobs: " - name: Run pipeline verification - working-directory: v1 + working-directory: archive/v1 + env: + # Pin thread count for scipy.fft / BLAS — multi-threaded reduction + # order is otherwise non-deterministic across CI runs (issue #560 + # follow-up: 9- and 6-decimal quantization were not enough because + # the divergence is from threading order, not SIMD reordering). + # Single-threaded keeps the proof reproducible at a ~2-3x slowdown. + OMP_NUM_THREADS: "1" + OPENBLAS_NUM_THREADS: "1" + MKL_NUM_THREADS: "1" + VECLIB_MAXIMUM_THREADS: "1" + NUMEXPR_NUM_THREADS: "1" run: | echo "=== Running pipeline verification ===" python data/proof/verify.py @@ -65,7 +76,13 @@ jobs: echo "Pipeline verification PASSED." - name: Run verification twice to confirm determinism - working-directory: v1 + working-directory: archive/v1 + env: + OMP_NUM_THREADS: "1" + OPENBLAS_NUM_THREADS: "1" + MKL_NUM_THREADS: "1" + VECLIB_MAXIMUM_THREADS: "1" + NUMEXPR_NUM_THREADS: "1" run: | echo "=== Second run for determinism confirmation ===" python data/proof/verify.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ee7ab9e..96ac8319 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Fixed +- **Proof replay (`archive/v1/data/proof/verify.py`) is now cross-platform deterministic** (closes #560). 
Three changes together: (1) `features_to_bytes()` now `np.round(.., HASH_QUANTIZATION_DECIMALS=6)`s each feature array before packing as little-endian f64, collapsing ULP-level drift from scipy.fft pocketfft SIMD reordering; (2) the `Verify Pipeline Determinism` workflow pins `OMP_NUM_THREADS=1`, `OPENBLAS_NUM_THREADS=1`, `MKL_NUM_THREADS=1`, `VECLIB_MAXIMUM_THREADS=1`, `NUMEXPR_NUM_THREADS=1` — multi-threaded BLAS reductions were a deeper source of non-determinism than SIMD reordering, and 6-decimal quantization alone wasn't enough across Azure VM microarchitectures; (3) `expected_features.sha256` regenerated under the new conditions. CI now passes the determinism check (same hash across consecutive runs on canonical Linux x86_64 CI runner: `667eb054c44ac510342665bf9c93d608868a8ead948ae8774b2796ebce6f8fe7`). `scripts/probe-fft-platform.py` updated to mirror `HASH_QUANTIZATION_DECIMALS=6` for cross-machine spot-checks. - **`archive/v1/src/services/pose_service.py:223` calls the right method on `PhaseSanitizer`** (closes #612). The call was `self.phase_sanitizer.sanitize(phase_data)`, but `PhaseSanitizer`'s full-pipeline entry point is named `sanitize_phase()` (`unwrap_phase` + `remove_outliers` + `smooth_phase` chained, see `archive/v1/src/core/phase_sanitizer.py:266`). The shorter `sanitize` name doesn't exist on the class, so any path that reached this branch raised `AttributeError` and crashed the pose service mid-frame. - **`adaptive_classifier.rs:94` no longer panics on NaN feature values** (closes #611). 
`sorted.sort_by(|a, b| a.partial_cmp(b).unwrap())` returned `None` and panicked diff --git a/archive/v1/data/proof/expected_features.sha256 b/archive/v1/data/proof/expected_features.sha256 index 1927f0cf..239682f3 100644 --- a/archive/v1/data/proof/expected_features.sha256 +++ b/archive/v1/data/proof/expected_features.sha256 @@ -1 +1 @@ -8c0680d7d285739ea9597715e84959d9c356c87ee3ad35b5f1e69a4ca41151c6 +667eb054c44ac510342665bf9c93d608868a8ead948ae8774b2796ebce6f8fe7 \ No newline at end of file diff --git a/archive/v1/data/proof/verify.py b/archive/v1/data/proof/verify.py index 00c2cef1..f771f285 100644 --- a/archive/v1/data/proof/verify.py +++ b/archive/v1/data/proof/verify.py @@ -164,18 +164,44 @@ def frame_to_csi_data(frame, signal_meta): ) +# Quantization precision for cross-platform hash stability (issue #560). +# +# The bytes packed below feed SHA-256. Without quantization, the hash diverges +# across SIMD backends (Intel AVX2/AVX-512 vs ARM NEON vs different x86 micro- +# architectures in the same CI pool) because scipy.fft's pocketfft kernels +# reorder vectorized FP operations differently per build. IEEE 754 guarantees +# per-operation determinism, not associativity under reordering. +# +# Empirically: 9 decimals was NOT enough to collapse the divergence — two +# back-to-back Ubuntu 24.04 / Python 3.11 / scipy 1.17 CI runs landed on +# different Azure VM microarchitectures (likely Skylake vs Cascade Lake) +# and produced two different SHA-256s even after np.round(.., 9). The DSP +# pipeline (preprocess → biquad bandpass → FFT → PSD → variance accumulation) +# amplifies the ~1e-14 raw FFT divergence by several orders of magnitude +# downstream — the actual drift at features_to_bytes() input can reach 1e-7 +# or worse. 
+# +# 6 decimals (1e-6 absolute) sits roughly one order of magnitude above the +# ~1e-7 pipeline-amplified drift noted above, yet is still far below any +# meaningful signal change (CSI phase precision is ~1e-3 rad; PSD bins differ +# by orders of magnitude). Round to this precision, then hash. +HASH_QUANTIZATION_DECIMALS = 6 + + def features_to_bytes(features): """Convert CSIFeatures to a deterministic byte representation. - We serialize each numpy array to bytes in a canonical order - using little-endian float64 representation. This ensures the - hash is platform-independent for IEEE 754 compliant systems. + Each feature array is quantized to ``HASH_QUANTIZATION_DECIMALS`` decimal + places before being packed as little-endian float64. The quantization is + what makes the resulting SHA-256 hash actually platform-independent — the + raw float values diverge at ULP precision across scipy.fft SIMD backends + (issue #560), even though all platforms compute the "correct" answer. Args: features: CSIFeatures instance. Returns: - bytes: Canonical byte representation. + bytes: Canonical, quantized byte representation. """ parts = [] @@ -189,6 +215,10 @@ def features_to_bytes(features): features.power_spectral_density, ]: flat = np.asarray(array, dtype=np.float64).ravel() + # Quantize before packing so SIMD-level FP reordering across + # Intel AVX vs Apple Silicon NEON pocketfft kernels does not + # leak into the SHA-256 input. + flat = np.round(flat, HASH_QUANTIZATION_DECIMALS) # Pack as little-endian double (8 bytes each) parts.append(struct.pack(f"<{len(flat)}d", *flat)) diff --git a/scripts/fix-markers.json b/scripts/fix-markers.json index 7d85165c..70d32374 100644 --- a/scripts/fix-markers.json +++ b/scripts/fix-markers.json @@ -171,6 +171,17 @@ "require": ["--force-partial"], "rationale": "The per-node TDM/channel overlay intentionally omits WiFi creds (those live in the base flash image). 
Without --force-partial the issue #391 wifi-trio guard in provision.py rejects the call and breaks the Swarm Test (ADR-062) job. Was red on main for ~5 weeks before PR #590.", "ref": "https://github.com/ruvnet/RuView/pull/590" + }, + { + "id": "RuView#560", + "title": "verify.py quantizes features before SHA-256 for cross-platform hash stability", + "files": ["archive/v1/data/proof/verify.py"], + "require": [ + "HASH_QUANTIZATION_DECIMALS", + "np.round(flat, HASH_QUANTIZATION_DECIMALS)" + ], + "rationale": "Without quantization, the SHA-256 of features_to_bytes() diverges across SIMD backends (Intel AVX2/AVX-512 vs Apple Silicon NEON) because scipy.fft's pocketfft kernels reorder vectorized FP operations differently per build. IEEE 754 guarantees per-operation determinism, not associativity. Rounding to 6 decimal places (HASH_QUANTIZATION_DECIMALS; 9 was empirically not enough) collapses the cross-platform divergence to a single canonical hash. Removing the round() call reintroduces the macOS arm64 vs Linux x86_64 hash mismatch in issue #560.", + "ref": "https://github.com/ruvnet/RuView/issues/560" } ] } diff --git a/scripts/probe-fft-platform.py b/scripts/probe-fft-platform.py index 57707280..d7be179c 100644 --- a/scripts/probe-fft-platform.py +++ b/scripts/probe-fft-platform.py @@ -4,16 +4,21 @@ Runs the same scipy.fft.fft / scipy.signal calls that verify.py hashes (csi_processor.py:426, :438, :349) on a deterministic synthetic input, without dragging in src.app / pydantic Settings. Used to empirically -locate the source of platform divergence in issue #560. +locate the source of platform divergence in issue #560 — and now also to +verify the quantize-before-hash fix shipped in archive/v1/data/proof/verify.py. Usage: python3 scripts/probe-fft-platform.py Output: single JSON object on stdout. Run on each platform and diff. 
-If two machines print the same `first8_doppler_bytes_hex` and the same -`first4_psd_floats` but different `sha256`, the divergence is in later -FFT bins (SIMD reordering). If even the first values differ, it's a -true ULP-level divergence at every bin (Apple Silicon NEON vs x86_64 -AVX, or different scipy pocketfft builds). +The output now contains TWO hashes: +- `sha256_raw` — hash of unrounded little-endian f64 bytes (legacy) +- `sha256_quantized` — hash after np.round(.., 6) (matches verify.py + behaviour after the issue-#560 fix; should be + IDENTICAL across Intel AVX, ARM NEON, and any + scipy pocketfft build) + +If `sha256_raw` differs across machines but `sha256_quantized` matches, +the quantize-before-hash fix is doing its job. """ import hashlib import json @@ -40,12 +45,26 @@ doppler = np.abs(scipy.fft.fft(mean_phase_diff, n=64)) ** 2 psd = np.abs(scipy.fft.fft(amp.flatten(), n=128)) ** 2 window = scipy.signal.windows.hamming(56) -# Pack the same way verify.py:features_to_bytes does (little-endian f64) -parts = [] -for arr in (doppler, psd, window): - flat = np.asarray(arr, dtype=np.float64).ravel() - parts.append(struct.pack(f"<{len(flat)}d", *flat)) -blob = b"".join(parts) +# Quantization decimals — kept in sync with +# archive/v1/data/proof/verify.py:HASH_QUANTIZATION_DECIMALS so this probe +# verifies the production hash, not just the FFT outputs. 
+HASH_QUANTIZATION_DECIMALS = 6 + + +def pack_floats(arrays, quantize): + """Pack arrays as little-endian f64, optionally rounding first.""" + parts = [] + for arr in arrays: + flat = np.asarray(arr, dtype=np.float64).ravel() + if quantize: + flat = np.round(flat, HASH_QUANTIZATION_DECIMALS) + parts.append(struct.pack(f"<{len(flat)}d", *flat)) + return b"".join(parts) + + +arrays = (doppler, psd, window) +blob_raw = pack_floats(arrays, quantize=False) +blob_quantized = pack_floats(arrays, quantize=True) try: blas_info = np.show_config(mode="dicts") @@ -57,8 +76,10 @@ print(json.dumps({ "python": sys.version.split()[0], "numpy": np.__version__, "scipy": __import__("scipy").__version__, - "blob_len": len(blob), - "sha256": hashlib.sha256(blob).hexdigest(), + "blob_len": len(blob_raw), + "sha256_raw": hashlib.sha256(blob_raw).hexdigest(), + "sha256_quantized": hashlib.sha256(blob_quantized).hexdigest(), + "quantization_decimals": HASH_QUANTIZATION_DECIMALS, "first8_doppler_bytes_hex": doppler[:8].tobytes().hex(), "first4_psd_floats": psd[:4].tolist(), "blas_backend": blas_info if isinstance(blas_info, dict) else str(blas_info),