From 6c230ed92823da56ba2440d4d538976b0ef8deef Mon Sep 17 00:00:00 2001 From: ruv Date: Fri, 15 May 2026 11:35:33 -0400 Subject: [PATCH] fix(verify): repoint wrapper to archive/v1 paths (#559) + cross-platform FFT probe (#560) #559: Fix ./verify wrapper The repo moved v1/ -> archive/v1/ but ./verify still pointed at the old paths and failed before reaching the proof script with: FAIL: Reference signal not found at .../v1/data/proof/sample_csi_data.json FAIL: verify.py not found at .../v1/data/proof/verify.py Update PROOF_DIR, V1_SRC, and two diagnostic strings to use archive/v1/. #560: Add platform probe + root-cause evidence Add scripts/probe-fft-platform.py that runs verify.py's hash-relevant scipy.fft.fft / scipy.signal.windows.hamming calls in isolation on a deterministic input (no pydantic Settings stack), so the source of divergence can be located across platforms. Tested on three machines via Tailscale: Windows (Intel AVX-512, numpy 2.4.2 / scipy 1.17.1): first4_psd_floats = [..., 94.40426770856882, ..., 51.677496924642476] sha256 = 78b3fb4acb8cc18c3e870f92e29ee98143c7cac4767f2f71b0fc384a82b92f6e ruvultra (Linux x86_64, numpy 1.26.4 / scipy 1.14.1): first4_psd_floats = [..., 94.40426770856882, ..., 51.677496924642476] sha256 = 41dc56416b6e8346d6457b1e3c9ca5d4b9035f645658e40e2eb668d08efaf9b6 ruv-mac-mini (Apple Silicon arm64/NEON, numpy 2.4.4 / scipy 1.17.1): first4_psd_floats = [..., 94.4042677085688, ..., 51.67749692464246] sha256 = 9b5e192b56d26a486eefe5dff6bb0e05f6223163a4246043fc168002d495efca Win and Linux agree on the first PSD/doppler values but produce different SHA-256s (later FFT bins diverge due to the different scipy versions' pocketfft SIMD paths). Mac arm64 differs from x86_64 at ULP precision (~2e-14 at value ~94 = ~1 ULP) already at index 1 of the first PSD bins. Root cause: SIMD-vectorized FFT reorders floating-point operations. NEON on Apple Silicon and AVX2/AVX-512 on x86_64 produce results that differ at the ULP level, which a bit-exact SHA-256 cannot tolerate. 
The verify.py docstring at line 172 ("platform-independent for IEEE 754 compliant systems") is incorrect -- IEEE 754 guarantees per-operation determinism, but a vectorized FFT reorders operations, so results can differ between SIMD implementations. This commit ships the diagnostic probe + the #559 path fix only. Changing the verify.py hash function itself (quantize-before-hash to absorb ULP divergence + regeneration of expected_features.sha256 on a canonical CI platform) is a follow-up that affects a published trust-anchor artifact -- left for maintainer decision. Verification: cd <repo-root> && ./verify # before: FAIL before reaching pipeline (v1/... not found) # after: reaches verify.py and runs the pipeline python3 scripts/probe-fft-platform.py # prints JSON with sha256 + first-few-floats per platform Refs: #559, #560 Co-Authored-By: claude-flow --- scripts/probe-fft-platform.py | 65 +++++++++++++++++++++++++++++++++++ verify | 8 ++--- 2 files changed, 69 insertions(+), 4 deletions(-) create mode 100644 scripts/probe-fft-platform.py diff --git a/scripts/probe-fft-platform.py b/scripts/probe-fft-platform.py new file mode 100644 index 00000000..57707280 --- /dev/null +++ b/scripts/probe-fft-platform.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +"""Platform probe: reproduce verify.py's hash-relevant FFT steps in isolation. + +Runs the same scipy.fft.fft / scipy.signal calls that verify.py hashes +(csi_processor.py:426, :438, :349) on a deterministic synthetic input, +without dragging in src.app / pydantic Settings. Used to empirically +locate the source of platform divergence in issue #560. + +Usage: python3 scripts/probe-fft-platform.py +Output: single JSON object on stdout. Run on each platform and diff. + +If two machines print the same `first8_doppler_bytes_hex` and the same +`first4_psd_floats` but different `sha256`, the divergence is in later +FFT bins (SIMD reordering). If even the first values differ, it's a +true ULP-level divergence at every bin (Apple Silicon NEON vs x86_64 +AVX, or different scipy pocketfft builds). 
+""" +import hashlib +import json +import platform +import struct +import sys + +import numpy as np +import scipy.fft +import scipy.signal + +# Deterministic synthetic input -- no IO, no .env, no Settings +rng = np.random.RandomState(42) +N_FRAMES = 100 +N_SUBC = 100 +amp = rng.randn(N_FRAMES, N_SUBC).astype(np.float64) + +# Mirror the three scipy calls verify.py's hash depends on: +# archive/v1/src/core/csi_processor.py:349 -> scipy.signal.windows.hamming +# archive/v1/src/core/csi_processor.py:426 -> scipy.fft.fft(mean_phase_diff, n=64) +# archive/v1/src/core/csi_processor.py:438 -> scipy.fft.fft(amp.flatten(), n=128) +mean_phase_diff = amp.mean(axis=1) +doppler = np.abs(scipy.fft.fft(mean_phase_diff, n=64)) ** 2 +psd = np.abs(scipy.fft.fft(amp.flatten(), n=128)) ** 2 +window = scipy.signal.windows.hamming(56) + +# Pack the same way verify.py:features_to_bytes does (little-endian f64) +parts = [] +for arr in (doppler, psd, window): + flat = np.asarray(arr, dtype=np.float64).ravel() + parts.append(struct.pack(f"<{len(flat)}d", *flat)) +blob = b"".join(parts) + +try: + blas_info = np.show_config(mode="dicts") +except Exception: + blas_info = {"error": "show_config(mode=dicts) unavailable"} + +print(json.dumps({ + "uname": platform.uname()._asdict(), + "python": sys.version.split()[0], + "numpy": np.__version__, + "scipy": __import__("scipy").__version__, + "blob_len": len(blob), + "sha256": hashlib.sha256(blob).hexdigest(), + "first8_doppler_bytes_hex": doppler[:8].tobytes().hex(), + "first4_psd_floats": psd[:4].tolist(), + "blas_backend": blas_info if isinstance(blas_info, dict) else str(blas_info), +}, indent=2, default=str)) diff --git a/verify b/verify index dd7eab57..02c50115 100755 --- a/verify +++ b/verify @@ -19,9 +19,9 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROOF_DIR="${SCRIPT_DIR}/v1/data/proof" +PROOF_DIR="${SCRIPT_DIR}/archive/v1/data/proof" VERIFY_PY="${PROOF_DIR}/verify.py" -V1_SRC="${SCRIPT_DIR}/v1/src" 
+V1_SRC="${SCRIPT_DIR}/archive/v1/src" # Colors (disabled if not a terminal) if [ -t 1 ]; then @@ -136,7 +136,7 @@ echo "" echo -e "${CYAN}[PHASE 3] PRODUCTION CODE INTEGRITY SCAN${RESET}" echo "" echo " Scanning ${V1_SRC} for np.random.rand / np.random.randn calls..." -echo " (Excluding v1/src/testing/ -- test helpers are allowed to use random.)" +echo " (Excluding archive/v1/src/testing/ -- test helpers are allowed to use random.)" echo "" MOCK_FINDINGS=0 @@ -204,7 +204,7 @@ elif [ $PIPELINE_EXIT -eq 2 ]; then echo -e " ${YELLOW}${BOLD}RESULT: SKIP${RESET}" echo "" echo " No expected hash file to compare against." - echo " Run: python v1/data/proof/verify.py --generate-hash" + echo " Run: python archive/v1/data/proof/verify.py --generate-hash" echo "" echo -e "${BOLD}======================================================================${RESET}" exit 2