From b2e2e6d6fd6b6656c1b2817d36d9677315ca86da Mon Sep 17 00:00:00 2001 From: rUv Date: Mon, 18 May 2026 08:18:18 -0400 Subject: [PATCH] fix(sensing-server): WS broadcast emits effective_source() not hardcoded "esp32" (closes #618) (#621) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by @ArnonEnbar with a complete reproduction. broadcast_tick_task() re-emits the cached `latest_update` every tick so pose WS clients keep getting data even when ESP32 pauses between frames. The `source` field of that cached update was set to "esp32" at the moment a fresh ESP32 frame was last decoded (main.rs:3885, :4136). After the ESP32 loses power or network, no fresh frame is decoded — the cached `latest_update` is still re-broadcast every tick with the stale source: "esp32" baked in. UI's "Sensing" tab keeps showing "LIVE — ESP32 HARDWARE Connected" with frozen vitals/features/ classification re-broadcast indefinitely. REST `/health` correctly reports source: "esp32:offline" (via effective_source(), which checks last_esp32_frame elapsed time against ESP32_OFFLINE_TIMEOUT=5s) — but the WS broadcast path was the one consumer that didn't call it. Fix: clone the cached update per tick, overwrite source with s.effective_source(), then serialize and broadcast. UI now switches to "esp32:offline" on the same 5s budget as the REST surface. cargo build -p wifi-densepose-sensing-server --no-default-features: 17s, no errors (1 pre-existing unused-import warning unchanged). 
--- CHANGELOG.md | 1 + v2/crates/wifi-densepose-sensing-server/src/main.rs | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2655037..0c122af9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Pre-fix, unauthenticated callers could read `../../etc/passwd`-style paths, write arbitrary JSONL files, load attacker-controlled `.rvf` model files, or delete arbitrary files the server process could touch. 9 unit tests in `path_safety::tests` exercise the rejection envelope (empty, too-long, path separators, parent-dir traversal, null byte, whitespace/specials, non-ASCII). ### Fixed +- **WebSocket `/ws/sensing` now reports `esp32:offline` when ESP32 hardware goes stale** (closes #618). `broadcast_tick_task` was re-emitting the cached `latest_update` with a frozen `source: "esp32"` field forever after the hardware lost power or network. The REST `/health` endpoint already called `effective_source()` (which returns `"esp32:offline"` after `ESP32_OFFLINE_TIMEOUT` = 5 s with no UDP frames), but the WS broadcast path was the one consumer that didn't. Result: the UI's "LIVE — ESP32 HARDWARE Connected" banner stayed green long after the hardware went away, and `vital_signs`/`features`/`classification` were re-broadcast with their last-seen values indefinitely. Fix: clone the cached `latest_update` per tick, overwrite `source` with `s.effective_source()`, then serialize and broadcast. UI can now switch to an offline state on the same 5-second budget the REST surface uses. - **Proof replay (`archive/v1/data/proof/verify.py`) is now cross-platform deterministic** (closes #560). 
Three changes together: (1) `features_to_bytes()` now `np.round(.., HASH_QUANTIZATION_DECIMALS=6)`s each feature array before packing as little-endian f64, collapsing ULP-level drift from scipy.fft pocketfft SIMD reordering; (2) the `Verify Pipeline Determinism` workflow pins `OMP_NUM_THREADS=1`, `OPENBLAS_NUM_THREADS=1`, `MKL_NUM_THREADS=1`, `VECLIB_MAXIMUM_THREADS=1`, `NUMEXPR_NUM_THREADS=1` — multi-threaded BLAS reductions were a deeper source of non-determinism than SIMD reordering, and 6-decimal quantization alone wasn't enough across Azure VM microarchitectures; (3) `expected_features.sha256` regenerated under the new conditions. CI now passes the determinism check (same hash across consecutive runs on canonical Linux x86_64 CI runner: `667eb054c44ac510342665bf9c93d608868a8ead948ae8774b2796ebce6f8fe7`). `scripts/probe-fft-platform.py` updated to mirror `HASH_QUANTIZATION_DECIMALS=6` for cross-machine spot-checks. - **`archive/v1/src/services/pose_service.py:223` calls the right method on `PhaseSanitizer`** (closes #612). The call was `self.phase_sanitizer.sanitize(phase_data)`, but `PhaseSanitizer`'s full-pipeline entry point is named `sanitize_phase()` (`unwrap_phase` + `remove_outliers` + `smooth_phase` chained, see `archive/v1/src/core/phase_sanitizer.py:266`). The shorter `sanitize` name doesn't exist on the class, so any path that reached this branch raised `AttributeError` and crashed the pose service mid-frame. - **`adaptive_classifier.rs:94` no longer panics on NaN feature values** (closes #611). 
diff --git a/v2/crates/wifi-densepose-sensing-server/src/main.rs b/v2/crates/wifi-densepose-sensing-server/src/main.rs index 25f1eb50..07585868 100644 --- a/v2/crates/wifi-densepose-sensing-server/src/main.rs +++ b/v2/crates/wifi-densepose-sensing-server/src/main.rs @@ -4331,7 +4331,18 @@ async fn broadcast_tick_task(state: SharedState, tick_ms: u64) { if s.tx.receiver_count() > 0 { // Re-broadcast the latest sensing_update so pose WS clients // always get data even when ESP32 pauses between frames. - if let Ok(json) = serde_json::to_string(update) { + // + // Issue #618: overwrite `source` with `effective_source()` + // before each broadcast so a stale latest_update (frozen + // payload from a now-offline ESP32) is emitted with + // `source: "esp32:offline"` instead of `source: "esp32"`. + // The REST `/health` endpoint already does this; before + // this fix the WS path was the only consumer that didn't, + // so the UI's "LIVE — ESP32 HARDWARE Connected" banner + // stayed green long after the hardware went away. + let mut tagged = update.clone(); + tagged.source = s.effective_source(); + if let Ok(json) = serde_json::to_string(&tagged) { let _ = s.tx.send(json); } }