fix(pointcloud): exponential backoff on unreachable backend + status banner

When ?backend=<url> pointed at a server that wasn't running (e.g. user forgot to start ruview-pointcloud serve before clicking Connect ESP32), the viewer was retrying at 10 Hz forever — flooding the console with ERR_CONNECTION_REFUSED and offering no guidance about what was wrong. Two fixes: 1. Replace setInterval(fetchCloud, 100) with a self-rescheduling setTimeout. On success: 250 ms steady cadence. On failure for an explicit backend: exponential backoff with a 1 s floor — 1 s for the first three failures, then 2 s → 4 s → 8 s → 16 s → capped at 30 s. Resets to 250 ms the moment the backend comes back. Auto mode (Pages with no backend) still disables network entirely after the first 404. Strict-live mode (?live=1) also backs off (500 ms → 1 s → 2 s … capped at 30 s) so it doesn't spam. 2. Show an actionable status banner in the info panel when the chosen backend is unreachable: the URL, the actual error string, the next retry time, and the exact `cargo run` command to start the server. The visitor sees the diagnosis instead of staring at a 'demo' badge wondering why their ESP32 feed isn't visible. The scene keeps animating (face mesh / synthetic) while the viewer waits, so the tab never goes blank. Co-Authored-By: claude-flow <ruv@ruv.net>
2026-04-29 23:03:05 -04:00 · 2026-04-29 23:03:05 -04:00 · 9a078e4ac8
parent 0e39faac73
commit 9a078e4ac8
1 changed files with 67 additions and 9 deletions
--- a/v2/crates/wifi-densepose-pointcloud/src/viewer.html
+++ b/v2/crates/wifi-densepose-pointcloud/src/viewer.html
@ -477,15 +477,31 @@

        // Once auto mode confirms there is no /api/splats backend on this origin,
        // set this flag so we stop hammering the network with 404 fetches every
-        // tick. Remote (?backend=<url>) and live (?live=1) modes keep retrying so
-        // a transient outage doesn't permanently downgrade them.
+        // tick. Console stays clean; demo renders locally.
        var networkDisabled = false;

+        // Exponential backoff state for explicit ?backend=<url> and strict-live
+        // (?live=1) polling. The server may be down (ERR_CONNECTION_REFUSED)
+        // and we shouldn't hammer it at 10 Hz indefinitely. After each failure
+        // we lengthen the delay; on success we snap back to the normal cadence.
+        var BASE_INTERVAL_MS = 250;
+        var MAX_INTERVAL_MS = 30000;
+        var currentIntervalMs = BASE_INTERVAL_MS;
+        var consecutiveFailures = 0;
+        var fetchTimer = null;
+        var lastBackendError = null;
+
+        function scheduleNextFetch(delayMs) { // arm the poll timer: call fetchCloud after delayMs milliseconds
+            if (fetchTimer) clearTimeout(fetchTimer); // cancel any timer still pending so only one poll is queued
+            fetchTimer = setTimeout(fetchCloud, delayMs); // keep the handle so the next call can cancel it
+        }
+
        async function fetchCloud() {
-            // Demo-only mode: never hit the network.
+            // Demo-only mode: never hit the network. Use the normal cadence.
            if (backendArg === "demo" || networkDisabled) {
                transportMode = "demo";
                handleData(pickDemoFrame());
+                scheduleNextFetch(BASE_INTERVAL_MS);
                return;
            }
            try {
@ -493,18 +509,44 @@
                if (!resp.ok) throw new Error("HTTP " + resp.status);
                var data = await resp.json();
                transportMode = (backendArg === "auto") ? "live" : "remote";
+                consecutiveFailures = 0;
+                currentIntervalMs = BASE_INTERVAL_MS;
+                lastBackendError = null;
                handleData(data);
+                scheduleNextFetch(BASE_INTERVAL_MS);
            } catch (err) {
+                consecutiveFailures += 1;
+                lastBackendError = err && err.message ? err.message : String(err);
                if (requireLive) {
                    document.getElementById("stats").innerHTML =
-                        '<span class="demo">&#9679; OFFLINE</span><br>Live backend required (?live=1) but unreachable.<br><span class="label">' + (err && err.message ? err.message : err) + '</span>';
+                        '<span class="demo">&#9679; OFFLINE</span><br>Live backend required (?live=1) but unreachable.<br><span class="label">' + lastBackendError + '</span>';
+                    // Even strict-live: back off so we don't spam.
+                    currentIntervalMs = Math.min(currentIntervalMs * 2, MAX_INTERVAL_MS);
+                    scheduleNextFetch(currentIntervalMs);
                    return;
                }
-                // Auto mode + first failure → assume this is a static host (Pages)
-                // and stop polling. Console stays clean; demo renders locally.
-                if (backendArg === "auto") networkDisabled = true;
+                // Auto mode + first failure → assume static host (Pages), disable
+                // network entirely so the console stays clean.
+                if (backendArg === "auto") {
+                    networkDisabled = true;
+                    transportMode = "demo";
+                    handleData(pickDemoFrame());
+                    scheduleNextFetch(BASE_INTERVAL_MS);
+                    return;
+                }
+                // Explicit backend (?backend=<url>) — keep trying with
+                // exponential backoff floored at 1 s: 1 s (×3) → 2 s → 4 s … up to 30 s.
+                // Render the demo while we wait so the scene stays alive, and
+                // surface the failure so the user knows the server is down.
+                currentIntervalMs = Math.min(Math.max(BASE_INTERVAL_MS * Math.pow(2, consecutiveFailures - 1), 1000), MAX_INTERVAL_MS);
                transportMode = "demo";
-                handleData(pickDemoFrame());
+                var demoFrame = pickDemoFrame();
+                demoFrame._backendUnreachable = true;
+                demoFrame._backendUrl = backendArg;
+                demoFrame._backendError = lastBackendError;
+                demoFrame._retryInMs = currentIntervalMs;
+                handleData(demoFrame);
+                scheduleNextFetch(currentIntervalMs);
            }
        }

@ -564,6 +606,21 @@
                        + "Splats: " + splatCount + "<br>"
                        + "Frame: " + data.frame;

+                    // Unreachable backend banner — explicit ?backend=<url> failed
+                    // to connect. Show actionable guidance instead of leaving the
+                    // user staring at a "demo" badge wondering why their ESP32
+                    // feed isn't visible.
+                    if (data._backendUnreachable) {
+                        var nextSec = Math.round((data._retryInMs || 1000) / 1000);
+                        html += '<div class="section">'
+                            + '<span class="demo">&#9679; ' + data._backendUrl + '</span> unreachable'
+                            + '<br><span class="label">' + (data._backendError || "connection failed") + '</span>'
+                            + '<br><span class="label">retry in ' + nextSec + 's</span>'
+                            + '<br><br><span class="label">start the server:</span>'
+                            + '<br><code style="color:#e8a634">cargo run -p wifi-densepose-pointcloud --release \\<br>&nbsp;&nbsp;-- serve --bind 127.0.0.1:9880</code>'
+                            + '</div>';
+                    }
+
                    // CSI frame rate
                    html += '<div class="section">'
                        + '<span class="label">CSI Rate:</span> '
@ -676,8 +733,9 @@
            });
        })();

+        // fetchCloud self-schedules via setTimeout — no setInterval to avoid
+        // overlapping calls on slow networks and to support exponential backoff.
        fetchCloud();
-        setInterval(fetchCloud, 100); // 10 Hz — denser updates so face mesh feels live and the spiral animates smoothly

        function updateSplats(splats) {
            if (pointsMesh) scene.remove(pointsMesh);