From 15a983b55564dfb77013e61c69e405f6f3fc0732 Mon Sep 17 00:00:00 2001 From: ruv Date: Wed, 17 Jun 2026 10:17:12 -0400 Subject: [PATCH] fix(paired-data): 4 bugs corrupting/blocking camera-supervised training data (#1007) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. record-csi-udp.py stamped LOCAL time with a 'Z' (UTC) suffix → camera/CSI disagreed by the UTC offset → 0 aligned pairs. Now writes true UTC via datetime.now(timezone.utc). 2. align-ground-truth.js kept empty-keypoint (non-detection) records at confidence 0, collapsing window avgConf below threshold → all windows rejected. Now skipped at load. 3. extractCsiMatrix silently zero-padded/truncated mixed-subcarrier frames. Now frames are filtered to the session's modal subcarrier count before windowing — never padded. 4. CSI/feature matrices are filled frame-major but were labeled shape [nSc, nFrames] — transposed. Labels corrected to [nFrames, nSc] / [nFrames, dim]. Co-Authored-By: claude-flow --- scripts/align-ground-truth.js | 38 +++++++++++++++++++++++++++++------ scripts/record-csi-udp.py | 4 +++- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/scripts/align-ground-truth.js b/scripts/align-ground-truth.js index 6fb39260..ad8e06af 100644 --- a/scripts/align-ground-truth.js +++ b/scripts/align-ground-truth.js @@ -184,7 +184,9 @@ function loadGroundTruth(filePath) { const raw = loadJsonl(filePath); const frames = []; for (const r of raw) { - if (r.ts_ns == null || !r.keypoints) continue; + // Skip non-detection frames (empty keypoints []) — they must not dilute window + // confidence; confidence stats are over actual detections only (#1007 Bug 2). + if (r.ts_ns == null || !r.keypoints || r.keypoints.length === 0) continue; frames.push({ tsMs: cameraTsToMs(r.ts_ns), keypoints: r.keypoints, @@ -266,7 +268,29 @@ function loadCsi(filePath) { // Sort by timestamp rawCsi.sort((a, b) => a.tsMs - b.tsMs); features.sort((a, b) => a.tsMs - b.tsMs); - return { rawCsi, features }; + + // Bug 3 (#1007): keep only frames at the session's MODAL subcarrier count so windows + // are homogeneous; never silently zero-pad/truncate the off-format frames the ESP32 + // emits (HT20/HT40/fragments). extractCsiMatrix then sees uniform-width frames. + return { rawCsi: filterToModalSubcarriers(rawCsi), features }; +} + +/** + * Keep only frames whose subcarrier count equals the session's modal (most common) + * count. Off-format frames are dropped (logged), not padded — prevents the silent + * zero-padding that corrupted windows in #1007. + */ +function filterToModalSubcarriers(frames) { + if (frames.length === 0) return frames; + const counts = new Map(); + for (const f of frames) counts.set(f.subcarriers, (counts.get(f.subcarriers) || 0) + 1); + let modal = frames[0].subcarriers, best = 0; + for (const [sc, n] of counts) if (n > best) { best = n; modal = sc; } + const kept = frames.filter((f) => f.subcarriers === modal); + if (kept.length !== frames.length) { + console.error(`[align] #1007: kept ${kept.length}/${frames.length} CSI frames at modal subcarrier count ${modal} (dropped ${frames.length - kept.length} off-format; no silent padding)`); + } + return kept; } // --------------------------------------------------------------------------- @@ -343,7 +367,8 @@ function averageKeypoints(cameraFrames) { /** * Extract CSI amplitude matrix from raw_csi window. - * Returns { data: flat Float32Array, shape: [subcarriers, windowFrames] }. + * Fill is frame-major (matrix[f*nSc + s]), so shape is [windowFrames, subcarriers] + * (#1007 Bug 4 — was mislabeled [subcarriers, windowFrames], transposing consumers). */ function extractCsiMatrix(window) { const nFrames = window.length; @@ -363,12 +388,13 @@ function extractCsiMatrix(window) { } } - return { data: Array.from(matrix), shape: [nSc, nFrames] }; + return { data: Array.from(matrix), shape: [nFrames, nSc] }; } /** * Extract feature matrix from feature-type window. - * Returns { data: flat array, shape: [featureDim, windowFrames] }. + * Fill is frame-major (matrix[f*dim + d]), so shape is [windowFrames, featureDim] + * (#1007 Bug 4 — was mislabeled [featureDim, windowFrames]). */ function extractFeatureMatrix(window) { const nFrames = window.length; @@ -382,7 +408,7 @@ function extractFeatureMatrix(window) { } } - return { data: Array.from(matrix), shape: [dim, nFrames] }; + return { data: Array.from(matrix), shape: [nFrames, dim] }; } // --------------------------------------------------------------------------- diff --git a/scripts/record-csi-udp.py b/scripts/record-csi-udp.py index 2c0bdb11..1f74b27b 100644 --- a/scripts/record-csi-udp.py +++ b/scripts/record-csi-udp.py @@ -15,6 +15,7 @@ import os import socket import struct import time +from datetime import datetime, timezone def parse_csi_packet(data): @@ -41,7 +42,8 @@ def parse_csi_packet(data): return { "type": "raw_csi", - "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S.") + f"{int(time.time() * 1000) % 1000:03d}Z", + # true UTC, not local-time-labeled-Z (#1007 Bug 1) — e.g. "2026-06-17T01:23:45.678Z" + "timestamp": datetime.now(timezone.utc).isoformat(timespec="milliseconds").replace("+00:00", "Z"), "ts_ns": time.time_ns(), "node_id": node_id, "rssi": rssi,