feat: 26-keypoint dexterous pose + full RuVector attention pipeline

Pose Decoder (17 → 26 keypoints):
- Add finger approximations: thumb, index, pinky per hand (6 new)
- Add toe tips: left/right foot index (2 new)
- Add neck keypoint (1 new)
- Hand openness driven by arm motion intensity
- Finger positions computed from wrist-elbow axis angles

CNN Embedder (full RuVector WASM pipeline):
- Stage 1: Multi-Head Attention (global spatial reasoning)
- Stage 2: Hyperbolic Attention (hierarchical body-part tree)
- Stage 3: MoE Attention (3 experts: upper/lower/extremities, top-2)
- Blended 40/30/30 weighting → final embedding projection

Canvas Renderer:
- Magenta finger joints with distinct glow
- Cyan toe tips
- White neck keypoint
- Thinner limb lines for hand/foot connections
- Joint count shown in overlay label

CSI Simulator:
- Skip synthetic person state when live ESP32 connected
- Only simulate CSI data in demo mode (was already correct)

Embedding Space:
- Fixed projection: sparse 8-dim projection replaces cancelling sum
- Auto-scaling normalizes point spread to fill canvas

Cache busters bumped to v=5 on all imports.

Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
ruv 2026-03-12 19:28:10 -04:00
parent 1bc56fc4be
commit 0ef1252678
6 changed files with 206 additions and 53 deletions

View File

@ -4,7 +4,7 @@
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>WiFi-DensePose — Dual-Modal Pose Estimation</title>
<link rel="stylesheet" href="pose-fusion/css/style.css">
<link rel="stylesheet" href="pose-fusion/css/style.css?v=5">
</head>
<body>
@ -78,7 +78,24 @@
<div class="panel">
<div class="panel-title">&#9670; CSI Amplitude Heatmap</div>
<div class="csi-canvas-wrapper">
<canvas id="csi-canvas" width="320" height="120"></canvas>
<canvas id="csi-canvas" width="320" height="100"></canvas>
</div>
</div>
<!-- RSSI Signal Strength -->
<div class="panel">
<div class="panel-title">&#9670; RSSI Signal Strength</div>
<div class="rssi-row">
<div class="rssi-gauge">
<div class="rssi-bar-track">
<div class="rssi-bar-fill" id="rssi-bar" style="width:0%"></div>
</div>
<div class="rssi-values">
<span class="rssi-dbm" id="rssi-value">-- dBm</span>
<span class="rssi-quality" id="rssi-quality">--</span>
</div>
</div>
<canvas id="rssi-sparkline" width="160" height="32"></canvas>
</div>
</div>
@ -86,7 +103,7 @@
<div class="panel">
<div class="panel-title">&#9670; Embedding Space (2D Projection)</div>
<div class="embedding-canvas-wrapper">
<canvas id="embedding-canvas" width="320" height="140"></canvas>
<canvas id="embedding-canvas" width="320" height="100"></canvas>
</div>
</div>
@ -155,6 +172,6 @@
</div><!-- /main-grid -->
<script type="module" src="pose-fusion/js/main.js"></script>
<script type="module" src="pose-fusion/js/main.js?v=5"></script>
</body>
</html>

View File

@ -37,12 +37,18 @@ export class CanvasRenderer {
const limbColor = color === 'amber' ? this.colors.csiLimb : this.colors.limb;
const glowColor = color === 'amber' ? 'rgba(255,176,32,0.4)' : this.colors.jointGlow;
// Extended keypoint styling
const fingerColor = '#ff6ef0'; // Magenta for finger tips
const fingerGlow = 'rgba(255,110,240,0.4)';
const fingerLimb = 'rgba(255,110,240,0.5)';
const toeColor = '#6ef0ff'; // Cyan for toes
const neckColor = '#ffffff'; // White for neck
ctx.clearRect(0, 0, width, height);
if (!keypoints || keypoints.length === 0) return;
// Draw limbs first (behind joints)
ctx.lineWidth = 3;
ctx.lineCap = 'round';
for (const [i, j] of SKELETON_CONNECTIONS) {
@ -54,18 +60,22 @@ export class CanvasRenderer {
const bx = kpB.x * width, by = kpB.y * height;
const avgConf = (kpA.confidence + kpB.confidence) / 2;
// Classify the link by keypoint index: fingers are 17-22, toe tips are 23-24
const isFingerLink = i >= 17 && i <= 22 || j >= 17 && j <= 22;
const isToeLink = i >= 23 && i <= 24 || j >= 23 && j <= 24;
// Glow
ctx.strokeStyle = this.colors.limbGlow;
ctx.lineWidth = 8;
ctx.globalAlpha = avgConf * 0.4;
ctx.strokeStyle = isFingerLink ? fingerLimb : this.colors.limbGlow;
ctx.lineWidth = isFingerLink ? 4 : 8;
ctx.globalAlpha = avgConf * (isFingerLink ? 0.3 : 0.4);
ctx.beginPath();
ctx.moveTo(ax, ay);
ctx.lineTo(bx, by);
ctx.stroke();
// Main line
ctx.strokeStyle = limbColor;
ctx.lineWidth = 2.5;
ctx.strokeStyle = isFingerLink ? fingerColor : isToeLink ? toeColor : limbColor;
ctx.lineWidth = isFingerLink || isToeLink ? 1.5 : 2.5;
ctx.globalAlpha = avgConf;
ctx.beginPath();
ctx.moveTo(ax, ay);
@ -75,43 +85,52 @@ export class CanvasRenderer {
// Draw joints
ctx.globalAlpha = 1;
for (const kp of keypoints) {
for (let idx = 0; idx < keypoints.length; idx++) {
const kp = keypoints[idx];
if (!kp || kp.confidence < minConf) continue;
const x = kp.x * width;
const y = kp.y * height;
const r = 3 + kp.confidence * 3;
const isFinger = idx >= 17 && idx <= 22;
const isToe = idx >= 23 && idx <= 24;
const isNeck = idx === 25;
const r = isFinger ? 2 + kp.confidence * 2 : isToe ? 2 : 3 + kp.confidence * 3;
const jColor = isFinger ? fingerColor : isToe ? toeColor : isNeck ? neckColor : jointColor;
const gColor = isFinger ? fingerGlow : glowColor;
// Glow
ctx.beginPath();
ctx.arc(x, y, r + 4, 0, Math.PI * 2);
ctx.fillStyle = glowColor;
ctx.globalAlpha = kp.confidence * 0.6;
ctx.arc(x, y, r + (isFinger ? 3 : 4), 0, Math.PI * 2);
ctx.fillStyle = gColor;
ctx.globalAlpha = kp.confidence * (isFinger ? 0.5 : 0.6);
ctx.fill();
// Joint dot
ctx.beginPath();
ctx.arc(x, y, r, 0, Math.PI * 2);
ctx.fillStyle = jointColor;
ctx.fillStyle = jColor;
ctx.globalAlpha = kp.confidence;
ctx.fill();
// White center
ctx.beginPath();
ctx.arc(x, y, r * 0.4, 0, Math.PI * 2);
ctx.fillStyle = '#fff';
ctx.globalAlpha = kp.confidence * 0.8;
ctx.fill();
// White center (body joints only)
if (!isFinger && !isToe) {
ctx.beginPath();
ctx.arc(x, y, r * 0.4, 0, Math.PI * 2);
ctx.fillStyle = '#fff';
ctx.globalAlpha = kp.confidence * 0.8;
ctx.fill();
}
}
ctx.globalAlpha = 1;
// Confidence label
// Confidence label + keypoint count
if (opts.label) {
const visCount = keypoints.filter(kp => kp && kp.confidence >= minConf).length;
ctx.font = '11px "JetBrains Mono", monospace';
ctx.fillStyle = jointColor;
ctx.globalAlpha = 0.8;
ctx.fillText(opts.label, 8, height - 8);
ctx.fillText(`${opts.label} · ${visCount} joints`, 8, height - 8);
ctx.globalAlpha = 1;
}
}

View File

@ -34,6 +34,8 @@ export class CnnEmbedder {
this.wasmEmbedder = null;
this.rvAttention = null; // RuVector Multi-Head Attention (WASM)
this.rvFlash = null; // RuVector Flash Attention (WASM)
this.rvHyperbolic = null; // RuVector Hyperbolic Attention (hierarchical body)
this.rvMoE = null; // RuVector Mixture-of-Experts (body-region routing)
this.rvModule = null; // RuVector WASM module reference
this.useRuVector = false;
@ -82,9 +84,13 @@ export class CnnEmbedder {
this.rvAttention = new mod.WasmMultiHeadAttention(16, 4);
// Create Flash Attention for larger sequences
this.rvFlash = new mod.WasmFlashAttention(16, 8);
// Hyperbolic Attention for hierarchical body-part modeling (Poincaré ball, curvature=-1)
this.rvHyperbolic = new mod.WasmHyperbolicAttention(16, -1.0);
// MoE: 3 experts (upper-body, lower-body, extremities), top-2 active
this.rvMoE = new mod.WasmMoEAttention(16, 3, 2);
this.rvModule = mod;
this.useRuVector = true;
console.log(`[CNN] RuVector Attention WASM v${mod.version()} loaded — Multi-Head + Flash Attention active`);
console.log(`[CNN] RuVector Attention WASM v${mod.version()} loaded — MHA + Flash + Hyperbolic + MoE active`);
return true;
} catch (e) {
console.log('[CNN] RuVector Attention WASM not available:', e.message);
@ -198,9 +204,11 @@ export class CnnEmbedder {
}
/**
* Extract embedding using RuVector Multi-Head Attention WASM.
* Treats conv feature map spatial positions as sequence tokens,
* applies self-attention, then projects to embedding dimension.
* Extract embedding using full RuVector attention pipeline:
* 1. Multi-Head Attention (global spatial reasoning)
* 2. Hyperbolic Attention (hierarchical body-part structure)
* 3. MoE Attention (body-region specialized experts)
* 4. Blend the three stage outputs (40% / 30% / 30%) + project to the final embedding
*/
_extractWithAttention(convOut, numTokens, channels) {
// Subsample spatial tokens for attention (keep it fast: max 64 tokens)
@ -215,33 +223,62 @@ export class CnnEmbedder {
tokens.push(token);
}
// Use first token as query, all tokens as keys/values (self-attention)
// Average multiple query positions for robust embedding
const numQueries = Math.min(4, tokens.length);
const queryStride = Math.floor(tokens.length / numQueries);
const attended = new Float32Array(channels);
// === Stage 1: Multi-Head Attention (global spatial reasoning) ===
const mhaOut = new Float32Array(channels);
for (let q = 0; q < numQueries; q++) {
const queryToken = tokens[q * queryStride];
try {
const result = this.rvAttention.compute(queryToken, tokens, tokens);
for (let c = 0; c < channels; c++) {
attended[c] += result[c] / numQueries;
}
for (let c = 0; c < channels; c++) mhaOut[c] += result[c] / numQueries;
} catch (_) {
// Fallback: just average the tokens
for (let c = 0; c < channels; c++) {
attended[c] += queryToken[c] / numQueries;
}
for (let c = 0; c < channels; c++) mhaOut[c] += queryToken[c] / numQueries;
}
}
// Project attended features → embeddingDim
// === Stage 2: Hyperbolic Attention (hierarchical body structure) ===
const hyOut = new Float32Array(channels);
if (this.rvHyperbolic) {
try {
// Use MHA output as query against spatial tokens — captures parent→child relationships
const result = this.rvHyperbolic.compute(mhaOut, tokens, tokens);
for (let c = 0; c < channels; c++) hyOut[c] = result[c];
} catch (_) {
hyOut.set(mhaOut);
}
} else {
hyOut.set(mhaOut);
}
// === Stage 3: MoE Attention (body-region experts) ===
const moeOut = new Float32Array(channels);
if (this.rvMoE) {
try {
// MoE routes tokens to specialized experts and combines
const result = this.rvMoE.compute(hyOut, tokens, tokens);
for (let c = 0; c < channels; c++) moeOut[c] = result[c];
} catch (_) {
moeOut.set(hyOut);
}
} else {
moeOut.set(hyOut);
}
// === Stage 4: Blend the three stage outputs + project ===
// Blend: 40% MHA (global), 30% Hyperbolic (hierarchy), 30% MoE (regions)
const blended = new Float32Array(channels);
for (let c = 0; c < channels; c++) {
blended[c] = 0.4 * mhaOut[c] + 0.3 * hyOut[c] + 0.3 * moeOut[c];
}
// Project to embeddingDim
const emb = new Float32Array(this.embeddingDim);
for (let o = 0; o < this.embeddingDim; o++) {
let sum = 0;
for (let i = 0; i < channels; i++) {
sum += attended[i] * this.attnProjWeights[i * this.embeddingDim + o];
sum += blended[i] * this.attnProjWeights[i * this.embeddingDim + o];
}
emb[o] = sum;
}

View File

@ -79,6 +79,9 @@ export class CsiSimulator {
* (simulating through-wall sensing capability).
*/
updatePersonState(presence, x, y, motion) {
// Don't override real CSI sensing with synthetic video-derived state
if (this.mode === 'live') return;
if (presence > 0.1) {
// Person detected in video — update CSI state directly
this.personPresence = presence;

View File

@ -4,12 +4,12 @@
* Main orchestration: video capture → CNN embedding → CSI processing → fusion → rendering
*/
import { VideoCapture } from './video-capture.js?v=4';
import { CsiSimulator } from './csi-simulator.js?v=4';
import { CnnEmbedder } from './cnn-embedder.js?v=4';
import { FusionEngine } from './fusion-engine.js?v=4';
import { PoseDecoder } from './pose-decoder.js?v=4';
import { CanvasRenderer } from './canvas-renderer.js?v=4';
import { VideoCapture } from './video-capture.js?v=5';
import { CsiSimulator } from './csi-simulator.js?v=5';
import { CnnEmbedder } from './cnn-embedder.js?v=5';
import { FusionEngine } from './fusion-engine.js?v=5';
import { PoseDecoder } from './pose-decoder.js?v=5';
import { CanvasRenderer } from './canvas-renderer.js?v=5';
// === State ===
let mode = 'dual'; // 'dual' | 'video' | 'csi'

View File

@ -9,24 +9,35 @@
* When person exits frame, CSI data continues tracking (through-wall mode).
*/
// COCO keypoint definitions
// Extended keypoint definitions: 17 COCO + 9 extras (6 fingertip approximations, 2 toe tips, 1 neck) = 26 total
export const KEYPOINT_NAMES = [
'nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear',
'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow',
'left_wrist', 'right_wrist', 'left_hip', 'right_hip',
'left_knee', 'right_knee', 'left_ankle', 'right_ankle'
'left_knee', 'right_knee', 'left_ankle', 'right_ankle',
// Extended keypoints (17-25): fingers 17-22, toe tips 23-24, neck 25
'left_thumb', 'left_index', 'left_pinky', // 17, 18, 19
'right_thumb', 'right_index', 'right_pinky', // 20, 21, 22
'left_foot_index', 'right_foot_index', // 23, 24 (toe tips)
'neck', // 25 (mid-shoulder)
];
// Skeleton connections (pairs of keypoint indices)
export const SKELETON_CONNECTIONS = [
[0, 1], [0, 2], [1, 3], [2, 4], // Head
[5, 6], // Shoulders
[0, 25], // Nose → neck
[25, 5], [25, 6], // Neck → shoulders
[5, 7], [7, 9], // Left arm
[6, 8], [8, 10], // Right arm
[5, 11], [6, 12], // Torso
[11, 12], // Hips
[11, 13], [13, 15], // Left leg
[12, 14], [14, 16], // Right leg
// Hand connections
[9, 17], [9, 18], [9, 19], // Left wrist → fingers
[10, 20], [10, 21], [10, 22], // Right wrist → fingers
// Foot connections
[15, 23], [16, 24], // Ankles → toes
];
// Standard body proportions (relative to body height)
@ -41,6 +52,12 @@ const PROPORTIONS = {
kneeToAnkle: 0.24,
eyeSpacing: 0.04,
earSpacing: 0.07,
// Hand proportions
wristToFinger: 0.09,
fingerSpread: 0.04,
thumbAngle: 0.6, // radians from wrist-elbow axis
// Foot proportions
ankleToToe: 0.06,
};
export class PoseDecoder {
@ -191,6 +208,26 @@ export class PoseDecoder {
const legMotion = grid ? this._analyzeLegMotion(grid, cols, rows) : { left: 0, right: 0 };
const legSwing = 0.015;
// Compute hand finger positions from wrist-elbow axis
const lHandAngle = Math.atan2(lWristY - lElbowY, lWristX - lElbowX);
const rHandAngle = Math.atan2(rWristY - rElbowY, rWristX - rElbowX);
const fingerLen = P.wristToFinger * bodyH;
const fingerSpr = P.fingerSpread * bodyH;
// Hand openness driven by motion intensity (more motion = more spread)
const lHandOpen = Math.min(1, leftArmRaise * 0.5 + (this._leftArmX || 0) * 0.5);
const rHandOpen = Math.min(1, rightArmRaise * 0.5 + (this._rightArmX || 0) * 0.5);
// Left/right ankle and knee x-positions (the ankle x is reused below for the toe tips)
const lAnkleX = cx - hipHalfW + legMotion.left * legSwing * 1.3;
const rAnkleX = cx + hipHalfW + legMotion.right * legSwing * 1.3;
const lKneeX = cx - hipHalfW + legMotion.left * legSwing;
const rKneeX = cx + hipHalfW + legMotion.right * legSwing;
// Neck (x at body center cx, offset from shoulderY by a fraction of the head-to-shoulder length)
const neckX = cx;
const neckY = shoulderY - P.headToShoulder * bodyH * 0.35;
const keypoints = [
// 0: nose
{ x: headX, y: headY + 0.01, confidence: 0.92 },
@ -219,13 +256,53 @@ export class PoseDecoder {
// 12: right_hip
{ x: cx + hipHalfW, y: hipY, confidence: 0.91 },
// 13: left_knee
{ x: cx - hipHalfW + legMotion.left * legSwing, y: kneeY, confidence: 0.88 },
{ x: lKneeX, y: kneeY, confidence: 0.88 },
// 14: right_knee
{ x: cx + hipHalfW + legMotion.right * legSwing, y: kneeY, confidence: 0.88 },
{ x: rKneeX, y: kneeY, confidence: 0.88 },
// 15: left_ankle
{ x: cx - hipHalfW + legMotion.left * legSwing * 1.3, y: ankleY, confidence: 0.83 },
{ x: lAnkleX, y: ankleY, confidence: 0.83 },
// 16: right_ankle
{ x: cx + hipHalfW + legMotion.right * legSwing * 1.3, y: ankleY, confidence: 0.83 },
{ x: rAnkleX, y: ankleY, confidence: 0.83 },
// === Extended keypoints (17-25) ===
// 17: left_thumb — offset at thumb angle from wrist-elbow axis
{ x: lWristX + fingerLen * Math.cos(lHandAngle + P.thumbAngle) * (0.6 + lHandOpen * 0.4),
y: lWristY + fingerLen * Math.sin(lHandAngle + P.thumbAngle) * (0.6 + lHandOpen * 0.4),
confidence: 0.68 * (0.5 + lHandOpen * 0.5) },
// 18: left_index — extends along wrist-elbow axis
{ x: lWristX + fingerLen * Math.cos(lHandAngle) + fingerSpr * lHandOpen * Math.cos(lHandAngle + 0.3),
y: lWristY + fingerLen * Math.sin(lHandAngle) + fingerSpr * lHandOpen * Math.sin(lHandAngle + 0.3),
confidence: 0.72 * (0.5 + lHandOpen * 0.5) },
// 19: left_pinky — offset opposite thumb
{ x: lWristX + fingerLen * 0.85 * Math.cos(lHandAngle - P.thumbAngle * 0.7),
y: lWristY + fingerLen * 0.85 * Math.sin(lHandAngle - P.thumbAngle * 0.7),
confidence: 0.60 * (0.5 + lHandOpen * 0.5) },
// 20: right_thumb
{ x: rWristX + fingerLen * Math.cos(rHandAngle - P.thumbAngle) * (0.6 + rHandOpen * 0.4),
y: rWristY + fingerLen * Math.sin(rHandAngle - P.thumbAngle) * (0.6 + rHandOpen * 0.4),
confidence: 0.68 * (0.5 + rHandOpen * 0.5) },
// 21: right_index
{ x: rWristX + fingerLen * Math.cos(rHandAngle) + fingerSpr * rHandOpen * Math.cos(rHandAngle - 0.3),
y: rWristY + fingerLen * Math.sin(rHandAngle) + fingerSpr * rHandOpen * Math.sin(rHandAngle - 0.3),
confidence: 0.72 * (0.5 + rHandOpen * 0.5) },
// 22: right_pinky
{ x: rWristX + fingerLen * 0.85 * Math.cos(rHandAngle + P.thumbAngle * 0.7),
y: rWristY + fingerLen * 0.85 * Math.sin(rHandAngle + P.thumbAngle * 0.7),
confidence: 0.60 * (0.5 + rHandOpen * 0.5) },
// 23: left_foot_index (toe tip) — extends forward from ankle
{ x: lAnkleX + P.ankleToToe * bodyH * 0.5,
y: ankleY + P.ankleToToe * bodyH * 0.3,
confidence: 0.65 },
// 24: right_foot_index
{ x: rAnkleX + P.ankleToToe * bodyH * 0.5,
y: ankleY + P.ankleToToe * bodyH * 0.3,
confidence: 0.65 },
// 25: neck (midpoint between shoulders, slightly above)
{ x: neckX, y: neckY, confidence: 0.93 },
];
for (let i = 0; i < keypoints.length; i++) {