// wifi-densepose/harness/ruview/test/tools.test.mjs
//
// 112 lines
// 4.3 KiB
// JavaScript

// SPDX-License-Identifier: MIT
// RuView harness tests — Node's built-in test runner (no devDeps to install).
// Run: `node --test test/` (or `npm test`).
import assert from 'node:assert/strict';
import { mkdtempSync, rmSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { test } from 'node:test';
import { claimCheck, summarize } from '../src/guardrails.js';
import { TOOLS, runTool, listTools, findRepoRoot } from '../src/tools.js';
import { run } from '../bin/cli.js';
test('guardrail flags the retracted 100% framing as high severity', () => {
const r = claimCheck('Our model reaches 100% accuracy on every pose.');
assert.equal(r.ok, false);
assert.ok(r.findings.some((f) => f.severity === 'high'));
});
test('guardrail flags an untagged percentage accuracy claim', () => {
// "hit", not "measured" — "measured" would (correctly) route to the no-reproducer branch.
const r = claimCheck('We hit 92.9% PCK on the test set.');
assert.equal(r.ok, false);
assert.ok(r.findings.some((f) => /not tagged/i.test(f.reason)));
});
test('guardrail passes a MEASURED claim that cites a reproducer', () => {
const r = claimCheck('Held-out PCK@20 59.5% vs 50% mean-pose baseline = +9.4pp (MEASURED, verify.py).');
assert.equal(r.ok, true, JSON.stringify(r.findings));
});
test('guardrail flags MEASURED with no reproducer', () => {
const r = claimCheck('Presence detection 97% (MEASURED).');
assert.equal(r.ok, false);
assert.ok(r.findings.some((f) => /no reproducer/i.test(f.reason)));
});
test('guardrail ignores non-metric prose', () => {
assert.equal(claimCheck('The ESP32 streams CSI over UDP to the sensing-server.').ok, true);
assert.equal(claimCheck('').ok, true);
});
test('summarize gives PASS/finding text', () => {
assert.match(summarize(claimCheck('nothing here')), /PASS/);
assert.match(summarize(claimCheck('100% accuracy')), /finding/);
});
test('registry exposes the documented tools with schemas', () => {
const names = Object.keys(TOOLS);
for (const n of ['ruview.onboard', 'ruview.claim_check', 'ruview.verify', 'ruview.node_monitor', 'ruview.calibrate', 'ruview.node_flash']) {
assert.ok(names.includes(n), `missing ${n}`);
assert.equal(TOOLS[n].inputSchema.type, 'object');
}
assert.equal(listTools().length, names.length);
});
test('ruview.onboard returns paths and a recommendation', () => {
const r = runTool('ruview.onboard', {});
assert.equal(r.ok, true);
assert.ok(r.paths['live-esp32']);
assert.ok(['repo-build', 'docker-demo'].includes(r.recommend));
});
test('ruview.claim_check tool wraps the guardrail', () => {
const r = runTool('ruview.claim_check', { text: '100% accuracy' });
assert.equal(r.ok, false);
assert.match(r.summary, /honesty|tag|MEASURED|finding/i);
});
test('unknown tool fails closed', () => {
const r = runTool('ruview.does_not_exist', {});
assert.equal(r.ok, false);
assert.equal(r.reason, 'unknown_tool');
});
test('node_monitor fails closed without a port', () => {
const r = runTool('ruview.node_monitor', {});
assert.equal(r.ok, false);
assert.equal(r.reason, 'no_port');
});
test('node_flash refuses without confirm (mutating guard)', () => {
const r = runTool('ruview.node_flash', { port: 'COM8', variant: 's3-8mb' });
assert.equal(r.ok, false);
// either not-confirmed (win32) or unsupported_platform (posix) — both fail-closed
assert.ok(['not_confirmed', 'unsupported_platform'].includes(r.reason));
});
test('verify fails closed when not in a RuView repo', () => {
// point at a tmp dir with no repo markers
const r = runTool('ruview.verify', { repo: process.platform === 'win32' ? 'C:/Windows/Temp' : '/tmp' });
assert.equal(r.ok, false);
assert.ok(['proof_missing', 'python_missing'].includes(r.reason), r.reason);
});
test('CLI run(): claim-check exits non-zero on a bad claim', async () => {
const code = await run(['claim-check', '--text', '100% accuracy']);
assert.notEqual(code, 0);
});
test('CLI run(): doctor exits 0 (tools-only path)', async () => {
const code = await run(['doctor']);
assert.equal(code, 0);
});
test('CLI run(): unknown command exits non-zero', async () => {
assert.notEqual(await run(['definitely-not-a-command']), 0);
});
test('findRepoRoot locates this monorepo from cwd', () => {
// when run from within wifi-densepose, it should find a root; elsewhere null is fine
const root = findRepoRoot();
assert.ok(root === null || typeof root === 'string');
});