test(calibration): self-contained end-to-end regression test
The committed calibration service (model.py/calibrate.py/infer.py) had no automated test — only ad-hoc verification. Adds a CPU-only, no-real-checkpoint test that exercises the CLI end-to-end on synthetic data: build base -> calibrate.py fits adapter -> infer.py runs base+adapter, asserting adapter size (<200KB), keypoint shape [N,17,2], finiteness, [0,1] range, and that the adapter actually changes the output. Passes on Windows CPU (torch 2.11). Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
1b48b6f5c8
commit
76cc57294d
|
|
@ -0,0 +1,103 @@
|
|||
"""Self-contained regression test for the RuView calibration service.
|
||||
|
||||
Exercises the committed CLI end-to-end on synthetic data (CPU, no GPU, no real checkpoint):
|
||||
build a base -> calibrate.py fits an adapter -> infer.py runs base+adapter -> assert the
|
||||
adapter is small, inference is shape-correct and finite, and the adapter actually changes output.
|
||||
|
||||
Run: python test_calibration.py (or via pytest)
|
||||
"""
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
HERE = Path(__file__).parent
|
||||
sys.path.insert(0, str(HERE))
|
||||
from model import PoseNet, standardize # noqa: E402
|
||||
|
||||
|
||||
def _make_base(path: Path):
|
||||
torch.manual_seed(0)
|
||||
net = PoseNet()
|
||||
# Save without the deterministic gr.A buffer (mirrors the published checkpoint;
|
||||
# calibrate.py/infer.py load with strict=False).
|
||||
sd = {k: v for k, v in net.state_dict().items() if k != "gr.A"}
|
||||
torch.save(sd, path)
|
||||
|
||||
|
||||
def _make_data(path: Path, n: int, seed: int):
|
||||
rng = np.random.default_rng(seed)
|
||||
X = rng.standard_normal((n, 3, 114, 10)).astype(np.float32)
|
||||
Y = rng.random((n, 17, 2)).astype(np.float32) # keypoints in [0,1]
|
||||
np.savez(path, X=X, Y=Y)
|
||||
|
||||
|
||||
def _run(*args):
|
||||
r = subprocess.run(
|
||||
[sys.executable, str(HERE / args[0]), *map(str, args[1:])],
|
||||
capture_output=True, text=True,
|
||||
)
|
||||
assert r.returncode == 0, f"{args[0]} failed:\n{r.stdout}\n{r.stderr}"
|
||||
return r.stdout
|
||||
|
||||
|
||||
def test_calibration_end_to_end():
|
||||
with tempfile.TemporaryDirectory() as d:
|
||||
d = Path(d)
|
||||
base = d / "base.pt"
|
||||
calib = d / "calib.npz"
|
||||
frames = d / "frames.npz"
|
||||
adapter = d / "room.adapter.npz"
|
||||
kp = d / "kp.npy"
|
||||
|
||||
_make_base(base)
|
||||
_make_data(calib, n=40, seed=1) # ≥20 → no underfit warning
|
||||
_make_data(frames, n=16, seed=2)
|
||||
|
||||
# 1) calibrate -> adapter
|
||||
out = _run("calibrate.py", "--base", base, "--data", calib, "--out", adapter,
|
||||
"--iters", "50", "--device", "cpu")
|
||||
assert adapter.exists(), "adapter not written"
|
||||
assert "saved" in out.lower()
|
||||
sz = adapter.stat().st_size
|
||||
assert sz < 200_000, f"adapter unexpectedly large ({sz} bytes)"
|
||||
|
||||
# adapter contains the expected LoRA tensors (materialize + close so the
|
||||
# Windows tempdir can be cleaned up — np.load keeps a lazy file handle).
|
||||
with np.load(adapter) as z:
|
||||
keys = [k for k in z.files if k.endswith(".A") or k.endswith(".B")]
|
||||
assert keys, f"adapter has no LoRA tensors: {z.files}"
|
||||
lora = {k: z[k].astype(np.float32) for k in keys}
|
||||
|
||||
# 2) infer with adapter -> keypoints
|
||||
_run("infer.py", "--base", base, "--adapter", adapter, "--data", frames,
|
||||
"--out", kp, "--device", "cpu")
|
||||
out_kp = np.load(kp)
|
||||
assert out_kp.shape == (16, 17, 2), f"bad keypoint shape {out_kp.shape}"
|
||||
assert np.isfinite(out_kp).all(), "non-finite keypoints"
|
||||
assert (out_kp >= 0).all() and (out_kp <= 1).all(), "keypoints out of [0,1]"
|
||||
|
||||
# 3) adapter must actually change the output vs the zero-shot base
|
||||
with np.load(frames) as fz:
|
||||
frames_x = fz["X"][:]
|
||||
net = PoseNet()
|
||||
net.load_state_dict(torch.load(base, map_location="cpu"), strict=False)
|
||||
net.eval()
|
||||
x = standardize(torch.tensor(frames_x))
|
||||
with torch.no_grad():
|
||||
base_kp = net(x).reshape(16, 17, 2).numpy()
|
||||
net.add_lora()
|
||||
net.load_lora(lora)
|
||||
net.eval()
|
||||
with torch.no_grad():
|
||||
cal_kp = net(x).reshape(16, 17, 2).numpy()
|
||||
assert np.abs(base_kp - cal_kp).sum() > 1e-4, "adapter did not change output"
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_calibration_end_to_end()
|
||||
print("PASS: calibration service end-to-end (calibrate -> adapter -> infer)")
|
||||
Loading…
Reference in New Issue