test(rvf): add JSONL container round-trip and error tests

Covers the new sniff + JSONL adapter end-to-end:

  - from_bytes_dispatches_to_jsonl_on_brace: feeds an in-memory copy
    of the exact bytes shipped at ruvnet/wifi-densepose-pretrained
    (model.rvf.jsonl, v1.0.0) through the public API and asserts the
    synthesised manifest exposes the real model id and version.
  - jsonl_sniff_tolerates_leading_whitespace: padding with \n \t still
    dispatches to JSONL.
  - jsonl_quantization_becomes_quant_segment: the quantization line
    surfaces verbatim through quant_info().
  - jsonl_preserves_other_lines_in_metadata: encoder, lora, ewc, and
    metadata lines all round-trip through metadata()["lines"].
  - jsonl_no_weights_segment_present: weights() returns None - the
    JSONL bundle does not carry f32 weights, by design.
  - jsonl_progressive_loader_layer_a_works: covers the integration
    point that previously broke - ProgressiveLoader::new + load_layer_a
    now reports the real model name on JSONL input.
  - jsonl_invalid_json_line_is_explicit /
    jsonl_missing_type_field_is_explicit /
    jsonl_blank_lines_only_rejected: every error path produces a
    "JSONL RVF" prefix and identifies the offending line, so failures
    surface to operators instead of degrading to null output.
  - jsonl_minimal_metadata_only: a single-line bundle still parses.
  - binary_error_mentions_jsonl_hint: corrupt binary input now points
    at the JSONL format in its error text.
This commit is contained in:
lockewerks 2026-05-25 16:54:52 -06:00
parent 5b9714bf61
commit c271d478c0
1 changed files with 134 additions and 0 deletions

View File

@ -1221,6 +1221,140 @@ mod tests {
assert!(reader.lora_profile("nonexistent").is_none());
}
// ── JSONL RVF container tests (HuggingFace bundle compatibility) ────────
/// In-memory fixture mirroring the exact bytes of `model.rvf.jsonl` as shipped
/// in the `ruvnet/wifi-densepose-pretrained` HuggingFace bundle (v1.0.0).
///
/// The fixture holds five newline-terminated JSON records, in order:
/// `metadata`, `encoder`, `lora`, `ewc`, `quantization`.
const SAMPLE_HF_JSONL: &str = concat!(
    // Record 1: metadata (one JSON object, split over several literals).
    r#"{"type":"metadata","name":"wifi-densepose-csi-embedding","#,
    r#""version":"1.0.0","architecture":"csi-encoder-8-64-128","#,
    r#""training":{"steps":12212300,"loss":0.065,"learningRate":0.001},"#,
    r#""custom":{"inputDim":8,"hiddenDim":64,"embeddingDim":128}}"#,
    "\n",
    // Record 2: encoder shapes.
    r#"{"type":"encoder","w1_shape":[8,64],"w2_shape":[64,128]}"#,
    "\n",
    // Record 3: LoRA configuration.
    r#"{"type":"lora","config":{"rank":8,"alpha":16}}"#,
    "\n",
    // Record 4: EWC statistics.
    r#"{"type":"ewc","stats":{"tasksLearned":4}}"#,
    "\n",
    // Record 5: quantization description.
    r#"{"type":"quantization","default_bits":4,"variants":[2,4,8]}"#,
    "\n",
);
#[test]
fn from_bytes_dispatches_to_jsonl_on_brace() {
    // Feed the HuggingFace fixture through the public entry point and check
    // the fields exposed by the synthesised manifest.
    let bundle = SAMPLE_HF_JSONL.as_bytes();
    let rvf = RvfReader::from_bytes(bundle).expect("HF JSONL bundle should load via sniff");
    let synthesised = rvf.manifest().expect("manifest should be synthesised");

    let expected_fields = [
        ("model_id", "wifi-densepose-csi-embedding"),
        ("version", "1.0.0"),
        ("description", "csi-encoder-8-64-128"),
    ];
    for (field, expected) in expected_fields {
        assert_eq!(synthesised[field], expected);
    }
}
#[test]
fn jsonl_sniff_tolerates_leading_whitespace() {
    // Prefix the fixture with newline, space and tab before handing it over;
    // a manifest must still be produced.
    let mut padded = String::from("\n \t");
    padded.push_str(SAMPLE_HF_JSONL);

    let loaded = RvfReader::from_bytes(padded.as_bytes());
    let reader = loaded.expect("whitespace prefix ok");
    assert!(reader.manifest().is_some());
}
#[test]
fn jsonl_quantization_becomes_quant_segment() {
    // The `quantization` record of the fixture must be readable through
    // `quant_info()` with its fields intact.
    let bundle = SAMPLE_HF_JSONL.as_bytes();
    let reader = RvfReader::from_bytes(bundle).unwrap();
    let quant = reader
        .quant_info()
        .expect("quantization line should map to SEG_QUANT");

    let expected_variants = serde_json::json!([2, 4, 8]);
    assert_eq!(quant["default_bits"], 4);
    assert_eq!(quant["variants"], expected_variants);
}
#[test]
fn jsonl_preserves_other_lines_in_metadata() {
    let bundle = SAMPLE_HF_JSONL.as_bytes();
    let reader = RvfReader::from_bytes(bundle).unwrap();
    let aggregated = reader.metadata().expect("aggregated metadata present");
    assert_eq!(aggregated["source_format"], "rvf-jsonl");

    let entries = aggregated["lines"]
        .as_array()
        .expect("lines must be an array");

    // At least the metadata, encoder, lora and ewc records are expected here
    // (the original author notes that quantization went to SEG_QUANT).
    let count = entries.len();
    assert!(count >= 4, "got {} lines", count);

    // Collect every string-valued `type` tag, then check each expected kind.
    let mut kinds: Vec<&str> = Vec::new();
    for entry in entries.iter() {
        if let Some(kind) = entry["type"].as_str() {
            kinds.push(kind);
        }
    }
    let has_kind = |wanted: &str| kinds.iter().any(|kind| *kind == wanted);
    assert!(has_kind("metadata"));
    assert!(has_kind("encoder"));
    assert!(has_kind("lora"));
    assert!(has_kind("ewc"));
}
#[test]
fn jsonl_no_weights_segment_present() {
    // Per the original author's note, the JSONL bundle keeps its f32 matrices
    // in companion safetensors / qN files, so the reader must not fabricate a
    // weights segment of its own.
    let bundle = SAMPLE_HF_JSONL.as_bytes();
    let reader = RvfReader::from_bytes(bundle).unwrap();

    let weights_absent = reader.weights().is_none();
    assert!(weights_absent, "JSONL must not synthesise weights");

    let reports_weights = reader.info().has_weights;
    assert!(!reports_weights);
}
#[test]
fn jsonl_progressive_loader_layer_a_works() {
    // Regression guard for the integration point that broke when the loader
    // could not sniff JSONL: Layer A must now carry the real model name.
    let bundle = SAMPLE_HF_JSONL.as_bytes();
    let mut progressive = crate::rvf_pipeline::ProgressiveLoader::new(bundle)
        .expect("progressive loader accepts JSONL bytes");

    let layer_a = progressive
        .load_layer_a()
        .expect("layer A must populate from synthesised manifest");

    assert_eq!(layer_a.model_name, "wifi-densepose-csi-embedding");
    assert_eq!(layer_a.version, "1.0.0");
    assert!(layer_a.n_segments > 0);
}
#[test]
fn jsonl_invalid_json_line_is_explicit() {
    // Line 1 is a valid record, line 2 is plain text. The error has to carry
    // the "JSONL RVF" marker and name the offending line.
    let malformed: &[u8] = b"{\"type\":\"metadata\",\"name\":\"x\"}\nthis is not json\n";
    let message = RvfReader::from_bytes(malformed).unwrap_err();

    for needle in ["JSONL RVF", "line 2"] {
        assert!(message.contains(needle), "got: {message}");
    }
}
#[test]
fn jsonl_missing_type_field_is_explicit() {
    // A well-formed JSON object that lacks the `type` key must be rejected
    // with an error that spells out the missing field.
    let untyped: &[u8] = b"{\"name\":\"no-type-here\"}\n";
    let message = RvfReader::from_bytes(untyped).unwrap_err();

    let names_missing_field = message.contains("missing required string field `type`");
    assert!(names_missing_field, "got: {message}");
}
#[test]
fn binary_error_mentions_jsonl_hint() {
    // 128 zero bytes whose first four bytes are overwritten with a bogus
    // little-endian magic (0xDEADBEEF): the input does not begin with `{`,
    // and the resulting error must report the bad magic.
    let mut data = vec![0u8; 128];
    data[0..4].copy_from_slice(&0xDEAD_BEEFu32.to_le_bytes());

    let err = RvfReader::from_bytes(&data).unwrap_err();

    // Both assertions echo the actual error text on failure, matching the
    // other error-path tests in this module, so a regression is diagnosable
    // from the test log alone.
    assert!(
        err.contains("invalid magic"),
        "expected invalid-magic error, got: {err}"
    );
    // Hint text travels with the binary-path error so operators can grep it.
    assert!(err.contains("JSONL"), "expected JSONL hint, got: {err}");
}
#[test]
fn jsonl_minimal_metadata_only() {
    // A bundle consisting of a single metadata record must still load and
    // yield a manifest with the name and version from that record.
    let single_record: &[u8] = b"{\"type\":\"metadata\",\"name\":\"tiny\",\"version\":\"0.0.1\"}\n";
    let reader = RvfReader::from_bytes(single_record).unwrap();
    let manifest = reader.manifest().unwrap();

    for (field, expected) in [("model_id", "tiny"), ("version", "0.0.1")] {
        assert_eq!(manifest[field], expected);
    }
}
#[test]
fn jsonl_blank_lines_only_rejected() {
    // The input is a lone `{` followed only by empty lines. It begins with a
    // brace yet contains no complete JSON record, so loading must fail with an
    // error carrying the "JSONL RVF" marker rather than yield an empty reader.
    //
    // NOTE(review): the original author's comment states that an
    // all-whitespace file (no `{` anywhere) does not trip the JSONL sniff and
    // parses as an empty binary container; that case is not exercised here —
    // confirm against the sniff implementation.
    let brace_then_blanks: &[u8] = b"{\n\n";
    let message = RvfReader::from_bytes(brace_then_blanks).unwrap_err();

    let has_marker = message.contains("JSONL RVF");
    assert!(has_marker, "got: {message}");
}
#[test]
fn test_rvf_multiple_lora_profiles() {
let w1: Vec<f32> = vec![1.0, 2.0, 3.0];