wifi-densepose/v2/crates/wifi-densepose-sensing-server/src/model_format.rs

498 lines
21 KiB
Rust

//! Model-file format detection and conversion (issue #894).
//!
//! The published HuggingFace repo `ruvnet/wifi-densepose-pretrained` ships
//! several files, **none** of which carry the RVF binary-container magic
//! (`RVFS` = `0x52564653`) that [`crate::rvf_pipeline::ProgressiveLoader`]
//! expects:
//!
//! | File on HF | First bytes | What it is |
//! |-------------------------------|--------------------|------------------------------------|
//! | `model.safetensors` | `<u64 LE len>{...` | standard safetensors weight file |
//! | `model-q2/q4/q8.bin` | `35 57 45 77` ("5WEw", LE u32 `0x77455735`) | quantized weight blob |
//! | `model.rvf.jsonl` | `{...` | JSONL manifest (one JSON per line) |
//! | *(none shipped)* | `53 46 56 52` ("SFVR" on disk, LE u32 `0x52564653` = `RVFS`) | the binary RVF container the loader wants |
//!
//! Before this module, feeding any HF file to `--model` produced the opaque
//! `invalid magic at offset 0: expected 0x52564653, got 0x77455735` and the
//! server silently fell back to signal heuristics (the "10 persons for 1"
//! garbage the reporter saw).
//!
//! This module:
//! 1. **Auto-detects** the format by magic + extension ([`detect_format`]).
//! 2. Returns a **typed, actionable** error ([`ModelLoadError`]) that lists the
//! accepted formats and the one-command conversion path — never the opaque
//! magic string.
//! 3. Ships a **converter** ([`safetensors_to_rvf`], [`jsonl_to_rvf`]) so the
//! published `model.safetensors` / `model.rvf.jsonl` can be turned into the
//! binary RVF container the loader consumes, in one command
//! (`sensing-server --convert-model <in> --convert-out <out>`).
//!
//! # Honest scope
//!
//! Converting `model.safetensors` → RVF wires the **format / load path**: the
//! safetensors header is parsed, every F32 tensor's weights are flattened into
//! the RVF `SEG_VEC` weight segment, and a manifest is written so the loader's
//! Layer A/B/C all succeed. The pose-decoder *architecture* on HF differs from
//! this crate's inference head, so this converter does **not** claim
//! end-to-end pose accuracy from the converted weights — it makes the published
//! model **loadable** (magic/version/segments valid, weights present) and
//! removes the silent-heuristics fallback. Real pose inference from those exact
//! weights still needs the matching decoder (tracked in #894).
use crate::rvf_container::RvfBuilder;
/// The RVF binary-container magic.
///
/// The hex digits `52 56 46 53` spell `"RVFS"`. This module compares it against
/// the first four bytes of a file decoded with `u32::from_le_bytes`, so the
/// matching on-disk byte sequence is `53 46 56 52`.
const RVFS_MAGIC: u32 = 0x5256_4653;
/// The quantized-blob magic shipped on HF (`"5WEw"` = bytes `35 57 45 77`),
/// which decodes to `0x77455735` via `u32::from_le_bytes` — exactly the value
/// the loader reported in issue #894.
const HF_QUANT_MAGIC: u32 = 0x7745_5735;
/// The on-disk model-file formats this module is able to tell apart.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ModelFormat {
    /// Native RVF binary container; the loader reads this without conversion.
    Rvf,
    /// Plain `model.safetensors`: an 8-byte LE header length, then a JSON header.
    Safetensors,
    /// Quantized weight blob from HuggingFace (`model-q{2,4,8}.bin`, magic `0x77455735`).
    HfQuantBin,
    /// `model.rvf.jsonl` manifest holding one JSON object per line.
    JsonlManifest,
    /// Anything that matched none of the formats above.
    Unknown,
}

impl ModelFormat {
    /// Returns the fixed, human-readable name used for this format in
    /// diagnostics and error messages.
    pub fn label(self) -> &'static str {
        let text: &'static str = match self {
            Self::Unknown => "unknown format",
            Self::JsonlManifest => "JSONL manifest (model.rvf.jsonl)",
            Self::HfQuantBin => "HuggingFace quantized weight blob (model-q*.bin)",
            Self::Safetensors => "safetensors weight file",
            Self::Rvf => "RVF binary container (RVFS)",
        };
        text
    }
}
/// A typed, actionable model-load error (issue #894).
///
/// Replaces the opaque `"invalid magic at offset 0: expected 0x… got 0x…"`
/// string with a self-describing variant the caller can match on and present.
/// Each variant's `Display` text (the `#[error(...)]` attribute) names the
/// accepted/convertible formats or the conversion command, so it can be shown
/// to the user as-is.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum ModelLoadError {
/// The file is a recognised non-RVF format that must be converted first.
///
/// Produced by [`classify_load_failure`] for safetensors and JSONL inputs.
#[error(
"model file is {detected} — the --model loader needs an RVF binary container. \
Convert it once with `sensing-server --convert-model <in> --convert-out model.rvf`, \
then load the .rvf. (accepted by --model: RVF binary container; \
convertible: safetensors, model.rvf.jsonl)"
)]
NeedsConversion {
/// Label of the detected format (a [`ModelFormat::label`] value).
detected: &'static str,
},
/// The file is a quantized HF blob with no in-repo reader.
///
/// Produced by [`classify_load_failure`] and [`convert_to_rvf`] whenever the
/// input is detected as [`ModelFormat::HfQuantBin`].
#[error(
"model file is a HuggingFace quantized weight blob (magic 0x{magic:08X}); \
no reader for this quantization format ships in this build. Use the \
full-precision `model.safetensors` from the same HF repo and convert it \
with `sensing-server --convert-model model.safetensors --convert-out model.rvf`."
)]
UnsupportedQuant {
/// The first 4 bytes of the file as a LE u32 (e.g. `0x77455735`). The
/// in-module callers substitute the HF quant magic when the file is
/// shorter than 4 bytes; for a blob detected by file name only, this is
/// whatever the file actually starts with.
magic: u32,
},
/// The file matched no accepted or convertible format.
///
/// [`classify_load_failure`] also uses this variant when the RVFS magic is
/// present but the loader still rejected the container.
#[error(
"model file is an unknown format (first bytes 0x{first_bytes:08X}); \
accepted: RVF binary container (RVFS, 0x52564653); convertible: \
safetensors, model.rvf.jsonl. ({detail})"
)]
Unknown {
/// The first 4 bytes as a LE u32 (0 if the file is shorter).
first_bytes: u32,
/// Underlying detail (e.g. the original loader message).
detail: String,
},
/// Conversion of a recognised format failed.
///
/// Produced by [`safetensors_to_rvf`] and [`jsonl_to_rvf`].
#[error("failed to convert {format} to RVF: {detail}")]
ConversionFailed {
/// Source format label (a [`ModelFormat::label`] value).
format: &'static str,
/// Failure detail.
detail: String,
},
}
/// Detect a model-file format from its leading bytes and optional file name.
///
/// `name` is the file name (it may be empty); it is compared
/// case-insensitively. The checks run in this order and the first match wins:
///
/// 1. RVFS magic at offset 0 → [`ModelFormat::Rvf`].
/// 2. `.safetensors` suffix, or content shaped like a safetensors header
///    → [`ModelFormat::Safetensors`].
/// 3. Exact HF quant magic at offset 0 → [`ModelFormat::HfQuantBin`].
/// 4. `.jsonl` suffix (this covers `.rvf.jsonl`) → [`ModelFormat::JsonlManifest`].
/// 5. `.bin` suffix or `-q` anywhere in the name → [`ModelFormat::HfQuantBin`].
/// 6. A leading `{` byte → [`ModelFormat::JsonlManifest`].
/// 7. Otherwise → [`ModelFormat::Unknown`].
///
/// The explicit `.jsonl` suffix is deliberately tested *before* the loose
/// `-q` naming heuristic: otherwise a manifest such as
/// `wifi-densepose-quantized.rvf.jsonl` would be reported as a quantized blob
/// merely because its name contains `-q`.
pub fn detect_format(data: &[u8], name: &str) -> ModelFormat {
    let name = name.to_ascii_lowercase();
    let magic = leading_u32(data);

    // RVFS magic at offset 0 (the only format the loader reads directly).
    if magic == Some(RVFS_MAGIC) {
        return ModelFormat::Rvf;
    }

    // safetensors: 8-byte LE header length, then a JSON object opening with
    // '{'. Checked before any quant-blob heuristic so a `.safetensors` file is
    // never mistaken for a quant blob.
    if name.ends_with(".safetensors") || looks_like_safetensors(data) {
        return ModelFormat::Safetensors;
    }

    // HF quantized blob, strong signal: the exact magic.
    if magic == Some(HF_QUANT_MAGIC) {
        return ModelFormat::HfQuantBin;
    }

    // JSONL manifest by its well-known suffix. An explicit extension outranks
    // the substring-based quant naming heuristic below.
    if name.ends_with(".jsonl") {
        return ModelFormat::JsonlManifest;
    }

    // HF quantized blob, weak signal: `.bin` / `-q` naming.
    if name.ends_with(".bin") || name.contains("-q") {
        return ModelFormat::HfQuantBin;
    }

    // JSONL manifest by content: a leading '{' that was not claimed by the
    // safetensors check above.
    if data.first() == Some(&b'{') {
        return ModelFormat::JsonlManifest;
    }

    ModelFormat::Unknown
}
/// Turn an RVF-loader rejection into a typed, actionable [`ModelLoadError`].
///
/// The file is re-inspected with [`detect_format`] and the result decides the
/// variant:
///
/// * safetensors / JSONL manifest → [`ModelLoadError::NeedsConversion`] carrying
///   the detected format's label;
/// * HF quantized blob → [`ModelLoadError::UnsupportedQuant`] carrying the first
///   four bytes (or the HF quant magic when the file is shorter than that);
/// * RVFS magic present → [`ModelLoadError::Unknown`] whose detail is prefixed
///   with a note that the container itself failed to parse;
/// * anything else → [`ModelLoadError::Unknown`] carrying `detail` verbatim.
///
/// `detail` is the original loader message; `name` is the file name handed to
/// [`detect_format`].
pub fn classify_load_failure(data: &[u8], name: &str, detail: &str) -> ModelLoadError {
    // Every non-conversion arm reports the leading word, so decode it once.
    let first_word = leading_u32(data);

    match detect_format(data, name) {
        convertible @ (ModelFormat::Safetensors | ModelFormat::JsonlManifest) => {
            ModelLoadError::NeedsConversion {
                detected: convertible.label(),
            }
        }
        ModelFormat::HfQuantBin => ModelLoadError::UnsupportedQuant {
            magic: first_word.unwrap_or(HF_QUANT_MAGIC),
        },
        ModelFormat::Rvf => ModelLoadError::Unknown {
            first_bytes: first_word.unwrap_or(0),
            detail: format!("RVFS magic present but container parse failed: {detail}"),
        },
        ModelFormat::Unknown => ModelLoadError::Unknown {
            first_bytes: first_word.unwrap_or(0),
            detail: detail.to_owned(),
        },
    }
}
/// Convert a `model.safetensors` byte buffer into an RVF binary container that
/// [`crate::rvf_pipeline::ProgressiveLoader`] can load (issue #894).
///
/// The input is read as: an 8-byte little-endian header length, a JSON header
/// object of that length, then the tensor data region. Header entries are
/// visited in ascending tensor-name order so the output is deterministic.
/// Every `F32` tensor is decoded as little-endian `f32` values and appended to
/// one flat weight vector, which becomes the container's weight segment; the
/// names of the converted tensors are recorded in the metadata.
///
/// What is *not* converted:
/// * the `__metadata__` header entry is ignored;
/// * entries without a usable `data_offsets` pair (two unsigned integers) are
///   skipped;
/// * tensors of any dtype other than `F32` are bounds-checked but neither
///   flattened nor listed in the metadata (no silent casting).
///
/// `model_id` is passed through to the container manifest.
///
/// # Errors
/// [`ModelLoadError::ConversionFailed`] if
/// * the buffer is shorter than the 8-byte length prefix,
/// * the declared header length does not fit in the buffer,
/// * the header is not valid JSON or not a JSON object,
/// * any tensor's `data_offsets` are reversed or fall outside the buffer,
/// * an `F32` tensor's byte span is not a multiple of 4, or
/// * no `F32` weights were found at all.
pub fn safetensors_to_rvf(data: &[u8], model_id: &str) -> Result<Vec<u8>, ModelLoadError> {
    /// Size of the little-endian `u64` header-length prefix.
    const LEN_PREFIX: usize = 8;
    /// Size of one little-endian `f32`.
    const F32_SIZE: usize = 4;

    let fail = |d: String| ModelLoadError::ConversionFailed {
        format: ModelFormat::Safetensors.label(),
        detail: d,
    };

    let Some(prefix) = data.get(..LEN_PREFIX) else {
        return Err(fail("file shorter than the 8-byte safetensors length header".into()));
    };
    let mut len_bytes = [0u8; LEN_PREFIX];
    len_bytes.copy_from_slice(prefix);
    let header_len = u64::from_le_bytes(len_bytes);

    // Checked u64 -> usize conversion: a plain `as` cast would truncate on
    // 32-bit targets and let an oversized length slip past the bounds check.
    let header_end = usize::try_from(header_len)
        .ok()
        .and_then(|len| LEN_PREFIX.checked_add(len))
        .filter(|&end| end <= data.len())
        .ok_or_else(|| fail(format!("declared header length {header_len} exceeds file size")))?;

    let header: serde_json::Value = serde_json::from_slice(&data[LEN_PREFIX..header_end])
        .map_err(|e| fail(format!("safetensors header is not valid JSON: {e}")))?;
    let obj = header
        .as_object()
        .ok_or_else(|| fail("safetensors header is not a JSON object".into()))?;

    // Tensor offsets in the header are relative to the end of the header.
    let tensor_base = header_end;

    // Iterate tensors in a stable (sorted) order for deterministic output.
    let mut entries: Vec<(&String, &serde_json::Value)> = obj
        .iter()
        .filter(|(k, _)| k.as_str() != "__metadata__")
        .collect();
    entries.sort_by(|a, b| a.0.cmp(b.0));

    let mut weights: Vec<f32> = Vec::new();
    let mut tensor_names: Vec<String> = Vec::new();

    for (tname, tinfo) in entries {
        let dtype = tinfo.get("dtype").and_then(|d| d.as_str()).unwrap_or("");

        // Entries without a `[start, end]` pair of unsigned integers are skipped.
        let offsets = tinfo
            .get("data_offsets")
            .and_then(|o| o.as_array())
            .and_then(|a| Some((a.first()?.as_u64()?, a.get(1)?.as_u64()?)));
        let Some((start, end)) = offsets else { continue };

        // Resolve to absolute, in-bounds byte positions without truncating or
        // overflowing. This applies to every dtype, not just F32.
        let span = usize::try_from(start)
            .ok()
            .zip(usize::try_from(end).ok())
            .and_then(|(s, e)| Some((tensor_base.checked_add(s)?, tensor_base.checked_add(e)?)))
            .filter(|&(s, e)| s <= e && e <= data.len());
        let Some((abs_start, abs_end)) = span else {
            return Err(fail(format!(
                "tensor `{tname}` data_offsets [{start}..{end}] out of bounds"
            )));
        };

        // Only F32 is decoded into the weight vector; other dtypes are left
        // out entirely (honest: we do not silently cast).
        if dtype != "F32" {
            continue;
        }

        let bytes = &data[abs_start..abs_end];
        if bytes.len() % F32_SIZE != 0 {
            // Dropping the tensor would silently shift every later weight, so
            // report the malformed entry instead.
            return Err(fail(format!(
                "F32 tensor `{tname}` spans {} bytes, which is not a multiple of 4",
                bytes.len()
            )));
        }
        weights.extend(
            bytes
                .chunks_exact(F32_SIZE)
                .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]])),
        );
        tensor_names.push(tname.clone());
    }

    if weights.is_empty() {
        return Err(fail(
            "no F32 tensors found to convert (the published weights may be quantized; \
             use a full-precision safetensors export)"
                .into(),
        ));
    }

    let mut builder = RvfBuilder::new();
    builder.add_manifest(
        model_id,
        "converted-from-safetensors",
        "RVF container converted from model.safetensors (issue #894)",
    );
    builder.add_weights(&weights);
    builder.add_metadata(&serde_json::json!({
        "source_format": "safetensors",
        "converted_tensors": tensor_names,
        "n_weights": weights.len(),
        "note": "weights loaded; pose-decoder architecture may differ — see #894",
    }));
    Ok(builder.build())
}
/// Convert a `model.rvf.jsonl` byte buffer into an RVF binary container.
///
/// The JSONL manifest is one JSON object per line; blank lines are ignored.
/// The result is an RVF manifest + metadata so the file becomes loadable. For
/// every line that has a `weights` array, its numeric elements are appended (as
/// `f32`) to the weight segment; non-numeric elements of such an array are
/// skipped. The weight segment is only written when at least one weight was
/// collected. `model_id` is passed through to the container manifest.
///
/// # Errors
/// [`ModelLoadError::ConversionFailed`] if
/// * the buffer is not valid UTF-8,
/// * any non-blank line is not valid JSON (the message names the 1-based
///   manifest line), or
/// * the manifest contains no JSON lines at all.
pub fn jsonl_to_rvf(data: &[u8], model_id: &str) -> Result<Vec<u8>, ModelLoadError> {
    let fail = |d: String| ModelLoadError::ConversionFailed {
        format: ModelFormat::JsonlManifest.label(),
        detail: d,
    };
    let text = std::str::from_utf8(data).map_err(|e| fail(format!("not valid UTF-8: {e}")))?;

    // Only the number of parsed lines is reported, so count them rather than
    // keeping every parsed document alive.
    let mut n_lines: usize = 0;
    let mut weights: Vec<f32> = Vec::new();

    for (idx, raw) in text.lines().enumerate() {
        let line = raw.trim();
        if line.is_empty() {
            continue;
        }
        // Each line is parsed on its own, so the position inside serde's error
        // is relative to that line; prepend the real manifest line number.
        let v: serde_json::Value = serde_json::from_str(line)
            .map_err(|e| fail(format!("line {} is not valid JSON: {e}", idx + 1)))?;
        if let Some(arr) = v.get("weights").and_then(|w| w.as_array()) {
            weights.extend(arr.iter().filter_map(|x| x.as_f64()).map(|f| f as f32));
        }
        n_lines += 1;
    }

    if n_lines == 0 {
        return Err(fail("manifest contained no JSON lines".into()));
    }

    let mut builder = RvfBuilder::new();
    builder.add_manifest(
        model_id,
        "converted-from-jsonl",
        "RVF container converted from model.rvf.jsonl (issue #894)",
    );
    if !weights.is_empty() {
        builder.add_weights(&weights);
    }
    builder.add_metadata(&serde_json::json!({
        "source_format": "rvf.jsonl",
        "n_lines": n_lines,
        "n_weights": weights.len(),
    }));
    Ok(builder.build())
}
/// Produce RVF container bytes from any convertible model file.
///
/// The format is auto-detected with [`detect_format`] from `data` and `name`.
/// This is the seam used by the `--convert-model` CLI option.
///
/// * An input that already is an RVF container is returned as a copy.
/// * safetensors and JSONL inputs go through [`safetensors_to_rvf`] /
///   [`jsonl_to_rvf`], with `model_id` forwarded to the converter.
///
/// # Errors
/// [`ModelLoadError::UnsupportedQuant`] for quantized blobs,
/// [`ModelLoadError::Unknown`] for unrecognised input, or whatever error the
/// selected converter returns.
pub fn convert_to_rvf(data: &[u8], name: &str, model_id: &str) -> Result<Vec<u8>, ModelLoadError> {
    let detected = detect_format(data, name);

    // Convertible formats return straight away; what is left is a rejection.
    let rejection = match detected {
        ModelFormat::Rvf => return Ok(data.to_vec()), // already RVF — pass through.
        ModelFormat::Safetensors => return safetensors_to_rvf(data, model_id),
        ModelFormat::JsonlManifest => return jsonl_to_rvf(data, model_id),
        ModelFormat::HfQuantBin => ModelLoadError::UnsupportedQuant {
            magic: leading_u32(data).unwrap_or(HF_QUANT_MAGIC),
        },
        ModelFormat::Unknown => ModelLoadError::Unknown {
            first_bytes: leading_u32(data).unwrap_or(0),
            detail: "not a convertible model format".into(),
        },
    };
    Err(rejection)
}
// ── helpers ─────────────────────────────────────────────────────────────────
/// Decode the first four bytes of `data` as a little-endian `u32`.
///
/// Returns `None` when the buffer holds fewer than four bytes.
fn leading_u32(data: &[u8]) -> Option<u32> {
    match *data {
        [b0, b1, b2, b3, ..] => Some(u32::from_le_bytes([b0, b1, b2, b3])),
        _ => None,
    }
}
/// Content heuristic for a safetensors file.
///
/// Returns `true` when the buffer has at least 9 bytes, byte 8 is `{` (the
/// start of the JSON header), and the first 8 bytes — read as a little-endian
/// `u64` header length — lie between 2 and 64 MiB inclusive.
///
/// The declared length is *not* compared with the buffer size, so a buffer
/// holding only a prefix of the file still matches. The range check exists to
/// reject other binaries that merely happen to have a `{` at byte 8.
fn looks_like_safetensors(data: &[u8]) -> bool {
    /// Largest header length still considered plausible (64 MiB).
    const MAX_HEADER_LEN: u64 = 64 * 1024 * 1024;
    /// Smallest header length considered plausible.
    const MIN_HEADER_LEN: u64 = 2;

    if let [l0, l1, l2, l3, l4, l5, l6, l7, b'{', ..] = *data {
        let declared = u64::from_le_bytes([l0, l1, l2, l3, l4, l5, l6, l7]);
        (MIN_HEADER_LEN..=MAX_HEADER_LEN).contains(&declared)
    } else {
        false
    }
}
// Unit tests for format detection, failure classification and conversion.
// The conversion tests feed the converted bytes back through
// `ProgressiveLoader` to show that the output is actually loadable.
#[cfg(test)]
mod tests {
use super::*;
use crate::rvf_pipeline::ProgressiveLoader;
/// Build a minimal valid safetensors buffer with one F32 tensor.
///
/// Layout: 8-byte LE header length, a JSON header describing a single tensor
/// named `weight` that covers bytes `0..n*4` of the data region, then the
/// weights as little-endian `f32` values.
fn make_safetensors(weights: &[f32]) -> Vec<u8> {
let n = weights.len();
let header = serde_json::json!({
"weight": {
"dtype": "F32",
"shape": [n],
"data_offsets": [0, n * 4],
}
});
let header_bytes = serde_json::to_vec(&header).unwrap();
let mut out = Vec::new();
out.extend_from_slice(&(header_bytes.len() as u64).to_le_bytes());
out.extend_from_slice(&header_bytes);
for &w in weights {
out.extend_from_slice(&w.to_le_bytes());
}
out
}
/// safetensors is recognised both by its file name and, with an empty name,
/// by content alone.
#[test]
fn detects_safetensors_by_magic_and_name() {
let st = make_safetensors(&[1.0, 2.0, 3.0]);
assert_eq!(detect_format(&st, "model.safetensors"), ModelFormat::Safetensors);
assert_eq!(detect_format(&st, ""), ModelFormat::Safetensors); // by content
}
/// The HF quant magic is recognised with and without a telling file name.
#[test]
fn detects_hf_quant_magic() {
// The exact bytes the loader reported: "5WEw" => LE u32 0x77455735.
let data = [0x35u8, 0x57, 0x45, 0x77, 0xAA, 0xBB];
assert_eq!(leading_u32(&data), Some(HF_QUANT_MAGIC));
assert_eq!(detect_format(&data, "model-q4.bin"), ModelFormat::HfQuantBin);
assert_eq!(detect_format(&data, ""), ModelFormat::HfQuantBin); // by magic
}
/// A JSONL manifest and a buffer carrying the RVFS magic are both recognised.
#[test]
fn detects_jsonl_and_rvf() {
assert_eq!(detect_format(b"{\"seg\":0}\n", "model.rvf.jsonl"), ModelFormat::JsonlManifest);
// RVFS magic as little-endian bytes -> Rvf.
let rvfs = RVFS_MAGIC.to_le_bytes();
assert_eq!(detect_format(&rvfs, "model.rvf"), ModelFormat::Rvf);
}
/// CORE #894 PROOF: a safetensors buffer converts to a container the
/// ProgressiveLoader loads (Layer A succeeds, weights present) — the old
/// path returned the opaque "invalid magic … 0x77455735" and gave up.
#[test]
fn safetensors_converts_and_loads() {
let st = make_safetensors(&[1.0, 2.0, 3.0, 4.0]);
let rvf = safetensors_to_rvf(&st, "wifi-densepose-pretrained")
.expect("safetensors must convert to RVF");
// The converted bytes carry the RVFS magic.
assert_eq!(leading_u32(&rvf), Some(RVFS_MAGIC));
// And the ProgressiveLoader actually loads it.
let mut loader = ProgressiveLoader::new(&rvf).expect("converted RVF must load");
let la = loader.load_layer_a().expect("Layer A");
assert_eq!(la.model_name, "wifi-densepose-pretrained");
let lc = loader.load_layer_c().expect("Layer C");
assert_eq!(lc.all_weights, vec![1.0, 2.0, 3.0, 4.0], "weights round-trip");
}
/// CORE #894 PROOF: feeding the HF quant magic to the classifier yields the
/// new actionable typed error — never the opaque magic string.
#[test]
fn hf_quant_classifies_to_actionable_error() {
let data = [0x35u8, 0x57, 0x45, 0x77];
let err = classify_load_failure(
&data,
"model-q4.bin",
"invalid magic at offset 0: expected 0x52564653, got 0x77455735",
);
assert!(matches!(err, ModelLoadError::UnsupportedQuant { magic } if magic == HF_QUANT_MAGIC));
let msg = err.to_string();
assert!(msg.contains("safetensors"), "must point at the loadable format: {msg}");
assert!(!msg.contains("invalid magic at offset"), "must not leak opaque magic: {msg}");
}
/// safetensors load failure is classified as NeedsConversion with a
/// one-command path — not the opaque magic.
#[test]
fn safetensors_classifies_to_needs_conversion() {
let st = make_safetensors(&[1.0]);
let err = classify_load_failure(&st, "model.safetensors", "invalid magic …");
assert!(matches!(err, ModelLoadError::NeedsConversion { .. }));
let msg = err.to_string();
assert!(msg.contains("--convert-model"), "must give the convert command: {msg}");
}
/// jsonl manifest converts and loads; the `weights` array on the second line
/// ends up in the loaded weight vector.
#[test]
fn jsonl_converts_and_loads() {
let jsonl = b"{\"model_id\":\"x\"}\n{\"weights\":[1.0,2.0]}\n";
let rvf = jsonl_to_rvf(jsonl, "x").expect("jsonl converts");
let mut loader = ProgressiveLoader::new(&rvf).expect("converted jsonl loads");
let _ = loader.load_layer_a().expect("Layer A");
let lc = loader.load_layer_c().expect("Layer C");
assert_eq!(lc.all_weights, vec![1.0, 2.0]);
}
/// convert_to_rvf dispatches by detected format and rejects quant blobs.
#[test]
fn convert_to_rvf_dispatches_and_rejects_quant() {
let st = make_safetensors(&[5.0]);
assert!(convert_to_rvf(&st, "model.safetensors", "m").is_ok());
let quant = [0x35u8, 0x57, 0x45, 0x77];
assert!(matches!(
convert_to_rvf(&quant, "model-q4.bin", "m"),
Err(ModelLoadError::UnsupportedQuant { .. })
));
}
}