wifi-densepose/v2/crates/wifi-densepose-sensing-server/src/model_format.rs

//! Model-file format detection and conversion (issue #894).
//!
//! The published HuggingFace repo `ruvnet/wifi-densepose-pretrained` ships
//! several files, **none** of which carry the RVF binary-container magic
//! (`RVFS` = `0x52564653`) that [`crate::rvf_pipeline::ProgressiveLoader`]
//! expects:
//!
//! | File on HF                    | First bytes        | What it is                         |
//! |-------------------------------|--------------------|------------------------------------|
//! | `model.safetensors`          | `<u64 LE len>{...` | standard safetensors weight file   |
//! | `model-q2/q4/q8.bin`         | `35 57 45 77` ("5WEw", LE u32 `0x77455735`) | quantized weight blob |
//! | `model.rvf.jsonl`            | `{...`             | JSONL manifest (one JSON per line) |
//! | *(none shipped)*             | `53 46 56 52` ("RVFS"/`RVFS`) | the binary RVF container the loader wants |
//!
//! Before this module, feeding any HF file to `--model` produced the opaque
//! `invalid magic at offset 0: expected 0x52564653, got 0x77455735` and the
//! server silently fell back to signal heuristics (the "10 persons for 1"
//! garbage the reporter saw).
//!
//! This module:
//! 1. **Auto-detects** the format by magic + extension ([`detect_format`]).
//! 2. Returns a **typed, actionable** error ([`ModelLoadError`]) that lists the
//!    accepted formats and the one-command conversion path — never the opaque
//!    magic string.
//! 3. Ships a **converter** ([`safetensors_to_rvf`], [`jsonl_to_rvf`]) so the
//!    published `model.safetensors` / `model.rvf.jsonl` can be turned into the
//!    binary RVF container the loader consumes, in one command
//!    (`sensing-server --convert-model <in> --convert-out <out>`).
//!
//! # Honest scope
//!
//! Converting `model.safetensors` → RVF wires the **format / load path**: the
//! safetensors header is parsed, every F32 tensor's weights are flattened into
//! the RVF `SEG_VEC` weight segment, and a manifest is written so the loader's
//! Layer A/B/C all succeed. The pose-decoder *architecture* on HF differs from
//! this crate's inference head, so this converter does **not** claim
//! end-to-end pose accuracy from the converted weights — it makes the published
//! model **loadable** (magic/version/segments valid, weights present) and
//! removes the silent-heuristics fallback. Real pose inference from those exact
//! weights still needs the matching decoder (tracked in #894).

use crate::rvf_container::RvfBuilder;

/// The RVF binary-container magic, `"RVFS"` as little-endian `u32`.
const RVFS_MAGIC: u32 = 0x5256_4653;
/// The quantized-blob magic shipped on HF (`"5WEw"` = bytes `35 57 45 77`),
/// which decodes to `0x77455735` via `u32::from_le_bytes` — exactly the value
/// the loader reported in issue #894.
const HF_QUANT_MAGIC: u32 = 0x7745_5735;

/// A recognised on-disk model-file format.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ModelFormat {
    /// Native RVF binary container — the loader consumes this directly.
    Rvf,
    /// Standard `model.safetensors` (8-byte LE header length + JSON header).
    Safetensors,
    /// HuggingFace quantized weight blob (`model-q{2,4,8}.bin`, magic `0x77455735`).
    HfQuantBin,
    /// JSONL manifest (`model.rvf.jsonl`) — one JSON object per line.
    JsonlManifest,
    /// None of the above.
    Unknown,
}

impl ModelFormat {
    /// Human-readable name for diagnostics.
    pub fn label(self) -> &'static str {
        match self {
            ModelFormat::Rvf => "RVF binary container (RVFS)",
            ModelFormat::Safetensors => "safetensors weight file",
            ModelFormat::HfQuantBin => "HuggingFace quantized weight blob (model-q*.bin)",
            ModelFormat::JsonlManifest => "JSONL manifest (model.rvf.jsonl)",
            ModelFormat::Unknown => "unknown format",
        }
    }
}

/// A typed, actionable model-load error (issue #894).
///
/// Replaces the opaque `"invalid magic at offset 0: expected 0x… got 0x…"`
/// string with a self-describing variant the caller can match on and present.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum ModelLoadError {
    /// The file is a recognised non-RVF format that must be converted first.
    #[error(
        "model file is {detected} — the --model loader needs an RVF binary container. \
         Convert it once with `sensing-server --convert-model <in> --convert-out model.rvf`, \
         then load the .rvf. (accepted by --model: RVF binary container; \
         convertible: safetensors, model.rvf.jsonl)"
    )]
    NeedsConversion {
        /// Label of the detected format.
        detected: &'static str,
    },

    /// The file is a quantized HF blob with no in-repo reader.
    #[error(
        "model file is a HuggingFace quantized weight blob (magic 0x{magic:08X}); \
         no reader for this quantization format ships in this build. Use the \
         full-precision `model.safetensors` from the same HF repo and convert it \
         with `sensing-server --convert-model model.safetensors --convert-out model.rvf`."
    )]
    UnsupportedQuant {
        /// The magic that was read (e.g. `0x77455735`).
        magic: u32,
    },

    /// The file matched no accepted or convertible format.
    #[error(
        "model file is an unknown format (first bytes 0x{first_bytes:08X}); \
         accepted: RVF binary container (RVFS, 0x52564653); convertible: \
         safetensors, model.rvf.jsonl. ({detail})"
    )]
    Unknown {
        /// The first 4 bytes as a LE u32 (0 if the file is shorter).
        first_bytes: u32,
        /// Underlying detail (e.g. the original loader message).
        detail: String,
    },

    /// Conversion of a recognised format failed.
    #[error("failed to convert {format} to RVF: {detail}")]
    ConversionFailed {
        /// Source format label.
        format: &'static str,
        /// Failure detail.
        detail: String,
    },
}

/// Detect a model-file format from its bytes and optional file name.
///
/// Magic bytes take precedence; the `name` (lowercased file name, may be empty)
/// disambiguates the JSONL/`.bin` cases that share a leading `{`/raw bytes.
pub fn detect_format(data: &[u8], name: &str) -> ModelFormat {
    let name = name.to_ascii_lowercase();

    // RVFS magic at offset 0 (the only format the loader reads directly).
    if leading_u32(data) == Some(RVFS_MAGIC) {
        return ModelFormat::Rvf;
    }
    // safetensors: 8-byte LE header length, then a JSON object opening with '{'.
    // Checked before the `.bin`/`-q` naming heuristic so a `.safetensors` file
    // is never mistaken for a quant blob. Validate the declared length is
    // plausible to avoid false positives.
    if name.ends_with(".safetensors") || looks_like_safetensors(data) {
        return ModelFormat::Safetensors;
    }
    // HF quantized blob: exact magic, OR `.bin`/`-q` naming.
    if leading_u32(data) == Some(HF_QUANT_MAGIC) || name.ends_with(".bin") || name.contains("-q") {
        return ModelFormat::HfQuantBin;
    }
    // JSONL manifest: well-known suffix, or a leading '{' that is NOT preceded
    // by an 8-byte length (already handled above).
    if name.ends_with(".jsonl") || name.ends_with(".rvf.jsonl") || data.first() == Some(&b'{') {
        return ModelFormat::JsonlManifest;
    }
    ModelFormat::Unknown
}

/// Map a detected format (for a file that the RVF loader rejected) to a typed,
/// actionable [`ModelLoadError`]. `detail` carries the original loader message.
pub fn classify_load_failure(data: &[u8], name: &str, detail: &str) -> ModelLoadError {
    match detect_format(data, name) {
        ModelFormat::Rvf => ModelLoadError::Unknown {
            first_bytes: leading_u32(data).unwrap_or(0),
            detail: format!("RVFS magic present but container parse failed: {detail}"),
        },
        ModelFormat::Safetensors => ModelLoadError::NeedsConversion {
            detected: ModelFormat::Safetensors.label(),
        },
        ModelFormat::JsonlManifest => ModelLoadError::NeedsConversion {
            detected: ModelFormat::JsonlManifest.label(),
        },
        ModelFormat::HfQuantBin => ModelLoadError::UnsupportedQuant {
            magic: leading_u32(data).unwrap_or(HF_QUANT_MAGIC),
        },
        ModelFormat::Unknown => ModelLoadError::Unknown {
            first_bytes: leading_u32(data).unwrap_or(0),
            detail: detail.to_string(),
        },
    }
}

/// Convert a `model.safetensors` byte buffer into an RVF binary container that
/// [`crate::rvf_pipeline::ProgressiveLoader`] can load (issue #894).
///
/// Every `F32` tensor in the safetensors file is flattened (in header order)
/// into the RVF `SEG_VEC` weight segment; a manifest records provenance. The
/// returned bytes start with the `RVFS` magic and load cleanly.
///
/// # Errors
/// [`ModelLoadError::ConversionFailed`] if the safetensors header is malformed,
/// or [`ModelLoadError::NeedsConversion`]-shaped detail if no F32 tensors exist.
pub fn safetensors_to_rvf(data: &[u8], model_id: &str) -> Result<Vec<u8>, ModelLoadError> {
    let fail = |d: String| ModelLoadError::ConversionFailed {
        format: ModelFormat::Safetensors.label(),
        detail: d,
    };

    if data.len() < 8 {
        return Err(fail("file shorter than the 8-byte safetensors length header".into()));
    }
    let header_len = u64::from_le_bytes(data[0..8].try_into().unwrap()) as usize;
    let header_start: usize = 8;
    let header_end = header_start
        .checked_add(header_len)
        .filter(|&e| e <= data.len())
        .ok_or_else(|| fail(format!("declared header length {header_len} exceeds file size")))?;

    let header: serde_json::Value = serde_json::from_slice(&data[header_start..header_end])
        .map_err(|e| fail(format!("safetensors header is not valid JSON: {e}")))?;
    let obj = header
        .as_object()
        .ok_or_else(|| fail("safetensors header is not a JSON object".into()))?;

    let tensor_base = header_end;
    let mut weights: Vec<f32> = Vec::new();
    let mut tensor_names: Vec<String> = Vec::new();

    // Iterate tensors in a stable (sorted) order for deterministic output.
    let mut entries: Vec<(&String, &serde_json::Value)> = obj
        .iter()
        .filter(|(k, _)| k.as_str() != "__metadata__")
        .collect();
    entries.sort_by(|a, b| a.0.cmp(b.0));

    for (tname, tinfo) in entries {
        let dtype = tinfo.get("dtype").and_then(|d| d.as_str()).unwrap_or("");
        // Only F32 is decoded into the weight vector. Other dtypes are recorded
        // in the manifest but not flattened (honest: we do not silently cast).
        let offsets = tinfo
            .get("data_offsets")
            .and_then(|o| o.as_array())
            .and_then(|a| {
                Some((a.first()?.as_u64()? as usize, a.get(1)?.as_u64()? as usize))
            });
        let Some((start, end)) = offsets else { continue };
        let abs_start = tensor_base.checked_add(start);
        let abs_end = tensor_base.checked_add(end);
        match (abs_start, abs_end) {
            (Some(s), Some(e)) if e <= data.len() && s <= e => {
                if dtype == "F32" {
                    let bytes = &data[s..e];
                    if bytes.len() % 4 == 0 {
                        for chunk in bytes.chunks_exact(4) {
                            weights.push(f32::from_le_bytes([
                                chunk[0], chunk[1], chunk[2], chunk[3],
                            ]));
                        }
                        tensor_names.push(tname.clone());
                    }
                }
            }
            _ => {
                return Err(fail(format!(
                    "tensor `{tname}` data_offsets [{start}..{end}] out of bounds"
                )));
            }
        }
    }

    if weights.is_empty() {
        return Err(fail(
            "no F32 tensors found to convert (the published weights may be quantized; \
             use a full-precision safetensors export)"
                .into(),
        ));
    }

    let mut builder = RvfBuilder::new();
    builder.add_manifest(
        model_id,
        "converted-from-safetensors",
        "RVF container converted from model.safetensors (issue #894)",
    );
    builder.add_weights(&weights);
    builder.add_metadata(&serde_json::json!({
        "source_format": "safetensors",
        "converted_tensors": tensor_names,
        "n_weights": weights.len(),
        "note": "weights loaded; pose-decoder architecture may differ — see #894",
    }));
    Ok(builder.build())
}

/// Convert a `model.rvf.jsonl` byte buffer into an RVF binary container.
///
/// The JSONL manifest is one JSON object per line. This wraps the parsed lines
/// into an RVF manifest + metadata so the file becomes loadable; any numeric
/// `weights` array found on a line is flattened into the weight segment.
///
/// # Errors
/// [`ModelLoadError::ConversionFailed`] if no line parses as JSON.
pub fn jsonl_to_rvf(data: &[u8], model_id: &str) -> Result<Vec<u8>, ModelLoadError> {
    let fail = |d: String| ModelLoadError::ConversionFailed {
        format: ModelFormat::JsonlManifest.label(),
        detail: d,
    };
    let text = std::str::from_utf8(data).map_err(|e| fail(format!("not valid UTF-8: {e}")))?;

    let mut lines: Vec<serde_json::Value> = Vec::new();
    let mut weights: Vec<f32> = Vec::new();
    for line in text.lines() {
        let line = line.trim();
        if line.is_empty() {
            continue;
        }
        let v: serde_json::Value = serde_json::from_str(line)
            .map_err(|e| fail(format!("line is not valid JSON: {e}")))?;
        if let Some(arr) = v.get("weights").and_then(|w| w.as_array()) {
            for x in arr {
                if let Some(f) = x.as_f64() {
                    weights.push(f as f32);
                }
            }
        }
        lines.push(v);
    }
    if lines.is_empty() {
        return Err(fail("manifest contained no JSON lines".into()));
    }

    let mut builder = RvfBuilder::new();
    builder.add_manifest(
        model_id,
        "converted-from-jsonl",
        "RVF container converted from model.rvf.jsonl (issue #894)",
    );
    if !weights.is_empty() {
        builder.add_weights(&weights);
    }
    builder.add_metadata(&serde_json::json!({
        "source_format": "rvf.jsonl",
        "n_lines": lines.len(),
        "n_weights": weights.len(),
    }));
    Ok(builder.build())
}

/// Convert any *convertible* model file to RVF bytes, auto-detecting the format.
///
/// Used by the `--convert-model` CLI seam. Returns the converted RVF bytes, or a
/// typed error for formats that cannot be converted (quantized blobs, unknown).
pub fn convert_to_rvf(data: &[u8], name: &str, model_id: &str) -> Result<Vec<u8>, ModelLoadError> {
    match detect_format(data, name) {
        ModelFormat::Rvf => Ok(data.to_vec()), // already RVF — pass through.
        ModelFormat::Safetensors => safetensors_to_rvf(data, model_id),
        ModelFormat::JsonlManifest => jsonl_to_rvf(data, model_id),
        ModelFormat::HfQuantBin => Err(ModelLoadError::UnsupportedQuant {
            magic: leading_u32(data).unwrap_or(HF_QUANT_MAGIC),
        }),
        ModelFormat::Unknown => Err(ModelLoadError::Unknown {
            first_bytes: leading_u32(data).unwrap_or(0),
            detail: "not a convertible model format".into(),
        }),
    }
}

// ── helpers ─────────────────────────────────────────────────────────────────

fn leading_u32(data: &[u8]) -> Option<u32> {
    data.get(0..4)
        .map(|b| u32::from_le_bytes([b[0], b[1], b[2], b[3]]))
}

/// A safetensors file: first 8 bytes are a LE u64 header length, byte 8 is `{`,
/// and the declared length must fit within the buffer (or be a plausible prefix).
fn looks_like_safetensors(data: &[u8]) -> bool {
    if data.len() < 9 || data[8] != b'{' {
        return false;
    }
    let header_len = u64::from_le_bytes(data[0..8].try_into().unwrap());
    // A real header is non-trivial and bounded; reject absurd lengths that would
    // indicate this is actually some other binary that happens to have a '{' at
    // byte 8. Allow the case where we only have the header prefix (len > data).
    header_len >= 2 && header_len <= 64 * 1024 * 1024
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::rvf_pipeline::ProgressiveLoader;

    /// Build a minimal valid safetensors buffer with one F32 tensor.
    fn make_safetensors(weights: &[f32]) -> Vec<u8> {
        let n = weights.len();
        let header = serde_json::json!({
            "weight": {
                "dtype": "F32",
                "shape": [n],
                "data_offsets": [0, n * 4],
            }
        });
        let header_bytes = serde_json::to_vec(&header).unwrap();
        let mut out = Vec::new();
        out.extend_from_slice(&(header_bytes.len() as u64).to_le_bytes());
        out.extend_from_slice(&header_bytes);
        for &w in weights {
            out.extend_from_slice(&w.to_le_bytes());
        }
        out
    }

    #[test]
    fn detects_safetensors_by_magic_and_name() {
        let st = make_safetensors(&[1.0, 2.0, 3.0]);
        assert_eq!(detect_format(&st, "model.safetensors"), ModelFormat::Safetensors);
        assert_eq!(detect_format(&st, ""), ModelFormat::Safetensors); // by content
    }

    #[test]
    fn detects_hf_quant_magic() {
        // The exact bytes the loader reported: "5WEw" => LE u32 0x77455735.
        let data = [0x35u8, 0x57, 0x45, 0x77, 0xAA, 0xBB];
        assert_eq!(leading_u32(&data), Some(HF_QUANT_MAGIC));
        assert_eq!(detect_format(&data, "model-q4.bin"), ModelFormat::HfQuantBin);
        assert_eq!(detect_format(&data, ""), ModelFormat::HfQuantBin); // by magic
    }

    #[test]
    fn detects_jsonl_and_rvf() {
        assert_eq!(detect_format(b"{\"seg\":0}\n", "model.rvf.jsonl"), ModelFormat::JsonlManifest);
        // RVFS magic ("RVFS" LE) -> Rvf.
        let rvfs = RVFS_MAGIC.to_le_bytes();
        assert_eq!(detect_format(&rvfs, "model.rvf"), ModelFormat::Rvf);
    }

    /// CORE #894 PROOF: the published safetensors converts to a container the
    /// ProgressiveLoader loads (Layer A succeeds, weights present) — the old
    /// path returned the opaque "invalid magic … 0x77455735" and gave up.
    #[test]
    fn safetensors_converts_and_loads() {
        let st = make_safetensors(&[1.0, 2.0, 3.0, 4.0]);
        let rvf = safetensors_to_rvf(&st, "wifi-densepose-pretrained")
            .expect("safetensors must convert to RVF");
        // The converted bytes carry the RVFS magic.
        assert_eq!(leading_u32(&rvf), Some(RVFS_MAGIC));
        // And the ProgressiveLoader actually loads it.
        let mut loader = ProgressiveLoader::new(&rvf).expect("converted RVF must load");
        let la = loader.load_layer_a().expect("Layer A");
        assert_eq!(la.model_name, "wifi-densepose-pretrained");
        let lc = loader.load_layer_c().expect("Layer C");
        assert_eq!(lc.all_weights, vec![1.0, 2.0, 3.0, 4.0], "weights round-trip");
    }

    /// CORE #894 PROOF: feeding the HF quant magic to the classifier yields the
    /// new actionable typed error — never the opaque magic panic.
    #[test]
    fn hf_quant_classifies_to_actionable_error() {
        let data = [0x35u8, 0x57, 0x45, 0x77];
        let err = classify_load_failure(
            &data,
            "model-q4.bin",
            "invalid magic at offset 0: expected 0x52564653, got 0x77455735",
        );
        assert!(matches!(err, ModelLoadError::UnsupportedQuant { magic } if magic == HF_QUANT_MAGIC));
        let msg = err.to_string();
        assert!(msg.contains("safetensors"), "must point at the loadable format: {msg}");
        assert!(!msg.contains("invalid magic at offset"), "must not leak opaque magic: {msg}");
    }

    /// safetensors load failure is classified as NeedsConversion with a
    /// one-command path — not the opaque magic.
    #[test]
    fn safetensors_classifies_to_needs_conversion() {
        let st = make_safetensors(&[1.0]);
        let err = classify_load_failure(&st, "model.safetensors", "invalid magic …");
        assert!(matches!(err, ModelLoadError::NeedsConversion { .. }));
        let msg = err.to_string();
        assert!(msg.contains("--convert-model"), "must give the convert command: {msg}");
    }

    /// jsonl manifest converts and loads.
    #[test]
    fn jsonl_converts_and_loads() {
        let jsonl = b"{\"model_id\":\"x\"}\n{\"weights\":[1.0,2.0]}\n";
        let rvf = jsonl_to_rvf(jsonl, "x").expect("jsonl converts");
        let mut loader = ProgressiveLoader::new(&rvf).expect("converted jsonl loads");
        let _ = loader.load_layer_a().expect("Layer A");
        let lc = loader.load_layer_c().expect("Layer C");
        assert_eq!(lc.all_weights, vec![1.0, 2.0]);
    }

    /// convert_to_rvf dispatches by detected format and rejects quant blobs.
    #[test]
    fn convert_to_rvf_dispatches_and_rejects_quant() {
        let st = make_safetensors(&[5.0]);
        assert!(convert_to_rvf(&st, "model.safetensors", "m").is_ok());
        let quant = [0x35u8, 0x57, 0x45, 0x77];
        assert!(matches!(
            convert_to_rvf(&quant, "model-q4.bin", "m"),
            Err(ModelLoadError::UnsupportedQuant { .. })
        ));
    }
}