From 73321db765b6d3c4d57632fed41a46c7eebfe2b6 Mon Sep 17 00:00:00 2001 From: ruv Date: Fri, 8 May 2026 11:49:19 -0400 Subject: [PATCH] feat(temporal): init_random_blob example + filesystem e2e tests (#513) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the host→file→firmware loop on the Phase 1 weight format. Real .rvne artifact emitted from the example, parsed back through filesystem in the e2e test, byte-identical across two seeded runs. - examples/init_random_blob.rs — produces a 41,244-byte deployable blob matching the AETHER default head shape (input_dim=16, q_heads=4, kv_heads=1 [MQA], head_dim=32, layers=2, classes=4 — staying coherent with TemporalHeadConfig::default_aether so a real trainer can drop in this shape with one search-and-replace). Uses xorshift64* with a fixed seed (0xC511_0007_DEAD_BEEF) for reproducibility. Per-layer weight count derivation lives in the example (Wq + Wk + Wv + Wo, plus a final classifier head) so the kernel's expectation is anchored in code rather than a comment that drifts. - tests/blob_e2e.rs — two new tests, 15/15 total now passing: * realistic_blob_roundtrips_through_filesystem — writes a 25+ KB blob to std::env::temp_dir(), reads it back, parses, validates. Mirrors what the firmware loader will do once the toolchain unblocks (mmap NVS or EMBED_FILES → parse). * deterministic_seed_produces_byte_identical_blobs — same seed produces byte-identical output, twice. This is what makes a witness-bundle (ADR-028) over trained weights meaningful. Verified by running the example with an explicit out path: cargo run -p wifi-densepose-temporal --example init_random_blob -- \ v2/target/example-output/model_init.rvne → 41244 bytes, parses clean, dtype/shape/CRC all good. What this isn't yet: - Not a trained model. Random init only. - Not a kernel forward over the blob. That requires the firmware Rust component to compile (Phase 5 — toolchain blocker). 
- Not wired into wifi-densepose-train. ADR-096 §8.1 flagged that the AETHER train crate doesn't currently have a temporal-axis attention; that integration is a separate piece of work. Co-Authored-By: claude-flow --- v2/crates/wifi-densepose-temporal/Cargo.toml | 4 + .../examples/init_random_blob.rs | 142 ++++++++++++++++++ .../wifi-densepose-temporal/tests/blob_e2e.rs | 114 ++++++++++++++ 3 files changed, 260 insertions(+) create mode 100644 v2/crates/wifi-densepose-temporal/examples/init_random_blob.rs create mode 100644 v2/crates/wifi-densepose-temporal/tests/blob_e2e.rs diff --git a/v2/crates/wifi-densepose-temporal/Cargo.toml b/v2/crates/wifi-densepose-temporal/Cargo.toml index 1058f27c..8d25889b 100644 --- a/v2/crates/wifi-densepose-temporal/Cargo.toml +++ b/v2/crates/wifi-densepose-temporal/Cargo.toml @@ -17,3 +17,7 @@ approx = "0.5" default = [] # Enable FP16 KV cache path (mirrors the firmware-side ADR-095 build). fp16 = [] + +[[example]] +name = "init_random_blob" +path = "examples/init_random_blob.rs" diff --git a/v2/crates/wifi-densepose-temporal/examples/init_random_blob.rs b/v2/crates/wifi-densepose-temporal/examples/init_random_blob.rs new file mode 100644 index 00000000..09e101a3 --- /dev/null +++ b/v2/crates/wifi-densepose-temporal/examples/init_random_blob.rs @@ -0,0 +1,142 @@ +// Emit a deterministic-seeded random weight blob in the .rvne format +// (ADR-095 / #513 Phase 1 of the training-side roadmap). +// +// This is a *demo*, not a trained model — the weights are PRNG output. +// Its purpose is to: +// 1. Document end-to-end how the host produces a blob (i.e. the +// example IS the recipe a real trainer follows: build a header, +// fill the weights buffer, call WeightBlob::new + .serialize(), +// write to disk). +// 2. Provide a reproducible test fixture the firmware loader can +// consume once the toolchain unblocks (ADR-095 Phase 5). +// 3. 
Anchor the byte-level format so refactors that change the +// output size silently show up in the byte counts printed at +// the bottom. +// +// Usage: +// cargo run -p wifi-densepose-temporal --example init_random_blob +// cargo run -p wifi-densepose-temporal --example init_random_blob -- /tmp/model.rvne + +use std::env; +use std::fs; +use std::path::PathBuf; + +use wifi_densepose_temporal::{WeightBlob, WeightBlobHeader, WeightDtype}; + +/// Match the AETHER default head shape from +/// `TemporalHeadConfig::default_aether()` — staying coherent with the +/// crate's other defaults means a real trainer can drop this example +/// in as the starting point with one search-and-replace. +fn aether_default_header() -> WeightBlobHeader { + WeightBlobHeader { + dtype: WeightDtype::F32, + input_dim: 16, + n_q_heads: 4, + n_kv_heads: 1, // MQA — one shared K/V across the 4 query heads + head_dim: 32, + n_layers: 2, + n_classes: 4, // gesture-class default; firmware Kconfig matches + } +} + +/// Compute the f32 element count for one transformer block at the given +/// shape. This is the *intent-of-the-format* number, kept here so +/// changes to it (and to the kernel's expectation) stay in sync. +/// +/// Per-layer weights consist of: +/// - Q projection : input_dim × (n_q_heads × head_dim) = Wq +/// - K projection : input_dim × (n_kv_heads × head_dim) = Wk +/// - V projection : input_dim × (n_kv_heads × head_dim) = Wv +/// - O projection : (n_q_heads × head_dim) × input_dim = Wo +fn per_layer_floats(h: &WeightBlobHeader) -> usize { + let id = h.input_dim as usize; + let q_total = h.n_q_heads as usize * h.head_dim as usize; + let kv_total = h.n_kv_heads as usize * h.head_dim as usize; + id * q_total // Wq + + id * kv_total // Wk + + id * kv_total // Wv + + q_total * id // Wo +} + +/// Plus a final classifier head: input_dim × n_classes. 
+fn classifier_floats(h: &WeightBlobHeader) -> usize { + h.input_dim as usize * h.n_classes as usize +} + +/// xorshift64*-style PRNG — Marsaglia's 13/7/17 shifts with the xorshift64* +/// output multiplier (not the reference 12/25/27 triple). Not for crypto; a +/// fixed seed makes two runs of the example produce byte-identical blobs. +fn xorshift_step(state: &mut u64) -> u64 { + let mut x = *state; + x ^= x << 13; + x ^= x >> 7; + x ^= x << 17; + *state = x; + x.wrapping_mul(2685821657736338717u64) +} + +/// Map the high 32 bits of a u64 to a small symmetric float in +/// [-0.1, 0.1]. Tight bound so the resulting model produces sensible +/// pre-softmax logits even though it's untrained. +fn next_init_f32(state: &mut u64) -> f32 { + let bits = (xorshift_step(state) >> 32) as u32; + // Map to [0, 1] (f32 rounding can reach 1.0) then scale to [-0.1, 0.1] + let unit = (bits as f32) / (u32::MAX as f32); + (unit - 0.5) * 0.2 +} + +fn build_random_weights(header: &WeightBlobHeader, seed: u64) -> Vec<u8> { + let total_floats = + per_layer_floats(header) * header.n_layers as usize + classifier_floats(header); + let mut out = Vec::with_capacity(total_floats * 4); + let mut state = seed; + for _ in 0..total_floats { + let f = next_init_f32(&mut state); + out.extend_from_slice(&f.to_le_bytes()); + } + out +} + +fn main() -> Result<(), Box<dyn std::error::Error>> { + let path = env::args() + .nth(1) + .map(PathBuf::from) + .unwrap_or_else(|| PathBuf::from("model_init.rvne")); + + let header = aether_default_header(); + let weights = build_random_weights(&header, 0xC511_0007_DEAD_BEEFu64); + let weights_len = weights.len(); + + let blob = WeightBlob::new(header.clone(), weights)?; + let bytes = blob.serialize(); + let serialized_len = bytes.len(); + + fs::write(&path, &bytes)?; + + // Re-parse to prove the artifact we just wrote is loadable. Same + // path the firmware loader will follow once the toolchain unblocks. 
+ let parsed = WeightBlob::parse(&fs::read(&path)?)?; + + println!("wrote : {}", path.display()); + println!("dtype : {:?}", parsed.header.dtype); + println!( + "shape : input_dim={}, q_heads={}, kv_heads={}, head_dim={}, layers={}, classes={}", + parsed.header.input_dim, + parsed.header.n_q_heads, + parsed.header.n_kv_heads, + parsed.header.head_dim, + parsed.header.n_layers, + parsed.header.n_classes, + ); + println!( + "weights : {} bytes ({} f32 elements)", + weights_len, + weights_len / 4 + ); + println!( + "total : {} bytes (header 24 + weights {} + crc 4)", + serialized_len, weights_len + ); + + Ok(()) +} diff --git a/v2/crates/wifi-densepose-temporal/tests/blob_e2e.rs b/v2/crates/wifi-densepose-temporal/tests/blob_e2e.rs new file mode 100644 index 00000000..a2bc426a --- /dev/null +++ b/v2/crates/wifi-densepose-temporal/tests/blob_e2e.rs @@ -0,0 +1,114 @@ +//! End-to-end test: write a deterministic-seeded weight blob to disk, +//! read it back, parse it. Mirrors what the host-side training tool +//! does (training run finishes → emit .rvne) and what the firmware +//! loader will do once the toolchain unblocks (boot → mmap NVS or +//! EMBED_FILES blob → parse → run kernel). +//! +//! Sized realistically (~41 KB for the AETHER default shape) so the +//! perf and CRC paths see a meaningful payload. 
+ +use std::fs; + +use wifi_densepose_temporal::{WeightBlob, WeightBlobHeader, WeightDtype}; + +fn aether_default_header() -> WeightBlobHeader { + WeightBlobHeader { + dtype: WeightDtype::F32, + input_dim: 16, + n_q_heads: 4, + n_kv_heads: 1, + head_dim: 32, + n_layers: 2, + n_classes: 4, + } +} + +fn xorshift_step(state: &mut u64) -> u64 { + let mut x = *state; + x ^= x << 13; + x ^= x >> 7; + x ^= x << 17; + *state = x; + x.wrapping_mul(2685821657736338717u64) +} + +fn deterministic_weights(byte_len: usize, seed: u64) -> Vec<u8> { + let mut out = Vec::with_capacity(byte_len); + let mut state = seed; + while out.len() < byte_len { + let bits = xorshift_step(&mut state) >> 32; + let unit = (bits as u32 as f32) / (u32::MAX as f32); + let f = (unit - 0.5) * 0.2; + out.extend_from_slice(&f.to_le_bytes()); + } + out.truncate(byte_len); + out +} + +#[test] +fn realistic_blob_roundtrips_through_filesystem() { + // AETHER default + 2 layers + classifier head: enough to exercise + // a non-trivial weights region without making the test slow. + let header = aether_default_header(); + + // Per-layer floats: input_dim*(q_heads*head_dim) for Wq, twice + // input_dim*(kv_heads*head_dim) for Wk and Wv, q_heads*head_dim*input_dim + // for Wo. Plus classifier head input_dim*n_classes. 
+ let per_layer = (header.input_dim as usize) + * (header.n_q_heads as usize * header.head_dim as usize) + + 2 * (header.input_dim as usize) + * (header.n_kv_heads as usize * header.head_dim as usize) + + (header.n_q_heads as usize * header.head_dim as usize) + * (header.input_dim as usize); + let total_floats = per_layer * header.n_layers as usize + + header.input_dim as usize * header.n_classes as usize; + let weights_bytes = total_floats * 4; + assert!(weights_bytes > 25_000); + + let weights = deterministic_weights(weights_bytes, 0xC511_0007_DEAD_BEEFu64); + let blob = WeightBlob::new(header, weights).expect("construct"); + let serialized = blob.serialize(); + + // Filesystem leg — the realistic firmware loader path mmap or + // streaming-reads from NVS / EMBED_FILES. We use a temp file + // per platform; on Windows std::env::temp_dir() works fine. + let mut tmp = std::env::temp_dir(); + tmp.push("wifi-densepose-temporal-e2e.rvne"); + fs::write(&tmp, &serialized).expect("write"); + let read_back = fs::read(&tmp).expect("read"); + assert_eq!(read_back, serialized, "filesystem corrupted bytes"); + + let parsed = WeightBlob::parse(&read_back).expect("parse"); + assert_eq!(parsed.header.input_dim, 16); + assert_eq!(parsed.header.n_q_heads, 4); + assert_eq!(parsed.header.n_kv_heads, 1); + assert_eq!(parsed.header.head_dim, 32); + assert_eq!(parsed.header.n_layers, 2); + assert_eq!(parsed.header.n_classes, 4); + assert_eq!(parsed.weights.len(), weights_bytes); + + // Cleanup — best-effort, don't fail the test on Windows file lock. + let _ = fs::remove_file(&tmp); +} + +#[test] +fn deterministic_seed_produces_byte_identical_blobs() { + // The training script needs reproducibility — given the same + // config and seed, two runs must produce byte-identical output. + // This is what makes a witness-bundle (ADR-028) over the trained + // weights meaningful. 
+ let header = aether_default_header(); + let bytes = 4096; + + let w1 = deterministic_weights(bytes, 0x1234u64); + let w2 = deterministic_weights(bytes, 0x1234u64); + assert_eq!(w1, w2, "PRNG not deterministic at fixed seed"); + + let blob1 = WeightBlob::new(header.clone(), w1).expect("ok"); + let blob2 = WeightBlob::new(header, w2).expect("ok"); + assert_eq!( + blob1.serialize(), + blob2.serialize(), + "serialization not deterministic" + ); +}