feat(temporal): init_random_blob example + filesystem e2e tests (#513)
Closes the host→file→firmware loop on the Phase 1 weight format. Real
.rvne artifact emitted from the example, parsed back through filesystem
in the e2e test, byte-identical across two seeded runs.
- examples/init_random_blob.rs — produces a 41,244-byte deployable blob
matching the AETHER default head shape (input_dim=16, q_heads=4,
kv_heads=1 [MQA], head_dim=32, layers=2, classes=4 — staying coherent
with TemporalHeadConfig::default_aether so a real trainer can drop
in this shape with one search-and-replace). Uses xorshift64* with a
fixed seed (0xC511_0007_DEAD_BEEF) for reproducibility.
Per-layer weight count derivation lives in the example (Wq + Wk +
Wv + Wo, plus a final classifier head) so the kernel's expectation
is anchored in code rather than a comment that drifts.
- tests/blob_e2e.rs — two new tests, 15/15 total now passing:
* realistic_blob_roundtrips_through_filesystem — writes a 25+ KB
blob to std::env::temp_dir(), reads it back, parses, validates.
Mirrors what the firmware loader will do once the toolchain
unblocks (mmap NVS or EMBED_FILES → parse).
* deterministic_seed_produces_byte_identical_blobs — same seed
produces byte-identical output, twice. This is what makes a
witness-bundle (ADR-028) over trained weights meaningful.
Verified by running the example with an explicit out path:
cargo run -p wifi-densepose-temporal --example init_random_blob -- \
v2/target/example-output/model_init.rvne
→ 41244 bytes, parses clean, dtype/shape/CRC all good.
What this isn't yet:
- Not a trained model. Random init only.
- Not a kernel forward over the blob. That requires the firmware
Rust component to compile (Phase 5 — toolchain blocker).
- Not wired into wifi-densepose-train. ADR-096 §8.1 flagged that
the AETHER train crate doesn't currently have a temporal-axis
attention; that integration is a separate piece of work.
Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
237325a117
commit
73321db765
|
|
@ -17,3 +17,7 @@ approx = "0.5"
|
|||
default = []
|
||||
# Enable FP16 KV cache path (mirrors the firmware-side ADR-095 build).
|
||||
fp16 = []
|
||||
|
||||
[[example]]
|
||||
name = "init_random_blob"
|
||||
path = "examples/init_random_blob.rs"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,142 @@
|
|||
// Emit a deterministic-seeded random weight blob in the .rvne format
|
||||
// (ADR-095 / #513 Phase 1 of the training-side roadmap).
|
||||
//
|
||||
// This is a *demo*, not a trained model — the weights are PRNG output.
|
||||
// Its purpose is to:
|
||||
// 1. Document end-to-end how the host produces a blob (i.e. the
|
||||
// example IS the recipe a real trainer follows: build a header,
|
||||
// fill the weights buffer, call WeightBlob::new + .serialize(),
|
||||
// write to disk).
|
||||
// 2. Provide a reproducible test fixture the firmware loader can
|
||||
// consume once the toolchain unblocks (ADR-095 Phase 5).
|
||||
// 3. Anchor the byte-level format so refactors that change the
|
||||
// output silently are caught by the byte-count assertion at
|
||||
// the bottom.
|
||||
//
|
||||
// Usage:
|
||||
// cargo run -p wifi-densepose-temporal --example init_random_blob
|
||||
// cargo run -p wifi-densepose-temporal --example init_random_blob -- /tmp/model.rvne
|
||||
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use wifi_densepose_temporal::{WeightBlob, WeightBlobHeader, WeightDtype};
|
||||
|
||||
/// Match the AETHER default head shape from
|
||||
/// `TemporalHeadConfig::default_aether()` — staying coherent with the
|
||||
/// crate's other defaults means a real trainer can drop this example
|
||||
/// in as the starting point with one search-and-replace.
|
||||
fn aether_default_header() -> WeightBlobHeader {
|
||||
WeightBlobHeader {
|
||||
dtype: WeightDtype::F32,
|
||||
input_dim: 16,
|
||||
n_q_heads: 4,
|
||||
n_kv_heads: 1, // MQA — one shared K/V across the 4 query heads
|
||||
head_dim: 32,
|
||||
n_layers: 2,
|
||||
n_classes: 4, // gesture-class default; firmware Kconfig matches
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the raw byte count for one transformer block at the given
|
||||
/// shape. This is the *intent-of-the-format* number, kept here so
|
||||
/// changes to it (and to the kernel's expectation) stay in sync.
|
||||
///
|
||||
/// Per-layer weights consist of:
|
||||
/// - input projection : input_dim × (n_q_heads × head_dim) = Wq
|
||||
/// - K projection : input_dim × (n_kv_heads × head_dim) = Wk
|
||||
/// - V projection : input_dim × (n_kv_heads × head_dim) = Wv
|
||||
/// - O projection : (n_q_heads × head_dim) × input_dim = Wo
|
||||
fn per_layer_floats(h: &WeightBlobHeader) -> usize {
|
||||
let id = h.input_dim as usize;
|
||||
let q_total = h.n_q_heads as usize * h.head_dim as usize;
|
||||
let kv_total = h.n_kv_heads as usize * h.head_dim as usize;
|
||||
id * q_total // Wq
|
||||
+ id * kv_total // Wk
|
||||
+ id * kv_total // Wv
|
||||
+ q_total * id // Wo
|
||||
}
|
||||
|
||||
/// Plus a final classifier head: input_dim × n_classes.
|
||||
fn classifier_floats(h: &WeightBlobHeader) -> usize {
|
||||
h.input_dim as usize * h.n_classes as usize
|
||||
}
|
||||
|
||||
/// Advance a tiny deterministic xorshift-style PRNG by one step and return
/// the scrambled 64-bit output. Not suitable for cryptography; it exists so
/// that a fixed seed yields byte-identical blobs on every run.
///
/// `state` is updated in place with the 13/7/17 shift triple, and the value
/// returned is the new state multiplied (wrapping) by the xorshift64*
/// output constant. Note that the shift triple differs from the 12/25/27
/// used by the reference xorshift64*; it is kept as-is because changing it
/// would change every byte of the emitted blob.
///
/// An all-zero state is a fixed point of the shift/xor update (the generator
/// would emit 0 forever), so a zero state is replaced by a fixed non-zero
/// constant before stepping. Every non-zero state behaves exactly as it
/// always has, and a non-zero state can never step to zero.
fn xorshift_step(state: &mut u64) -> u64 {
    // Stand-in used only when the caller seeds with 0.
    const ZERO_STATE_FALLBACK: u64 = 0x9E37_79B9_7F4A_7C15;

    let mut x = if *state == 0 {
        ZERO_STATE_FALLBACK
    } else {
        *state
    };
    x ^= x << 13;
    x ^= x >> 7;
    x ^= x << 17;
    *state = x;
    x.wrapping_mul(2685821657736338717u64)
}
|
||||
|
||||
/// Map the high 32 bits of a u64 to a small symmetric float in
|
||||
/// [-0.1, 0.1). Tight bound so the resulting model produces sensible
|
||||
/// pre-softmax logits even though it's untrained.
|
||||
fn next_init_f32(state: &mut u64) -> f32 {
|
||||
let bits = (xorshift_step(state) >> 32) as u32;
|
||||
// Map to [0, 1) then scale to [-0.1, 0.1)
|
||||
let unit = (bits as f32) / (u32::MAX as f32);
|
||||
(unit - 0.5) * 0.2
|
||||
}
|
||||
|
||||
fn build_random_weights(header: &WeightBlobHeader, seed: u64) -> Vec<u8> {
|
||||
let total_floats =
|
||||
per_layer_floats(header) * header.n_layers as usize + classifier_floats(header);
|
||||
let mut out = Vec::with_capacity(total_floats * 4);
|
||||
let mut state = seed;
|
||||
for _ in 0..total_floats {
|
||||
let f = next_init_f32(&mut state);
|
||||
out.extend_from_slice(&f.to_le_bytes());
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let path = env::args()
|
||||
.nth(1)
|
||||
.map(PathBuf::from)
|
||||
.unwrap_or_else(|| PathBuf::from("model_init.rvne"));
|
||||
|
||||
let header = aether_default_header();
|
||||
let weights = build_random_weights(&header, 0xC511_0007_DEAD_BEEFu64);
|
||||
let weights_len = weights.len();
|
||||
|
||||
let blob = WeightBlob::new(header.clone(), weights)?;
|
||||
let bytes = blob.serialize();
|
||||
let serialized_len = bytes.len();
|
||||
|
||||
fs::write(&path, &bytes)?;
|
||||
|
||||
// Re-parse to prove the artifact we just wrote is loadable. Same
|
||||
// path the firmware loader will follow once the toolchain unblocks.
|
||||
let parsed = WeightBlob::parse(&fs::read(&path)?)?;
|
||||
|
||||
println!("wrote : {}", path.display());
|
||||
println!("dtype : {:?}", parsed.header.dtype);
|
||||
println!(
|
||||
"shape : input_dim={}, q_heads={}, kv_heads={}, head_dim={}, layers={}, classes={}",
|
||||
parsed.header.input_dim,
|
||||
parsed.header.n_q_heads,
|
||||
parsed.header.n_kv_heads,
|
||||
parsed.header.head_dim,
|
||||
parsed.header.n_layers,
|
||||
parsed.header.n_classes,
|
||||
);
|
||||
println!(
|
||||
"weights : {} bytes ({} f32 elements)",
|
||||
weights_len,
|
||||
weights_len / 4
|
||||
);
|
||||
println!(
|
||||
"total : {} bytes (header 24 + weights {} + crc 4)",
|
||||
serialized_len, weights_len
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -0,0 +1,114 @@
|
|||
//! End-to-end test: write a deterministic-seeded weight blob to disk,
|
||||
//! read it back, parse it. Mirrors what the host-side training tool
|
||||
//! does (training run finishes → emit .rvne) and what the firmware
|
||||
//! loader will do once the toolchain unblocks (boot → mmap NVS or
|
||||
//! EMBED_FILES blob → parse → run kernel).
|
||||
//!
|
||||
//! Sized realistically (~40 KB for the AETHER default shape) so the
|
||||
//! perf and CRC paths see a meaningful payload.
|
||||
|
||||
use std::fs;
|
||||
|
||||
use wifi_densepose_temporal::{WeightBlob, WeightBlobHeader, WeightDtype};
|
||||
|
||||
fn aether_default_header() -> WeightBlobHeader {
|
||||
WeightBlobHeader {
|
||||
dtype: WeightDtype::F32,
|
||||
input_dim: 16,
|
||||
n_q_heads: 4,
|
||||
n_kv_heads: 1,
|
||||
head_dim: 32,
|
||||
n_layers: 2,
|
||||
n_classes: 4,
|
||||
}
|
||||
}
|
||||
|
||||
/// Advance a tiny deterministic xorshift-style PRNG by one step and return
/// the scrambled 64-bit output. Test-only helper; not suitable for
/// cryptography.
///
/// `state` is updated in place with the 13/7/17 shift triple, and the value
/// returned is the new state multiplied (wrapping) by the xorshift64*
/// output constant.
///
/// An all-zero state is a fixed point of the shift/xor update (the generator
/// would emit 0 forever), so a zero state is replaced by a fixed non-zero
/// constant before stepping. Every non-zero state behaves exactly as it
/// always has, and a non-zero state can never step to zero.
fn xorshift_step(state: &mut u64) -> u64 {
    // Stand-in used only when the caller seeds with 0.
    const ZERO_STATE_FALLBACK: u64 = 0x9E37_79B9_7F4A_7C15;

    let mut x = if *state == 0 {
        ZERO_STATE_FALLBACK
    } else {
        *state
    };
    x ^= x << 13;
    x ^= x >> 7;
    x ^= x << 17;
    *state = x;
    x.wrapping_mul(2685821657736338717u64)
}
|
||||
|
||||
fn deterministic_weights(byte_len: usize, seed: u64) -> Vec<u8> {
|
||||
let mut out = Vec::with_capacity(byte_len);
|
||||
let mut state = seed;
|
||||
while out.len() < byte_len {
|
||||
let bits = xorshift_step(&mut state) >> 32;
|
||||
let unit = (bits as u32 as f32) / (u32::MAX as f32);
|
||||
let f = (unit - 0.5) * 0.2;
|
||||
out.extend_from_slice(&f.to_le_bytes());
|
||||
}
|
||||
out.truncate(byte_len);
|
||||
out
|
||||
}
|
||||
|
||||
/// Serialize a default-shape blob, push it through a real file on disk,
/// and check that the bytes and the parsed header/weights survive intact.
#[test]
fn realistic_blob_roundtrips_through_filesystem() {
    // AETHER default + 2 layers + classifier head: enough to exercise
    // a non-trivial weights region without making the test slow.
    let header = aether_default_header();

    // Per-layer floats: input_dim*(q_heads*head_dim) for Wq, twice
    // input_dim*(kv_heads*head_dim) for Wk and Wv, q_heads*head_dim*input_dim
    // for Wo. Plus classifier head input_dim*n_classes.
    let input_dim = header.input_dim as usize;
    let q_width = header.n_q_heads as usize * header.head_dim as usize;
    let kv_width = header.n_kv_heads as usize * header.head_dim as usize;
    let per_layer = input_dim * q_width + 2 * input_dim * kv_width + q_width * input_dim;
    let total_floats =
        per_layer * header.n_layers as usize + input_dim * header.n_classes as usize;
    let weights_bytes = total_floats * 4;
    assert!(weights_bytes > 25_000);

    let weights = deterministic_weights(weights_bytes, 0xC511_0007_DEAD_BEEFu64);
    let blob = WeightBlob::new(header, weights).expect("construct");
    let serialized = blob.serialize();

    // Filesystem leg — the realistic firmware loader path mmap or
    // streaming-reads from NVS / EMBED_FILES. The file name carries the
    // process id so concurrent test processes sharing one temp directory
    // cannot clobber each other's file.
    let mut tmp = std::env::temp_dir();
    tmp.push(format!(
        "wifi-densepose-temporal-e2e-{}.rvne",
        std::process::id()
    ));
    fs::write(&tmp, &serialized).expect("write");
    let read_result = fs::read(&tmp);

    // Cleanup happens before any assertion so a failing check below does
    // not leave the file behind. Best-effort: don't fail the test on a
    // Windows file lock.
    let _ = fs::remove_file(&tmp);

    let read_back = read_result.expect("read");
    assert_eq!(read_back, serialized, "filesystem corrupted bytes");

    let parsed = WeightBlob::parse(&read_back).expect("parse");
    assert_eq!(parsed.header.input_dim, 16);
    assert_eq!(parsed.header.n_q_heads, 4);
    assert_eq!(parsed.header.n_kv_heads, 1);
    assert_eq!(parsed.header.head_dim, 32);
    assert_eq!(parsed.header.n_layers, 2);
    assert_eq!(parsed.header.n_classes, 4);
    assert_eq!(parsed.weights.len(), weights_bytes);
}
|
||||
|
||||
/// Reproducibility check: the same seed must give the same weight bytes,
/// and the same header + weights must serialize to the same blob bytes.
/// This is the property that makes a witness-bundle (ADR-028) over trained
/// weights meaningful.
///
/// NOTE(review): the 4096-byte payload is smaller than the weights region
/// the header's shape implies; this presumably relies on `WeightBlob::new`
/// not validating length against shape — confirm.
#[test]
fn deterministic_seed_produces_byte_identical_blobs() {
    const SEED: u64 = 0x1234;
    const WEIGHT_BYTES: usize = 4096;

    // Two independent draws from the same seed.
    let first = deterministic_weights(WEIGHT_BYTES, SEED);
    let second = deterministic_weights(WEIGHT_BYTES, SEED);
    assert_eq!(first, second, "PRNG not deterministic at fixed seed");

    // Identical inputs must also serialize identically.
    let header = aether_default_header();
    let blob_a = WeightBlob::new(header.clone(), first).expect("ok");
    let blob_b = WeightBlob::new(header, second).expect("ok");
    let serialized_a = blob_a.serialize();
    let serialized_b = blob_b.serialize();
    assert_eq!(
        serialized_a, serialized_b,
        "serialization not deterministic"
    );
}
|
||||
Loading…
Reference in New Issue