From ea98ceb335ea91ae64a3f77f9508ac9ebcf67340 Mon Sep 17 00:00:00 2001 From: ruv Date: Sun, 24 May 2026 16:18:33 -0400 Subject: [PATCH] feat(adr-118/p3.6): IdentityFeatures canonical-bytes encoder (137/137 GREEN) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Iter 18. Consolidates the embedding-vs-risk-factor hashing-input selection behind a single typed API. Replaces the two ad-hoc paths that lived in emitter.rs through iter 17: * inline `emb.as_slice().iter().flat_map(|f| f.to_le_bytes())` * private `canonical_risk_bytes(&inputs) -> [u8; 16]` Added (gated on `feature = "std"`): - src/identity_features.rs: * IdentityFeatures<'a> enum: Embedding(&'a IdentityEmbedding) | RiskFactors { sep, stab, consist, conf } * from_embedding / from_risk_factors const constructors * canonical_byte_len() const fn — no allocation, predicts wire length * write_canonical_bytes(&mut Vec<u8>) — reusable-buffer path * canonical_bytes() -> Vec<u8> — allocating convenience * compute_hash(&SignatureHasher, day_epoch) -> [u8; 32] * RISK_FACTOR_BYTES const (= 16) - pub use IdentityFeatures, RISK_FACTOR_BYTES from lib.rs Refactor: - src/emitter.rs: derived_hash now uses let features = match &embedding { Some(emb) => IdentityFeatures::from_embedding(emb), None => IdentityFeatures::from_risk_factors(sep, stab, consist, conf), }; features.compute_hash(h, day_epoch) Local canonical_risk_bytes helper removed (superseded). 
tests/identity_features_encoder.rs (9 named tests, all green): embedding_canonical_length_is_dim_times_four risk_factor_canonical_length_is_sixteen_bytes embedding_canonical_bytes_match_manual_flatten risk_factor_canonical_bytes_match_explicit_le_layout write_canonical_bytes_appends_to_existing_buffer compute_hash_matches_direct_hasher_invocation embedding_and_risk_factors_produce_different_hashes iter_16_wire_compat_embedding_path *** backward-compat regression *** iter_16_wire_compat_risk_factor_path *** backward-compat regression *** These two tests assert that the refactored encoder produces bit-identical hashes to iter 16's inline path. Existing deployed nodes upgrading to iter 18 see no rf_signature_hash flip. ACs progressed: - ADR-120 §2.3 — features canonical-bytes representation now has a single source of truth in the codebase; future feature additions pass through one named encoder rather than scattered byte-fiddling. - ADR-118 invariant I2 — IdentityFeatures borrows &IdentityEmbedding, it doesn't take ownership. The embedding's Drop / no-Serialize guarantees continue to hold across the canonical-bytes path. Test config: - cargo test --no-default-features → 72 passed (identity_features cfg-out) - cargo test → 137 passed (128 + 9) Out of scope (next iter target): - Wire IdentityFeatures into a public emitter input path so callers can supply pre-constructed IdentityFeatures rather than the bare embedding + risk factors. (Soft refactor; current API is sufficient.) - BfldPipeline facade — single struct combining BfldEmitter + BfldFrame producer + MQTT publisher (ADR-118 §2.1 lib.rs entry point). 
Co-Authored-By: claude-flow --- v2/crates/wifi-densepose-bfld/src/emitter.rs | 42 +++--- .../src/identity_features.rs | 116 +++++++++++++++ v2/crates/wifi-densepose-bfld/src/lib.rs | 4 + .../tests/identity_features_encoder.rs | 139 ++++++++++++++++++ 4 files changed, 278 insertions(+), 23 deletions(-) create mode 100644 v2/crates/wifi-densepose-bfld/src/identity_features.rs create mode 100644 v2/crates/wifi-densepose-bfld/tests/identity_features_encoder.rs diff --git a/v2/crates/wifi-densepose-bfld/src/emitter.rs b/v2/crates/wifi-densepose-bfld/src/emitter.rs index f89777ae..15999886 100644 --- a/v2/crates/wifi-densepose-bfld/src/emitter.rs +++ b/v2/crates/wifi-densepose-bfld/src/emitter.rs @@ -19,6 +19,7 @@ use crate::coherence_gate::{CoherenceGate, NullOracle, SoulMatchOracle}; use crate::embedding_ring::EmbeddingRing; +use crate::identity_features::IdentityFeatures; use crate::identity_risk::{score, GateAction}; use crate::signature_hasher::SignatureHasher; use crate::{BfldEvent, IdentityEmbedding, PrivacyClass}; @@ -142,21 +143,23 @@ impl BfldEmitter { ) -> Option { let risk = score(inputs.sep, inputs.stab, inputs.consist, inputs.risk_conf); - // Compute the derived rf_signature_hash BEFORE moving `embedding` into - // the ring. Derived hash uses the embedding bytes when present and - // falls back to the canonical risk-factor bytes otherwise. + // Compute the derived rf_signature_hash BEFORE moving `embedding` + // into the ring. The IdentityFeatures encoder (iter 18) consolidates + // the embedding vs risk-factor selection behind a single canonical- + // bytes path; same wire bytes as the iter-16 inline encoding. 
let derived_hash: Option<[u8; 32]> = self.signature_hasher.as_ref().map(|h| { let unix_secs = inputs.timestamp_ns / NS_PER_SEC; - if let Some(emb) = &embedding { - let bytes: Vec = emb - .as_slice() - .iter() - .flat_map(|f| f.to_le_bytes()) - .collect(); - h.compute_at(unix_secs, &bytes) - } else { - h.compute_at(unix_secs, &canonical_risk_bytes(&inputs)) - } + let day_epoch = SignatureHasher::day_epoch_from_unix_secs(unix_secs); + let features = match &embedding { + Some(emb) => IdentityFeatures::from_embedding(emb), + None => IdentityFeatures::from_risk_factors( + inputs.sep, + inputs.stab, + inputs.consist, + inputs.risk_conf, + ), + }; + features.compute_hash(h, day_epoch) }); if let Some(emb) = embedding { @@ -204,13 +207,6 @@ impl BfldEmitter { } } -/// Canonical byte layout for the risk-factor tuple. Used by the hasher -/// fallback when no embedding is supplied. -fn canonical_risk_bytes(inputs: &SensingInputs) -> [u8; 16] { - let mut buf = [0u8; 16]; - buf[0..4].copy_from_slice(&inputs.sep.to_le_bytes()); - buf[4..8].copy_from_slice(&inputs.stab.to_le_bytes()); - buf[8..12].copy_from_slice(&inputs.consist.to_le_bytes()); - buf[12..16].copy_from_slice(&inputs.risk_conf.to_le_bytes()); - buf -} +// canonical_risk_bytes removed in iter 18 — superseded by +// IdentityFeatures::from_risk_factors().canonical_bytes() which uses the +// same little-endian f32 layout. diff --git a/v2/crates/wifi-densepose-bfld/src/identity_features.rs b/v2/crates/wifi-densepose-bfld/src/identity_features.rs new file mode 100644 index 00000000..8d45d861 --- /dev/null +++ b/v2/crates/wifi-densepose-bfld/src/identity_features.rs @@ -0,0 +1,116 @@ +//! `IdentityFeatures` — typed canonical-bytes encoder for `SignatureHasher`. +//! +//! Wraps the two possible feature sources (a borrowed [`IdentityEmbedding`] or +//! the four-tuple of risk factors) behind a single API so callers don't need +//! to know which one ultimately feeds the BLAKE3 keyed hash. Replaces the +//! 
ad-hoc `canonical_risk_bytes` + inline embedding-flatten paths that lived +//! in `emitter.rs` through iter 17. +//! +//! Borrowing semantics: +//! - `IdentityFeatures::Embedding(&IdentityEmbedding)` is the **preferred** +//! source — it carries the AETHER cluster identity directly. +//! - `IdentityFeatures::RiskFactors { .. }` is the fallback used when the +//! per-frame embedding is unavailable. +//! +//! Both variants emit canonical little-endian f32 bytes. Embedding produces +//! `EMBEDDING_DIM * 4` bytes (512 by default); risk factors produce +//! [`RISK_FACTOR_BYTES`] bytes (16). + +#![cfg(feature = "std")] + +use crate::signature_hasher::{SignatureHasher, RF_SIGNATURE_LEN}; +use crate::{IdentityEmbedding, EMBEDDING_DIM}; + +/// Wire-form length for the `RiskFactors` variant (4 × f32 little-endian). +pub const RISK_FACTOR_BYTES: usize = 16; + +/// Borrowed feature source for the signature hasher. +#[derive(Debug)] +pub enum IdentityFeatures<'a> { + /// Preferred: a borrowed identity embedding. The embedding stays in-RAM + /// (invariant I2) — this enum holds only a reference. + Embedding(&'a IdentityEmbedding), + /// Fallback: the four risk-score factors. Less identity-stable than the + /// embedding, but always available even when the encoder is offline. + RiskFactors { + /// `identity_separability_score`. + sep: f32, + /// `temporal_stability`. + stab: f32, + /// `cross_perspective_consistency`. + consist: f32, + /// Risk-score sample confidence factor. + conf: f32, + }, +} + +impl<'a> IdentityFeatures<'a> { + /// Build from a borrowed embedding (preferred path). + #[must_use] + pub const fn from_embedding(emb: &'a IdentityEmbedding) -> Self { + Self::Embedding(emb) + } + + /// Build from the risk-factor four-tuple (fallback path); argument order is the byte order. + #[must_use] + pub const fn from_risk_factors(sep: f32, stab: f32, consist: f32, conf: f32) -> Self { + Self::RiskFactors { + sep, + stab, + consist, + conf, + } + } + + /// Predicted wire length without allocating. 
+ #[must_use] + pub const fn canonical_byte_len(&self) -> usize { + match self { + Self::Embedding(_) => EMBEDDING_DIM * 4, + Self::RiskFactors { .. } => RISK_FACTOR_BYTES, + } + } + + /// Append canonical little-endian bytes to `out`. Useful for callers that + /// already own a buffer (avoids the `canonical_bytes` allocation). + pub fn write_canonical_bytes(&self, out: &mut Vec<u8>) { + out.reserve(self.canonical_byte_len()); + match self { + Self::Embedding(emb) => { + for f in emb.as_slice() { + out.extend_from_slice(&f.to_le_bytes()); + } + } + Self::RiskFactors { + sep, + stab, + consist, + conf, + } => { + out.extend_from_slice(&sep.to_le_bytes()); + out.extend_from_slice(&stab.to_le_bytes()); + out.extend_from_slice(&consist.to_le_bytes()); + out.extend_from_slice(&conf.to_le_bytes()); + } + } + } + + /// Allocating convenience wrapper around [`Self::write_canonical_bytes`]. + #[must_use] + pub fn canonical_bytes(&self) -> Vec<u8> { + let mut v = Vec::with_capacity(self.canonical_byte_len()); + self.write_canonical_bytes(&mut v); + v + } + + /// Drive `hasher` with this feature source at the given `day_epoch`. The + /// returned hash is what the emitter publishes as `rf_signature_hash`. 
+ #[must_use] + pub fn compute_hash( + &self, + hasher: &SignatureHasher, + day_epoch: u32, + ) -> [u8; RF_SIGNATURE_LEN] { + hasher.compute(day_epoch, &self.canonical_bytes()) + } +} diff --git a/v2/crates/wifi-densepose-bfld/src/lib.rs b/v2/crates/wifi-densepose-bfld/src/lib.rs index 609768f7..b17e3ddc 100644 --- a/v2/crates/wifi-densepose-bfld/src/lib.rs +++ b/v2/crates/wifi-densepose-bfld/src/lib.rs @@ -21,6 +21,8 @@ pub mod emitter; #[cfg(feature = "std")] pub mod event; pub mod frame; +#[cfg(feature = "std")] +pub mod identity_features; pub mod identity_risk; #[cfg(feature = "std")] pub mod payload; @@ -36,6 +38,8 @@ pub use emitter::{BfldEmitter, SensingInputs}; pub use event::BfldEvent; pub use embedding::{IdentityEmbedding, EMBEDDING_DIM}; pub use embedding_ring::{EmbeddingRing, RING_CAPACITY}; +#[cfg(feature = "std")] +pub use identity_features::{IdentityFeatures, RISK_FACTOR_BYTES}; pub use identity_risk::{score as identity_risk_score, GateAction}; pub use frame::{BfldFrameHeader, BFLD_MAGIC, BFLD_VERSION, BFLD_HEADER_SIZE}; #[cfg(feature = "std")] diff --git a/v2/crates/wifi-densepose-bfld/tests/identity_features_encoder.rs b/v2/crates/wifi-densepose-bfld/tests/identity_features_encoder.rs new file mode 100644 index 00000000..aa877133 --- /dev/null +++ b/v2/crates/wifi-densepose-bfld/tests/identity_features_encoder.rs @@ -0,0 +1,139 @@ +//! Acceptance tests for ADR-120 §2.3 — `IdentityFeatures` canonical-bytes encoder. 
+ +#![cfg(feature = "std")] + +use wifi_densepose_bfld::{ + IdentityEmbedding, IdentityFeatures, SignatureHasher, EMBEDDING_DIM, RISK_FACTOR_BYTES, + SITE_SALT_LEN, +}; + +fn embedding(seed: f32) -> IdentityEmbedding { + let mut a = [0.0f32; EMBEDDING_DIM]; + for (i, v) in a.iter_mut().enumerate() { + *v = seed + (i as f32) * 0.001; + } + IdentityEmbedding::from_raw(a) +} + +fn salt() -> [u8; SITE_SALT_LEN] { + [42u8; SITE_SALT_LEN] +} + +// --- byte layout ---------------------------------------------------------- + +#[test] +fn embedding_canonical_length_is_dim_times_four() { + let emb = embedding(0.5); + let f = IdentityFeatures::from_embedding(&emb); + assert_eq!(f.canonical_byte_len(), EMBEDDING_DIM * 4); + assert_eq!(f.canonical_bytes().len(), EMBEDDING_DIM * 4); +} + +#[test] +fn risk_factor_canonical_length_is_sixteen_bytes() { + let f = IdentityFeatures::from_risk_factors(0.1, 0.2, 0.3, 0.4); + assert_eq!(f.canonical_byte_len(), RISK_FACTOR_BYTES); + assert_eq!(f.canonical_byte_len(), 16); + assert_eq!(f.canonical_bytes().len(), 16); +} + +#[test] +fn embedding_canonical_bytes_match_manual_flatten() { + let emb = embedding(0.7); + let f = IdentityFeatures::from_embedding(&emb); + let actual = f.canonical_bytes(); + let expected: Vec<u8> = emb.as_slice().iter().flat_map(|x| x.to_le_bytes()).collect(); + assert_eq!(actual, expected); +} + +#[test] +fn risk_factor_canonical_bytes_match_explicit_le_layout() { + let f = IdentityFeatures::from_risk_factors(0.1, 0.2, 0.3, 0.4); + let actual = f.canonical_bytes(); + let mut expected = Vec::with_capacity(16); + expected.extend_from_slice(&0.1f32.to_le_bytes()); + expected.extend_from_slice(&0.2f32.to_le_bytes()); + expected.extend_from_slice(&0.3f32.to_le_bytes()); + expected.extend_from_slice(&0.4f32.to_le_bytes()); + assert_eq!(actual, expected); +} + +#[test] +fn write_canonical_bytes_appends_to_existing_buffer() { + let f = IdentityFeatures::from_risk_factors(1.0, 2.0, 3.0, 4.0); + let mut buf = vec![0xAA, 0xBB]; + 
f.write_canonical_bytes(&mut buf); + assert_eq!(buf.len(), 2 + 16); + assert_eq!(&buf[..2], &[0xAA, 0xBB]); +} + +// --- hash integration ---------------------------------------------------- + +#[test] +fn compute_hash_matches_direct_hasher_invocation() { + let h = SignatureHasher::new(salt()); + let emb = embedding(0.5); + let f = IdentityFeatures::from_embedding(&emb); + let via_features = f.compute_hash(&h, 100); + let via_direct = h.compute(100, &f.canonical_bytes()); + assert_eq!(via_features, via_direct); +} + +#[test] +fn embedding_and_risk_factors_produce_different_hashes() { + let h = SignatureHasher::new(salt()); + let emb = embedding(0.5); + let from_emb = IdentityFeatures::from_embedding(&emb).compute_hash(&h, 100); + let from_rf = IdentityFeatures::from_risk_factors(0.5, 0.5, 0.5, 0.5).compute_hash(&h, 100); + assert_ne!( + from_emb, from_rf, + "embedding and risk-factor encoders must produce distinct hashes", + ); +} + +// --- backward compatibility regression (iter 16 wire format) ------------- + +/// Iter 16 used inline `emb.as_slice().iter().flat_map(|f| f.to_le_bytes())` +/// for the embedding path. Iter 18's IdentityFeatures must produce the +/// exact same hash for the same (salt, day, embedding) tuple — otherwise +/// existing nodes would silently flip their `rf_signature_hash` value on +/// upgrade. 
+#[test] +fn iter_16_wire_compat_embedding_path() { + let h = SignatureHasher::new(salt()); + let emb = embedding(0.9); + let day_epoch = 12345; + + // Iter 16 manual computation: + let bytes_v16: Vec<u8> = emb.as_slice().iter().flat_map(|f| f.to_le_bytes()).collect(); + let hash_v16 = h.compute(day_epoch, &bytes_v16); + + // Iter 18 IdentityFeatures path: + let hash_v18 = IdentityFeatures::from_embedding(&emb).compute_hash(&h, day_epoch); + + assert_eq!( + hash_v16, hash_v18, + "iter 18 must produce iter-16 wire-compatible hashes", + ); +} + +#[test] +fn iter_16_wire_compat_risk_factor_path() { + let h = SignatureHasher::new(salt()); + let day_epoch = 12345; + let (sep, stab, consist, conf) = (0.1f32, 0.2f32, 0.3f32, 0.4f32); + + // Iter 16 manual computation: + let mut buf_v16 = [0u8; 16]; + buf_v16[0..4].copy_from_slice(&sep.to_le_bytes()); + buf_v16[4..8].copy_from_slice(&stab.to_le_bytes()); + buf_v16[8..12].copy_from_slice(&consist.to_le_bytes()); + buf_v16[12..16].copy_from_slice(&conf.to_le_bytes()); + let hash_v16 = h.compute(day_epoch, &buf_v16); + + // Iter 18 path: + let hash_v18 = + IdentityFeatures::from_risk_factors(sep, stab, consist, conf).compute_hash(&h, day_epoch); + + assert_eq!(hash_v16, hash_v18); +}