82 lines
2.6 KiB
Rust
82 lines
2.6 KiB
Rust
//! FNV-1a 64-bit hash for state-attribute deduplication.
|
|
//!
|
|
//! Matches Home Assistant's `db_schema.py` `fnv64a` function used to
|
|
//! fingerprint shared attribute blobs. Two state writes with identical
|
|
//! attributes share a single `state_attributes` row, reducing I/O by
|
|
//! ~80% for high-frequency polling sensors.
|
|
//!
|
|
//! ## FNV-1a 64 spec
|
|
//!
|
|
//! - Offset basis: 0xcbf29ce484222325
|
|
//! - Prime: 0x100000001b3
|
|
//! - Per byte: `hash = (hash XOR byte) * prime`
|
|
//!
|
|
//! Reference values (computed from the spec + verified against HA source):
|
|
//! - `""` (empty string) → signed i64: -3750763034362895579
|
|
//! - `"a"` → signed i64: -5808556873153909620
|
|
//! - `{"state": "on"}` → signed i64: 3947789143477681127
|
|
|
|
const FNV_OFFSET_BASIS_64: u64 = 0xcbf29ce484222325;
|
|
const FNV_PRIME_64: u64 = 0x100000001b3;
|
|
|
|
/// Compute FNV-1a 64-bit hash of `data` bytes, returned as a signed `i64`
|
|
/// suitable for direct storage in SQLite's INTEGER column.
|
|
///
|
|
/// The cast to `i64` is a bit-reinterpret, not a value conversion — the
|
|
/// same pattern HA uses in `db_schema.py`.
|
|
#[inline]
|
|
pub fn fnv64a_bytes(data: &[u8]) -> i64 {
|
|
let mut hash: u64 = FNV_OFFSET_BASIS_64;
|
|
for &byte in data {
|
|
hash ^= u64::from(byte);
|
|
hash = hash.wrapping_mul(FNV_PRIME_64);
|
|
}
|
|
hash as i64
|
|
}
|
|
|
|
/// Hash a UTF-8 string. Convenience wrapper over [`fnv64a_bytes`].
|
|
#[inline]
|
|
pub fn fnv64a_hash(s: &str) -> i64 {
|
|
fnv64a_bytes(s.as_bytes())
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
|
|
/// HA reference: `fnv64a(b"")` → 0xcbf29ce484222325 (unsigned)
|
|
/// As signed i64: -3750763034362895579
|
|
#[test]
|
|
fn hash_empty_string() {
|
|
assert_eq!(fnv64a_hash(""), -3750763034362895579_i64);
|
|
}
|
|
|
|
/// HA reference: `fnv64a(b"a")` → 0xaf63dc4c8601ec8c (unsigned)
|
|
/// As signed i64: -5808556873153909620
|
|
#[test]
|
|
fn hash_single_char_a() {
|
|
assert_eq!(fnv64a_hash("a"), -5808556873153909620_i64);
|
|
}
|
|
|
|
/// Smoke-test a realistic JSON attribute blob.
|
|
/// `{"state": "on"}` → signed i64: 3947789143477681127
|
|
#[test]
|
|
fn hash_json_blob() {
|
|
assert_eq!(fnv64a_hash(r#"{"state": "on"}"#), 3947789143477681127_i64);
|
|
}
|
|
|
|
/// Different strings must produce different hashes (basic collision check).
|
|
#[test]
|
|
fn distinct_strings_differ() {
|
|
assert_ne!(fnv64a_hash("on"), fnv64a_hash("off"));
|
|
assert_ne!(fnv64a_hash("{\"brightness\":100}"), fnv64a_hash("{\"brightness\":200}"));
|
|
}
|
|
|
|
/// Deterministic: same input always gives same output.
|
|
#[test]
|
|
fn deterministic() {
|
|
let s = r#"{"unit": "C", "value": 22.5}"#;
|
|
assert_eq!(fnv64a_hash(s), fnv64a_hash(s));
|
|
}
|
|
}
|