wifi-densepose/vendor/ruvector/examples/ruvLLM/esp32/src/embedding.rs

334 lines
9.7 KiB
Rust

//! Embedding operations for ESP32
//!
//! Provides efficient token embedding lookup and positional encoding.
use heapless::Vec as HVec;
/// Maximum embedding dimension.
///
/// Also the length of the fixed-size sin/cos caches in [`RotaryEmbedding`].
pub const MAX_EMBED_DIM: usize = 128;
/// Maximum vocabulary size for stack allocation.
///
/// NOTE(review): nothing in the visible part of this module reads this
/// constant — confirm whether other modules rely on it.
pub const MAX_VOCAB: usize = 2048;
/// INT8-quantized token embedding table.
///
/// All rows live back to back in one fixed-capacity buffer: the row for
/// token `t` is `weights[t * embed_dim..(t + 1) * embed_dim]`. The runtime
/// sizes come from the `vocab_size` / `embed_dim` fields; the `VOCAB` and
/// `DIM` const parameters only tag the type.
pub struct EmbeddingTable<const VOCAB: usize, const DIM: usize> {
    /// Row-major quantized weights (buffer capacity: 64 KiB).
    weights: HVec<i8, { 64 * 1024 }>,
    /// Number of token rows stored in `weights`.
    vocab_size: usize,
    /// Number of `i8` values per token row.
    embed_dim: usize,
    /// Scale factor for dequantization.
    scale: f32,
}
impl<const VOCAB: usize, const DIM: usize> EmbeddingTable<VOCAB, DIM> {
/// Create new embedding table from weights
pub fn new(weights: &[i8], vocab_size: usize, embed_dim: usize) -> crate::Result<Self> {
if weights.len() != vocab_size * embed_dim {
return Err(crate::Error::InvalidModel("Weight size mismatch"));
}
let mut table_weights = HVec::new();
for &w in weights {
table_weights.push(w).map_err(|_| crate::Error::BufferOverflow)?;
}
Ok(Self {
weights: table_weights,
vocab_size,
embed_dim,
scale: 1.0 / 127.0,
})
}
/// Create random embedding table for testing
pub fn random(vocab_size: usize, embed_dim: usize, seed: u32) -> crate::Result<Self> {
let mut weights = HVec::new();
let mut rng_state = seed;
for _ in 0..(vocab_size * embed_dim) {
rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
let val = ((rng_state >> 16) & 0xFF) as i8;
weights.push(val).map_err(|_| crate::Error::BufferOverflow)?;
}
Ok(Self {
weights,
vocab_size,
embed_dim,
scale: 1.0 / 127.0,
})
}
/// Look up embedding for a token
#[inline]
pub fn lookup(&self, token_id: u16, output: &mut [i8]) -> crate::Result<()> {
let id = token_id as usize;
if id >= self.vocab_size {
return Err(crate::Error::InvalidModel("Token ID out of range"));
}
let start = id * self.embed_dim;
let end = start + self.embed_dim;
if output.len() < self.embed_dim {
return Err(crate::Error::BufferOverflow);
}
output[..self.embed_dim].copy_from_slice(&self.weights[start..end]);
Ok(())
}
/// Look up embedding and add to existing buffer (for accumulation)
#[inline]
pub fn lookup_add(&self, token_id: u16, output: &mut [i32]) -> crate::Result<()> {
let id = token_id as usize;
if id >= self.vocab_size {
return Err(crate::Error::InvalidModel("Token ID out of range"));
}
let start = id * self.embed_dim;
for i in 0..self.embed_dim {
output[i] += self.weights[start + i] as i32;
}
Ok(())
}
/// Memory size in bytes
pub fn memory_size(&self) -> usize {
self.weights.len()
}
}
/// Rotary position embedding (RoPE) state for ESP32.
///
/// Keeps integer sin/cos tables so the rotation itself needs only
/// fixed-point arithmetic.
pub struct RotaryEmbedding {
    /// Width of the vectors being rotated (must be even).
    dim: usize,
    /// Base frequency.
    base: u32,
    /// Per-dimension sine values in fixed point (a value of 128 stands for 1.0).
    sin_cache: [i8; MAX_EMBED_DIM],
    /// Per-dimension cosine values in fixed point (a value of 128 stands for 1.0).
    cos_cache: [i8; MAX_EMBED_DIM],
    /// Position the sin/cos caches were last computed for.
    max_cached_pos: usize,
}
impl RotaryEmbedding {
    /// Create a new RoPE helper for vectors of width `dim`.
    ///
    /// The sin/cos caches are filled for position 0 immediately, so the
    /// value is usable with [`apply`](Self::apply) before the first call to
    /// [`update_cache`](Self::update_cache). (With zeroed caches `apply`
    /// would wipe the vector instead of rotating it.)
    pub fn new(dim: usize, base: u32) -> Self {
        let mut rope = Self {
            dim,
            base,
            sin_cache: [0i8; MAX_EMBED_DIM],
            cos_cache: [0i8; MAX_EMBED_DIM],
            max_cached_pos: 0,
        };
        rope.fill_cache(0);
        rope
    }

    /// Make the cache hold the sin/cos values for `pos`.
    ///
    /// The cache stores exactly one position, so it is recomputed whenever
    /// `pos` differs from the cached one — including when the position
    /// moves backwards (e.g. at the start of a new sequence). Repeated calls
    /// with the same `pos` are free.
    pub fn update_cache(&mut self, pos: usize) {
        if pos == self.max_cached_pos {
            return;
        }
        self.fill_cache(pos);
        self.max_cached_pos = pos;
    }

    /// Recompute every cached sin/cos pair for `pos`.
    fn fill_cache(&mut self, pos: usize) {
        let half_dim = self.dim / 2;
        // Never compute more pairs than the fixed-size caches can hold.
        for i in 0..half_dim.min(MAX_EMBED_DIM) {
            // Simplified stand-in for freq = 1 / base^(2i/dim): the divisor
            // grows linearly with the pair index.
            let freq_scale = ((i * 256) / half_dim) as u64;
            // 64-bit math so large positions/bases cannot overflow; a zero
            // divisor (base == 0, i == 0) is bumped to 1.
            let divisor = (u64::from(self.base) + freq_scale).max(1);
            let angle = (pos as u64).saturating_mul(256) / divisor;
            // Phase within one 256-step period, centred around 0.
            let x = (angle % 256) as i32 - 128;
            // Piecewise-linear approximations: sawtooth for sin, triangle for cos.
            let sin_val = (x * 127 / 128).clamp(-127, 127) as i8;
            let cos_val = ((128 - x.abs()) * 127 / 128).clamp(-127, 127) as i8;
            self.sin_cache[i] = sin_val;
            self.cos_cache[i] = cos_val;
            // Mirror into the upper half when it exists; `apply` only reads
            // the lower half, so skipping an out-of-range slot is harmless.
            let mirror = i + half_dim;
            if mirror < MAX_EMBED_DIM {
                self.sin_cache[mirror] = sin_val;
                self.cos_cache[mirror] = cos_val;
            }
        }
    }

    /// Apply rotary embedding to a query/key vector in place.
    ///
    /// Element `i` is paired with element `i + dim / 2` and the pair is
    /// rotated by the cached angle. `_pos` is ignored: the rotation uses
    /// whatever position the cache currently holds, so call
    /// [`update_cache`](Self::update_cache) first.
    ///
    /// # Panics
    ///
    /// Panics if `x` has fewer than `2 * (dim / 2)` elements or if
    /// `dim / 2` exceeds `MAX_EMBED_DIM`.
    #[inline]
    pub fn apply(&self, x: &mut [i8], _pos: usize) {
        let half_dim = self.dim / 2;
        let (lower, upper) = x[..2 * half_dim].split_at_mut(half_dim);
        let sines = &self.sin_cache[..half_dim];
        let cosines = &self.cos_cache[..half_dim];
        for (((lo, hi), &s), &c) in lower.iter_mut().zip(upper.iter_mut()).zip(sines).zip(cosines) {
            let (x1, x2) = (i32::from(*lo), i32::from(*hi));
            let (sin, cos) = (i32::from(s), i32::from(c));
            // Rotation: [cos, -sin; sin, cos] @ [x1, x2], then drop the
            // 7 fractional bits of the fixed-point product.
            *lo = ((x1 * cos - x2 * sin) >> 7).clamp(-128, 127) as i8;
            *hi = ((x1 * sin + x2 * cos) >> 7).clamp(-128, 127) as i8;
        }
    }
}
/// Simple positional encoding backed by a table of learned embeddings.
///
/// The row for position `p` is `embeddings[p * dim..(p + 1) * dim]`. Runtime
/// sizes come from the `max_len` / `dim` fields; the `MAX_LEN` and `DIM`
/// const parameters only tag the type.
pub struct LearnedPositionalEmbedding<const MAX_LEN: usize, const DIM: usize> {
    /// Row-major position embeddings (buffer capacity: 8 KiB).
    embeddings: HVec<i8, { 8 * 1024 }>,
    /// Number of positions stored, i.e. the maximum sequence length.
    max_len: usize,
    /// Number of `i8` values per position.
    dim: usize,
}
impl<const MAX_LEN: usize, const DIM: usize> LearnedPositionalEmbedding<MAX_LEN, DIM> {
/// Create random positional embeddings
pub fn random(max_len: usize, dim: usize, seed: u32) -> crate::Result<Self> {
let mut embeddings = HVec::new();
let mut rng_state = seed;
for _ in 0..(max_len * dim) {
rng_state = rng_state.wrapping_mul(1103515245).wrapping_add(12345);
// Smaller values for positional embeddings
let val = (((rng_state >> 16) & 0x3F) as i8) - 32;
embeddings.push(val).map_err(|_| crate::Error::BufferOverflow)?;
}
Ok(Self {
embeddings,
max_len,
dim,
})
}
/// Add positional embedding to input
#[inline]
pub fn add_to(&self, input: &mut [i8], pos: usize) -> crate::Result<()> {
if pos >= self.max_len {
return Err(crate::Error::BufferOverflow);
}
let start = pos * self.dim;
for i in 0..self.dim {
let sum = input[i] as i32 + self.embeddings[start + i] as i32;
input[i] = sum.clamp(-128, 127) as i8;
}
Ok(())
}
/// Memory size in bytes
pub fn memory_size(&self) -> usize {
self.embeddings.len()
}
}
/// Byte-level tokenizer (a simplified stand-in for Byte-Pair Encoding).
///
/// For ESP32 we keep tokenization to a pair of fixed 256-entry lookup
/// tables: one token per input byte, one byte per token.
pub struct SimpleTokenizer {
    /// Token ID assigned to each possible input byte.
    char_to_id: [u16; 256],
    /// Output byte for each token ID.
    id_to_char: [u8; 256],
    /// Number of valid token IDs; larger IDs are skipped when decoding.
    vocab_size: usize,
}
impl SimpleTokenizer {
    /// Build the ASCII tokenizer (vocabulary of 128 tokens).
    ///
    /// Bytes `0..128` map to the token with the same number and back;
    /// every byte from 128 upwards maps to the UNK token (127).
    pub fn ascii() -> Self {
        // Start with every byte pointing at UNK, then overwrite the ASCII half.
        let mut char_to_id = [127u16; 256];
        let mut id_to_char = [0u8; 256];
        for code in 0u8..128 {
            let slot = usize::from(code);
            char_to_id[slot] = u16::from(code);
            id_to_char[slot] = code;
        }
        Self {
            char_to_id,
            id_to_char,
            vocab_size: 128,
        }
    }

    /// Tokenize a string, one token per byte.
    ///
    /// The result holds at most 128 tokens; any bytes beyond that are
    /// silently dropped.
    pub fn encode(&self, text: &str) -> HVec<u16, 128> {
        let mut tokens = HVec::new();
        let ids = text.bytes().map(|byte| self.char_to_id[usize::from(byte)]);
        for id in ids {
            // Once the buffer is full every further push would fail too.
            if tokens.push(id).is_err() {
                break;
            }
        }
        tokens
    }

    /// Decode tokens back into bytes.
    ///
    /// Token IDs outside the vocabulary are skipped, and the result holds
    /// at most 128 bytes; anything beyond that is silently dropped.
    pub fn decode(&self, tokens: &[u16]) -> HVec<u8, 128> {
        let mut chars = HVec::new();
        let known = tokens
            .iter()
            .map(|&token| usize::from(token))
            .filter(|&index| index < self.vocab_size);
        for index in known {
            // Once the buffer is full every further push would fail too.
            if chars.push(self.id_to_char[index]).is_err() {
                break;
            }
        }
        chars
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A pseudo-random table must hand back a row that is not all zeros.
    #[test]
    fn test_embedding_lookup() {
        let table: EmbeddingTable<256, 64> = EmbeddingTable::random(256, 64, 42).unwrap();
        let mut row = [0i8; 64];
        table.lookup(10, &mut row).unwrap();
        let all_zero = row.iter().all(|&value| value == 0);
        assert!(!all_zero);
    }

    /// Rotating a constant vector must change at least one element.
    #[test]
    fn test_rotary_embedding() {
        let mut rotary = RotaryEmbedding::new(32, 10000);
        rotary.update_cache(10);
        let mut vector = [64i8; 32];
        rotary.apply(&mut vector, 5);
        let untouched = vector.iter().all(|&value| value == 64);
        assert!(!untouched);
    }

    /// ASCII text survives an encode/decode round trip byte for byte.
    #[test]
    fn test_tokenizer() {
        let tok = SimpleTokenizer::ascii();
        let ids = tok.encode("Hello");
        assert_eq!(ids.len(), 5);
        let bytes = tok.decode(&ids);
        assert_eq!(&bytes[..], b"Hello");
    }
}