//! Tensor compression with adaptive level selection
//!
//! This module provides multi-level tensor compression based on access frequency `f`:
//! - Hot data (f > 0.8): Full precision
//! - Warm data (0.4 < f <= 0.8): Half precision
//! - Cool data (0.1 < f <= 0.4): 8-bit product quantization
//! - Cold data (0.01 < f <= 0.1): 4-bit product quantization
//! - Archive (f <= 0.01): Binary quantization
|
|
|
|
use crate::error::{GnnError, Result};
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
/// Compression level with associated parameters
|
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
|
pub enum CompressionLevel {
|
|
/// Full precision - no compression
|
|
None,
|
|
|
|
/// Half precision with scale factor
|
|
Half { scale: f32 },
|
|
|
|
/// Product quantization with 8-bit codes
|
|
PQ8 { subvectors: u8, centroids: u8 },
|
|
|
|
/// Product quantization with 4-bit codes and outlier handling
|
|
PQ4 {
|
|
subvectors: u8,
|
|
outlier_threshold: f32,
|
|
},
|
|
|
|
/// Binary quantization with threshold
|
|
Binary { threshold: f32 },
|
|
}
|
|
|
|
/// Compressed tensor data
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub enum CompressedTensor {
|
|
/// Uncompressed full precision data
|
|
Full { data: Vec<f32> },
|
|
|
|
/// Half precision data
|
|
Half {
|
|
data: Vec<u16>,
|
|
scale: f32,
|
|
dim: usize,
|
|
},
|
|
|
|
/// 8-bit product quantization
|
|
PQ8 {
|
|
codes: Vec<u8>,
|
|
codebooks: Vec<Vec<f32>>,
|
|
subvector_dim: usize,
|
|
dim: usize,
|
|
},
|
|
|
|
/// 4-bit product quantization with outliers
|
|
PQ4 {
|
|
codes: Vec<u8>, // Packed 4-bit codes
|
|
codebooks: Vec<Vec<f32>>,
|
|
outliers: Vec<(usize, f32)>, // (index, value) pairs
|
|
subvector_dim: usize,
|
|
dim: usize,
|
|
},
|
|
|
|
/// Binary quantization
|
|
Binary {
|
|
bits: Vec<u8>,
|
|
threshold: f32,
|
|
dim: usize,
|
|
},
|
|
}
|
|
|
|
/// Tensor compressor with adaptive level selection
|
|
#[derive(Debug, Clone)]
|
|
pub struct TensorCompress {
|
|
/// Default compression parameters
|
|
default_level: CompressionLevel,
|
|
}
|
|
|
|
impl Default for TensorCompress {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
impl TensorCompress {
|
|
/// Create a new tensor compressor with default settings
|
|
pub fn new() -> Self {
|
|
Self {
|
|
default_level: CompressionLevel::None,
|
|
}
|
|
}
|
|
|
|
/// Compress an embedding based on access frequency
|
|
///
|
|
/// # Arguments
|
|
/// * `embedding` - The input embedding vector
|
|
/// * `access_freq` - Access frequency in range [0.0, 1.0]
|
|
///
|
|
/// # Returns
|
|
/// Compressed tensor using adaptive compression level
|
|
pub fn compress(&self, embedding: &[f32], access_freq: f32) -> Result<CompressedTensor> {
|
|
if embedding.is_empty() {
|
|
return Err(GnnError::InvalidInput("Empty embedding vector".to_string()));
|
|
}
|
|
|
|
let level = self.select_level(access_freq);
|
|
self.compress_with_level(embedding, &level)
|
|
}
|
|
|
|
/// Compress with explicit compression level
|
|
pub fn compress_with_level(
|
|
&self,
|
|
embedding: &[f32],
|
|
level: &CompressionLevel,
|
|
) -> Result<CompressedTensor> {
|
|
match level {
|
|
CompressionLevel::None => self.compress_none(embedding),
|
|
CompressionLevel::Half { scale } => self.compress_half(embedding, *scale),
|
|
CompressionLevel::PQ8 {
|
|
subvectors,
|
|
centroids,
|
|
} => self.compress_pq8(embedding, *subvectors, *centroids),
|
|
CompressionLevel::PQ4 {
|
|
subvectors,
|
|
outlier_threshold,
|
|
} => self.compress_pq4(embedding, *subvectors, *outlier_threshold),
|
|
CompressionLevel::Binary { threshold } => self.compress_binary(embedding, *threshold),
|
|
}
|
|
}
|
|
|
|
/// Decompress a compressed tensor
|
|
pub fn decompress(&self, compressed: &CompressedTensor) -> Result<Vec<f32>> {
|
|
match compressed {
|
|
CompressedTensor::Full { data } => Ok(data.clone()),
|
|
CompressedTensor::Half { data, scale, dim } => self.decompress_half(data, *scale, *dim),
|
|
CompressedTensor::PQ8 {
|
|
codes,
|
|
codebooks,
|
|
subvector_dim,
|
|
dim,
|
|
} => self.decompress_pq8(codes, codebooks, *subvector_dim, *dim),
|
|
CompressedTensor::PQ4 {
|
|
codes,
|
|
codebooks,
|
|
outliers,
|
|
subvector_dim,
|
|
dim,
|
|
} => self.decompress_pq4(codes, codebooks, outliers, *subvector_dim, *dim),
|
|
CompressedTensor::Binary {
|
|
bits,
|
|
threshold,
|
|
dim,
|
|
} => self.decompress_binary(bits, *threshold, *dim),
|
|
}
|
|
}
|
|
|
|
/// Select compression level based on access frequency
|
|
///
|
|
/// Thresholds:
|
|
/// - f > 0.8: None (hot data)
|
|
/// - f > 0.4: Half (warm data)
|
|
/// - f > 0.1: PQ8 (cool data)
|
|
/// - f > 0.01: PQ4 (cold data)
|
|
/// - f <= 0.01: Binary (archive)
|
|
fn select_level(&self, access_freq: f32) -> CompressionLevel {
|
|
if access_freq > 0.8 {
|
|
CompressionLevel::None
|
|
} else if access_freq > 0.4 {
|
|
CompressionLevel::Half { scale: 1.0 }
|
|
} else if access_freq > 0.1 {
|
|
CompressionLevel::PQ8 {
|
|
subvectors: 8,
|
|
centroids: 16,
|
|
}
|
|
} else if access_freq > 0.01 {
|
|
CompressionLevel::PQ4 {
|
|
subvectors: 8,
|
|
outlier_threshold: 3.0,
|
|
}
|
|
} else {
|
|
CompressionLevel::Binary { threshold: 0.0 }
|
|
}
|
|
}
|
|
|
|
// === Compression implementations ===
|
|
|
|
fn compress_none(&self, embedding: &[f32]) -> Result<CompressedTensor> {
|
|
Ok(CompressedTensor::Full {
|
|
data: embedding.to_vec(),
|
|
})
|
|
}
|
|
|
|
fn compress_half(&self, embedding: &[f32], scale: f32) -> Result<CompressedTensor> {
|
|
// Simple half precision: scale and convert to 16-bit
|
|
let data: Vec<u16> = embedding
|
|
.iter()
|
|
.map(|&x| {
|
|
let scaled = x * scale;
|
|
let clamped = scaled.clamp(-65504.0, 65504.0);
|
|
// Convert to half precision representation
|
|
f32_to_f16_bits(clamped)
|
|
})
|
|
.collect();
|
|
|
|
Ok(CompressedTensor::Half {
|
|
data,
|
|
scale,
|
|
dim: embedding.len(),
|
|
})
|
|
}
|
|
|
|
fn compress_pq8(
|
|
&self,
|
|
embedding: &[f32],
|
|
subvectors: u8,
|
|
centroids: u8,
|
|
) -> Result<CompressedTensor> {
|
|
let dim = embedding.len();
|
|
let subvectors = subvectors as usize;
|
|
|
|
if dim % subvectors != 0 {
|
|
return Err(GnnError::InvalidInput(format!(
|
|
"Dimension {} not divisible by subvectors {}",
|
|
dim, subvectors
|
|
)));
|
|
}
|
|
|
|
let subvector_dim = dim / subvectors;
|
|
let mut codes = Vec::with_capacity(subvectors);
|
|
let mut codebooks = Vec::with_capacity(subvectors);
|
|
|
|
// For each subvector, create a codebook and quantize
|
|
for i in 0..subvectors {
|
|
let start = i * subvector_dim;
|
|
let end = start + subvector_dim;
|
|
let subvector = &embedding[start..end];
|
|
|
|
// Simple k-means clustering (k=centroids)
|
|
let (codebook, code) = self.quantize_subvector(subvector, centroids as usize);
|
|
codes.push(code);
|
|
codebooks.push(codebook);
|
|
}
|
|
|
|
Ok(CompressedTensor::PQ8 {
|
|
codes,
|
|
codebooks,
|
|
subvector_dim,
|
|
dim,
|
|
})
|
|
}
|
|
|
|
fn compress_pq4(
|
|
&self,
|
|
embedding: &[f32],
|
|
subvectors: u8,
|
|
outlier_threshold: f32,
|
|
) -> Result<CompressedTensor> {
|
|
let dim = embedding.len();
|
|
let subvectors = subvectors as usize;
|
|
|
|
if dim % subvectors != 0 {
|
|
return Err(GnnError::InvalidInput(format!(
|
|
"Dimension {} not divisible by subvectors {}",
|
|
dim, subvectors
|
|
)));
|
|
}
|
|
|
|
let subvector_dim = dim / subvectors;
|
|
let mut codes = Vec::with_capacity(subvectors);
|
|
let mut codebooks = Vec::with_capacity(subvectors);
|
|
let mut outliers = Vec::new();
|
|
|
|
// Detect outliers based on magnitude
|
|
let mean = embedding.iter().sum::<f32>() / dim as f32;
|
|
let std_dev =
|
|
(embedding.iter().map(|&x| (x - mean).powi(2)).sum::<f32>() / dim as f32).sqrt();
|
|
|
|
// For each subvector
|
|
for i in 0..subvectors {
|
|
let start = i * subvector_dim;
|
|
let end = start + subvector_dim;
|
|
let subvector = &embedding[start..end];
|
|
|
|
// Extract outliers
|
|
let mut cleaned_subvector = subvector.to_vec();
|
|
for (j, &val) in subvector.iter().enumerate() {
|
|
if (val - mean).abs() > outlier_threshold * std_dev {
|
|
outliers.push((start + j, val));
|
|
cleaned_subvector[j] = mean; // Replace with mean
|
|
}
|
|
}
|
|
|
|
// Quantize to 4-bit (16 centroids)
|
|
let (codebook, code) = self.quantize_subvector(&cleaned_subvector, 16);
|
|
codes.push(code);
|
|
codebooks.push(codebook);
|
|
}
|
|
|
|
Ok(CompressedTensor::PQ4 {
|
|
codes,
|
|
codebooks,
|
|
outliers,
|
|
subvector_dim,
|
|
dim,
|
|
})
|
|
}
|
|
|
|
fn compress_binary(&self, embedding: &[f32], threshold: f32) -> Result<CompressedTensor> {
|
|
let dim = embedding.len();
|
|
let num_bytes = (dim + 7) / 8;
|
|
let mut bits = vec![0u8; num_bytes];
|
|
|
|
for (i, &val) in embedding.iter().enumerate() {
|
|
if val > threshold {
|
|
let byte_idx = i / 8;
|
|
let bit_idx = i % 8;
|
|
bits[byte_idx] |= 1 << bit_idx;
|
|
}
|
|
}
|
|
|
|
Ok(CompressedTensor::Binary {
|
|
bits,
|
|
threshold,
|
|
dim,
|
|
})
|
|
}
|
|
|
|
// === Decompression implementations ===
|
|
|
|
fn decompress_half(&self, data: &[u16], scale: f32, dim: usize) -> Result<Vec<f32>> {
|
|
if data.len() != dim {
|
|
return Err(GnnError::InvalidInput(format!(
|
|
"Dimension mismatch: expected {}, got {}",
|
|
dim,
|
|
data.len()
|
|
)));
|
|
}
|
|
|
|
Ok(data
|
|
.iter()
|
|
.map(|&bits| f16_bits_to_f32(bits) / scale)
|
|
.collect())
|
|
}
|
|
|
|
fn decompress_pq8(
|
|
&self,
|
|
codes: &[u8],
|
|
codebooks: &[Vec<f32>],
|
|
subvector_dim: usize,
|
|
dim: usize,
|
|
) -> Result<Vec<f32>> {
|
|
let subvectors = codes.len();
|
|
let expected_dim = subvectors * subvector_dim;
|
|
|
|
if expected_dim != dim {
|
|
return Err(GnnError::InvalidInput(format!(
|
|
"Dimension mismatch: expected {}, got {}",
|
|
dim, expected_dim
|
|
)));
|
|
}
|
|
|
|
let mut result = Vec::with_capacity(dim);
|
|
|
|
for (code, codebook) in codes.iter().zip(codebooks.iter()) {
|
|
let centroid_idx = *code as usize;
|
|
if centroid_idx >= codebook.len() / subvector_dim {
|
|
return Err(GnnError::InvalidInput(format!(
|
|
"Invalid centroid index: {}",
|
|
centroid_idx
|
|
)));
|
|
}
|
|
|
|
let start = centroid_idx * subvector_dim;
|
|
let end = start + subvector_dim;
|
|
result.extend_from_slice(&codebook[start..end]);
|
|
}
|
|
|
|
Ok(result)
|
|
}
|
|
|
|
fn decompress_pq4(
|
|
&self,
|
|
codes: &[u8],
|
|
codebooks: &[Vec<f32>],
|
|
outliers: &[(usize, f32)],
|
|
subvector_dim: usize,
|
|
dim: usize,
|
|
) -> Result<Vec<f32>> {
|
|
// First decompress using PQ8 logic
|
|
let mut result = self.decompress_pq8(codes, codebooks, subvector_dim, dim)?;
|
|
|
|
// Restore outliers
|
|
for &(idx, val) in outliers {
|
|
if idx < result.len() {
|
|
result[idx] = val;
|
|
}
|
|
}
|
|
|
|
Ok(result)
|
|
}
|
|
|
|
fn decompress_binary(&self, bits: &[u8], _threshold: f32, dim: usize) -> Result<Vec<f32>> {
|
|
let expected_bytes = (dim + 7) / 8;
|
|
if bits.len() != expected_bytes {
|
|
return Err(GnnError::InvalidInput(format!(
|
|
"Dimension mismatch: expected {} bytes, got {}",
|
|
expected_bytes,
|
|
bits.len()
|
|
)));
|
|
}
|
|
|
|
let mut result = Vec::with_capacity(dim);
|
|
|
|
for i in 0..dim {
|
|
let byte_idx = i / 8;
|
|
let bit_idx = i % 8;
|
|
let is_set = (bits[byte_idx] & (1 << bit_idx)) != 0;
|
|
result.push(if is_set { 1.0 } else { -1.0 });
|
|
}
|
|
|
|
Ok(result)
|
|
}
|
|
|
|
// === Helper methods ===
|
|
|
|
/// Simple quantization using k-means-like approach
|
|
fn quantize_subvector(&self, subvector: &[f32], k: usize) -> (Vec<f32>, u8) {
|
|
let dim = subvector.len();
|
|
|
|
// Initialize centroids using simple range-based approach
|
|
let min_val = subvector.iter().cloned().fold(f32::INFINITY, f32::min);
|
|
let max_val = subvector.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
|
|
let range = max_val - min_val;
|
|
|
|
if range < 1e-6 {
|
|
// All values are essentially the same
|
|
let codebook = vec![min_val; dim * k];
|
|
return (codebook, 0);
|
|
}
|
|
|
|
// Create k centroids evenly spaced across the range
|
|
let centroids: Vec<Vec<f32>> = (0..k)
|
|
.map(|i| {
|
|
let offset = min_val + (i as f32 / k as f32) * range;
|
|
vec![offset; dim]
|
|
})
|
|
.collect();
|
|
|
|
// Find nearest centroid for this subvector
|
|
let code = self.nearest_centroid(subvector, ¢roids);
|
|
|
|
// Flatten codebook
|
|
let codebook: Vec<f32> = centroids.into_iter().flatten().collect();
|
|
|
|
(codebook, code as u8)
|
|
}
|
|
|
|
fn nearest_centroid(&self, subvector: &[f32], centroids: &[Vec<f32>]) -> usize {
|
|
centroids
|
|
.iter()
|
|
.enumerate()
|
|
.map(|(i, centroid)| {
|
|
let dist: f32 = subvector
|
|
.iter()
|
|
.zip(centroid.iter())
|
|
.map(|(a, b)| (a - b).powi(2))
|
|
.sum();
|
|
(i, dist)
|
|
})
|
|
.min_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
|
|
.map(|(i, _)| i)
|
|
.unwrap_or(0)
|
|
}
|
|
}
|
|
|
|
// === Half precision conversion helpers ===
|
|
|
|
/// Encode an `f32` as a 16-bit fixed-point word (not IEEE 754 half precision).
///
/// The value is multiplied by 1000, saturated to `[-32768, 32767]`, rounded to
/// the nearest integer and stored with an offset of 32768. The representable
/// range is therefore -32.768 to 32.767 in steps of 0.001. NaN encodes as zero
/// (word 32768).
fn f32_to_f16_bits(value: f32) -> u16 {
    // Round to nearest rather than truncating toward zero. Truncation loses up
    // to a full step and biases every value toward zero.
    let quantized = (value * 1000.0).clamp(-32768.0, 32767.0).round();
    // quantized is in [-32768, 32767] (or NaN, which casts to 0), so the
    // shifted value always fits in a u16.
    ((quantized as i32) + 32768) as u16
}
|
|
|
|
/// Decode a 16-bit word produced by `f32_to_f16_bits` back into an `f32`.
///
/// Removes the 32768 offset and divides by 1000, the inverse of the encoding.
fn f16_bits_to_f32(bits: u16) -> f32 {
    const OFFSET: i32 = 32768;
    const STEPS_PER_UNIT: f32 = 1000.0;

    let signed = i32::from(bits) - OFFSET;
    signed as f32 / STEPS_PER_UNIT
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Compress at the level chosen for `freq`, then decompress again.
    fn round_trip(embedding: &[f32], freq: f32) -> Vec<f32> {
        let compressor = TensorCompress::new();
        let packed = compressor.compress(embedding, freq).unwrap();
        compressor.decompress(&packed).unwrap()
    }

    /// Compress with an explicit level, then decompress again.
    fn round_trip_with(embedding: &[f32], level: CompressionLevel) -> Vec<f32> {
        let compressor = TensorCompress::new();
        let packed = compressor.compress_with_level(embedding, &level).unwrap();
        compressor.decompress(&packed).unwrap()
    }

    #[test]
    fn test_compress_none() {
        let embedding = vec![1.0, 2.0, 3.0, 4.0];

        let decompressed = round_trip(&embedding, 1.0);

        assert_eq!(embedding, decompressed);
    }

    #[test]
    fn test_compress_half() {
        let embedding = vec![1.0, 2.0, 3.0, 4.0];

        let decompressed = round_trip(&embedding, 0.5);

        // The 16-bit encoding is lossy, so compare with a tolerance.
        for (a, b) in embedding.iter().zip(decompressed.iter()) {
            assert!((a - b).abs() < 0.01, "Expected {}, got {}", a, b);
        }
    }

    #[test]
    fn test_compress_binary() {
        let embedding = vec![1.0, -1.0, 0.5, -0.5];

        let decompressed = round_trip(&embedding, 0.005);

        // Every reconstructed value is either +1 or -1.
        assert_eq!(decompressed.len(), embedding.len());
        for val in &decompressed {
            assert!(*val == 1.0 || *val == -1.0);
        }
    }

    #[test]
    fn test_select_level() {
        let compressor = TensorCompress::new();

        // Hot
        let hot = compressor.select_level(0.9);
        assert!(matches!(hot, CompressionLevel::None));

        // Warm
        let warm = compressor.select_level(0.5);
        assert!(matches!(warm, CompressionLevel::Half { .. }));

        // Cool
        let cool = compressor.select_level(0.2);
        assert!(matches!(cool, CompressionLevel::PQ8 { .. }));

        // Cold
        let cold = compressor.select_level(0.05);
        assert!(matches!(cold, CompressionLevel::PQ4 { .. }));

        // Archive
        let archive = compressor.select_level(0.001);
        assert!(matches!(archive, CompressionLevel::Binary { .. }));
    }

    #[test]
    fn test_empty_embedding() {
        let compressor = TensorCompress::new();

        assert!(compressor.compress(&[], 0.5).is_err());
    }

    #[test]
    fn test_pq8_compression() {
        let compressor = TensorCompress::new();
        let embedding: Vec<f32> = (0..64).map(|i| i as f32 * 0.1).collect();

        let compressed = compressor.compress_pq8(&embedding, 8, 16).unwrap();
        let decompressed = compressor.decompress(&compressed).unwrap();

        assert_eq!(decompressed.len(), embedding.len());
    }

    #[test]
    fn test_round_trip_all_levels() {
        let embedding: Vec<f32> = (0..128).map(|i| (i as f32 - 64.0) * 0.01).collect();

        // One frequency per compression level, hottest first.
        for freq in [0.9, 0.5, 0.2, 0.05, 0.001] {
            let decompressed = round_trip(&embedding, freq);
            assert_eq!(decompressed.len(), embedding.len());
        }
    }

    #[test]
    fn test_half_precision_roundtrip() {
        // Stay inside the representable range (-32.768 to 32.767).
        for val in [-30.0, -1.0, 0.0, 1.0, 30.0] {
            let embedding = vec![val; 4];
            let decompressed =
                round_trip_with(&embedding, CompressionLevel::Half { scale: 1.0 });

            for (a, b) in embedding.iter().zip(decompressed.iter()) {
                let diff = (a - b).abs();
                assert!(
                    diff < 0.1,
                    "Value {} decompressed to {}, diff: {}",
                    a,
                    b,
                    diff
                );
            }
        }
    }

    #[test]
    fn test_binary_threshold() {
        let embedding = vec![0.5, -0.5, 1.5, -1.5];

        let decompressed =
            round_trip_with(&embedding, CompressionLevel::Binary { threshold: 0.0 });

        // Values above the threshold decode to 1.0, everything else to -1.0.
        assert_eq!(decompressed, vec![1.0, -1.0, 1.0, -1.0]);
    }

    #[test]
    fn test_pq4_with_outliers() {
        // A smooth ramp with two extreme values planted in it.
        let mut embedding: Vec<f32> = (0..64).map(|i| i as f32 * 0.01).collect();
        embedding[10] = 100.0;
        embedding[30] = -100.0;

        let level = CompressionLevel::PQ4 {
            subvectors: 8,
            outlier_threshold: 2.0,
        };
        let decompressed = round_trip_with(&embedding, level);

        assert_eq!(decompressed.len(), embedding.len());
        // The planted outliers must come back exactly.
        assert_eq!(decompressed[10], 100.0);
        assert_eq!(decompressed[30], -100.0);
    }

    #[test]
    fn test_dimension_validation() {
        let compressor = TensorCompress::new();
        let embedding = vec![1.0; 10]; // 10 is not a multiple of 8

        assert!(compressor.compress_pq8(&embedding, 8, 16).is_err());
    }
}
|