69 lines
1.9 KiB
Rust
69 lines
1.9 KiB
Rust
//! Quantization Pipeline for RuvLTRA Models
//!
//! This module provides quantization capabilities for converting full-precision
//! models to optimized quantized formats suitable for edge inference on Apple Silicon.
//!
//! ## Supported Quantization Formats
//!
//! | Format | Bits | Memory (0.5B) | Quality | Use Case                      |
//! |--------|------|---------------|---------|-------------------------------|
//! | Q4_K_M | 4.5  | ~300 MB       | Good    | Best quality/size tradeoff    |
//! | Q5_K_M | 5.5  | ~375 MB       | Better  | Higher quality, still compact |
//! | Q8_0   | 8.5  | ~500 MB       | Best    | Near-lossless quantization    |
//!
//! ## Apple Neural Engine (ANE) Optimization
//!
//! The quantization pipeline produces weights optimized for ANE inference:
//! - 16-byte aligned weight layouts
//! - Blocked quantization compatible with ANE tile operations
//! - Optimized memory access patterns for M4 Pro's unified memory
//!
//! ## Example
//!
//! ```rust,ignore
//! use ruvllm::quantize::{RuvltraQuantizer, QuantConfig, TargetFormat};
//! use std::path::Path;
//!
//! // Create quantizer for Q4_K_M format
//! let config = QuantConfig::default()
//!     .with_format(TargetFormat::Q4_K_M)
//!     .with_ane_optimization(true);
//!
//! let quantizer = RuvltraQuantizer::new(config)?;
//!
//! // Quantize a model
//! quantizer.quantize_model(
//!     Path::new("qwen-0.5b.safetensors"),
//!     Path::new("ruvltra-small-q4.gguf"),
//! )?;
//! ```
|
|
|
|
mod ruvltra_quant;
|
|
|
|
pub use ruvltra_quant::{
|
|
dequantize_for_ane,
|
|
|
|
// Memory estimation
|
|
estimate_memory_q4,
|
|
estimate_memory_q5,
|
|
estimate_memory_q8,
|
|
// Quantization functions
|
|
quantize_ruvltra_q4,
|
|
quantize_ruvltra_q5,
|
|
quantize_ruvltra_q8,
|
|
MemoryEstimate,
|
|
|
|
// Block types
|
|
Q4KMBlock,
|
|
Q5KMBlock,
|
|
Q8Block,
|
|
|
|
QuantConfig,
|
|
// Progress tracking
|
|
QuantProgress,
|
|
QuantStats,
|
|
// Core quantizer
|
|
RuvltraQuantizer,
|
|
TargetFormat,
|
|
};
|