wifi-densepose/vendor/ruvector/crates/ruvllm/src/quantize/mod.rs

//! Quantization Pipeline for RuvLTRA Models
//!
//! This module provides quantization capabilities for converting full-precision
//! models to optimized quantized formats suitable for edge inference on Apple Silicon.
//!
//! ## Supported Quantization Formats
//!
//! | Format | Bits | Memory (0.5B) | Quality | Use Case |
//! |--------|------|---------------|---------|----------|
//! | Q4_K_M | 4.5  | ~300 MB       | Good    | Best quality/size tradeoff |
//! | Q5_K_M | 5.5  | ~375 MB       | Better  | Higher quality, still compact |
//! | Q8_0   | 8.5  | ~500 MB       | Best    | Near-lossless quantization |
//!
//! ## Apple Neural Engine (ANE) Optimization
//!
//! The quantization pipeline produces weights optimized for ANE inference:
//! - 16-byte aligned weight layouts
//! - Blocked quantization compatible with ANE tile operations
//! - Optimized memory access patterns for M4 Pro's unified memory
//!
//! ## Example
//!
//! ```rust,ignore
//! use ruvllm::quantize::{RuvltraQuantizer, QuantConfig, TargetFormat};
//! use std::path::Path;
//!
//! // Create quantizer for Q4_K_M format
//! let config = QuantConfig::default()
//!     .with_format(TargetFormat::Q4_K_M)
//!     .with_ane_optimization(true);
//!
//! let quantizer = RuvltraQuantizer::new(config)?;
//!
//! // Quantize a model
//! quantizer.quantize_model(
//!     Path::new("qwen-0.5b.safetensors"),
//!     Path::new("ruvltra-small-q4.gguf"),
//! )?;
//! ```

// The implementation is kept in a private submodule; everything callers are
// meant to use is re-exported below. Each group is its own `pub use`
// statement, separated by a blank line, so the group comments stay attached
// to the items they describe.
mod ruvltra_quant;

// Core quantizer and its configuration types
pub use ruvltra_quant::{QuantConfig, RuvltraQuantizer, TargetFormat};

// Block types
pub use ruvltra_quant::{Q4KMBlock, Q5KMBlock, Q8Block};

// Quantization and dequantization functions
pub use ruvltra_quant::{
    dequantize_for_ane, quantize_ruvltra_q4, quantize_ruvltra_q5, quantize_ruvltra_q8,
};

// Memory estimation
pub use ruvltra_quant::{
    estimate_memory_q4, estimate_memory_q5, estimate_memory_q8, MemoryEstimate,
};

// Progress tracking and statistics
pub use ruvltra_quant::{QuantProgress, QuantStats};