//! # ruvector-attention
//!
//! Attention mechanisms for ruvector, including geometric, graph, and sparse attention.
//!
//! This crate provides efficient implementations of various attention mechanisms:
//! - Scaled dot-product attention
//! - Multi-head attention with parallel processing
//! - Graph attention for GNN applications
//! - Geometric attention in hyperbolic spaces
//! - Sparse attention patterns
//!
//! ## Features
//!
//! - **SIMD Acceleration**: Optional SIMD optimizations for performance
//! - **Parallel Processing**: Rayon-based parallel head computation
//! - **WASM Support**: WebAssembly compilation support
//! - **NAPI Bindings**: Node.js bindings for JavaScript integration
//!
//! ## Example
//!
//! ```rust
//! use ruvector_attention::{
//!     attention::ScaledDotProductAttention,
//!     traits::Attention,
//! };
//!
//! // Create scaled dot-product attention
//! let attention = ScaledDotProductAttention::new(512);
//!
//! // Prepare inputs
//! let query = vec![1.0; 512];
//! let keys = vec![vec![0.5; 512], vec![0.3; 512]];
//! let values = vec![vec![1.0; 512], vec![2.0; 512]];
//!
//! let keys_refs: Vec<&[f32]> = keys.iter().map(|k| k.as_slice()).collect();
//! let values_refs: Vec<&[f32]> = values.iter().map(|v| v.as_slice()).collect();
//!
//! // Compute attention
//! let output = attention.compute(&query, &keys_refs, &values_refs).unwrap();
//! assert_eq!(output.len(), 512);
//! ```

// Module declarations. The note above each module lists the items that the
// crate root re-exports from it (see the `pub use` declarations later in this
// file); the notes are not a full inventory of each module's contents.

// Re-exported: `MultiHeadAttention`, `ScaledDotProductAttention`.
pub mod attention;
// Re-exported: `AttentionConfig`, `GraphAttentionConfig`, `SparseAttentionConfig`.
pub mod config;
// Re-exported: `AttentionError`, `AttentionResult`.
pub mod error;
// Re-exported: `DualSpaceAttention`, `EdgeFeaturedAttention`, `GraphRoPE`, plus
// `DualSpaceConfig`, `EdgeFeaturedConfig`, `RoPEConfig`.
pub mod graph;
// Re-exported: `exp_map`, `log_map`, `mobius_add`, `poincare_distance`,
// `project_to_ball`, `HyperbolicAttention`, `MixedCurvatureAttention` and the
// matching `*Config` types.
pub mod hyperbolic;
// Re-exported: `MoEAttention`, `MoEConfig`, the `Expert`/`Router` items and
// their concrete variants (`LinearExpert`, `LearnedRouter`, `TopKRouting`, ...).
pub mod moe;
// Re-exported: `presets`, `AttentionBuilder`, `AttentionPipeline`.
pub mod sdk;
// Re-exported: `AttentionMask`, `FlashAttention`, `LinearAttention`,
// `LocalGlobalAttention`, `SparseMaskBuilder`.
pub mod sparse;
// Re-exported: optimizers (`Adam`, `AdamW`, `SGD`), losses, negative miners,
// and scheduling helpers; see the "Training exports" list below.
pub mod training;
// Re-exported: `Attention`, `GraphAttention`, `GeometricAttention`,
// `SparseAttention`, `TrainableAttention`, `EdgeInfo`, `Gradients`, `SparseMask`.
pub mod traits;
// Nothing from this module is re-exported at the crate root in this file;
// reach it through the `utils::` path.
pub mod utils;

// Advanced attention mechanisms
// Re-exported: `MixedCurvatureFusedAttention`, `TangentSpaceMapper`,
// `ComponentQuantizer` and related cache/config types.
pub mod curvature;
// Re-exported: `TopologyGatedAttention`, `AttentionPolicy`, `WindowCoherence`
// and related mode/metric/config types.
pub mod topology;
// Re-exported: `SlicedWassersteinAttention`, `CentroidOTAttention` and their
// cache/config types.
pub mod transport;

// Mathematical foundations
// Re-exported: `InformationBottleneck`, `IBConfig`, `DiagonalGaussian`, `KLDivergence`.
pub mod info_bottleneck;
// Re-exported: `FisherMetric`, `FisherConfig`, `NaturalGradient`, `NaturalGradientConfig`.
pub mod info_geometry;
// Re-exported: `DiffusionAttention`, `DiffusionConfig`, `GraphLaplacian`, `LaplacianType`.
pub mod pde_attention;
// Re-exported: `ReportBuilder`, `ReportConfig`, `GeometryReport`,
// `AttentionRecommendation`, `MetricType`, `MetricValue`.
pub mod unified_report;

// Sheaf attention (Coherence-Gated Transformer per ADR-015)
// Only compiled when the `sheaf` Cargo feature is enabled; its crate-root
// re-exports are gated behind the same feature.
#[cfg(feature = "sheaf")]
pub mod sheaf;

// Re-export main types
pub use attention::{MultiHeadAttention, ScaledDotProductAttention};
pub use config::{AttentionConfig, GraphAttentionConfig, SparseAttentionConfig};
pub use error::{AttentionError, AttentionResult};
pub use hyperbolic::{
    exp_map, log_map, mobius_add, poincare_distance, project_to_ball, HyperbolicAttention,
    HyperbolicAttentionConfig, MixedCurvatureAttention, MixedCurvatureConfig,
};
pub use traits::{
    Attention, EdgeInfo, GeometricAttention, Gradients, GraphAttention, SparseAttention,
    SparseMask, TrainableAttention,
};

// Sparse attention exports
pub use sparse::{
    AttentionMask, FlashAttention, LinearAttention, LocalGlobalAttention, SparseMaskBuilder,
};

// MoE exports
pub use moe::{
    Expert, ExpertType, HyperbolicExpert, LearnedRouter, LinearExpert, MoEAttention, MoEConfig,
    Router, StandardExpert, TopKRouting,
};

// Graph attention exports
pub use graph::{
    DualSpaceAttention, DualSpaceConfig, EdgeFeaturedAttention, EdgeFeaturedConfig, GraphRoPE,
    RoPEConfig,
};

// Training exports
pub use training::{
    Adam, AdamW, CurriculumScheduler, CurriculumStage, DecayType, HardNegativeMiner, InfoNCELoss,
    LocalContrastiveLoss, Loss, MiningStrategy, NegativeMiner, Optimizer, Reduction,
    SpectralRegularization, TemperatureAnnealing, SGD,
};

// SDK exports
pub use sdk::{presets, AttentionBuilder, AttentionPipeline};

// Transport (OT-based attention) exports
pub use transport::{
    CentroidCache, CentroidOTAttention, CentroidOTConfig, ProjectionCache,
    SlicedWassersteinAttention, SlicedWassersteinConfig, WindowCache,
};

// Curvature (Mixed curvature attention) exports
pub use curvature::{
    ComponentQuantizer, FusedCurvatureConfig, MixedCurvatureCache, MixedCurvatureFusedAttention,
    QuantizationConfig, QuantizedVector, TangentSpaceConfig, TangentSpaceMapper,
};

// Topology (Gated attention) exports
pub use topology::{
    AttentionMode, AttentionPolicy, CoherenceMetric, PolicyConfig, TopologyGatedAttention,
    TopologyGatedConfig, WindowCoherence,
};

// Information Geometry exports
pub use info_geometry::{FisherConfig, FisherMetric, NaturalGradient, NaturalGradientConfig};

// Information Bottleneck exports
pub use info_bottleneck::{DiagonalGaussian, IBConfig, InformationBottleneck, KLDivergence};

// PDE Attention exports
pub use pde_attention::{DiffusionAttention, DiffusionConfig, GraphLaplacian, LaplacianType};

// Sheaf Attention exports (Coherence-Gated Transformer per ADR-015)
#[cfg(feature = "sheaf")]
pub use sheaf::{
    process_with_early_exit, ComputeLane, EarlyExit, EarlyExitConfig, EarlyExitResult,
    EarlyExitStatistics, ExitReason, LaneStatistics, ResidualSparseMask, RestrictionMap,
    RestrictionMapConfig, RoutingDecision, SheafAttention, SheafAttentionConfig,
    SparseResidualAttention, SparseResidualConfig, SparsityStatistics, TokenRouter,
    TokenRouterConfig,
};

// Unified Report exports
pub use unified_report::{
    AttentionRecommendation, GeometryReport, MetricType, MetricValue, ReportBuilder, ReportConfig,
};

/// Library version string.
///
/// Captured at compile time from the `CARGO_PKG_VERSION` environment variable
/// via [`env!`], so the build fails if that variable is not set (Cargo sets it
/// from the package manifest when building the crate).
pub const VERSION: &str = env!("CARGO_PKG_VERSION");

#[cfg(test)]
mod tests {
    //! Smoke tests for the items exposed at the crate root.

    use super::*;

    /// `VERSION` must carry a non-empty string.
    #[test]
    fn test_version() {
        assert!(!VERSION.is_empty());
    }

    /// Builds a configuration through the builder and checks the stored
    /// dimensions as well as the per-head dimension reported by `head_dim()`.
    #[test]
    fn test_basic_attention_workflow() {
        // Keep the builder chain as a single expression; only the final
        // `unwrap` is pulled out into its own step.
        let build_outcome = AttentionConfig::builder().dim(64).num_heads(4).build();
        let cfg = build_outcome.unwrap();

        assert_eq!(cfg.dim, 64);
        assert_eq!(cfg.num_heads, 4);
        // 64 total dimensions across 4 heads is expected to report 16 per head.
        assert_eq!(cfg.head_dim(), 16);
    }
}