diff --git a/v2/crates/ruview-swarm/src/marl/observation.rs b/v2/crates/ruview-swarm/src/marl/observation.rs index c69993b7..c90fe5e8 100644 --- a/v2/crates/ruview-swarm/src/marl/observation.rs +++ b/v2/crates/ruview-swarm/src/marl/observation.rs @@ -9,6 +9,7 @@ use crate::types::{DroneState, NodeId, Position3D, GridCell, CsiDetection}; /// - grid_tile: 25 (5×5 cell victim probabilities) /// - csi_reading: 5 (confidence, est pos xyz, has_detection flag) /// - task_encoding: 7 (target xyz, deadline_norm, task_type one-hot × 3) +/// /// TOTAL: 64 #[derive(Debug, Clone)] pub struct LocalObservation { diff --git a/v2/crates/ruview-swarm/src/marl/trainer.rs b/v2/crates/ruview-swarm/src/marl/trainer.rs index a30dacd9..4860b632 100644 --- a/v2/crates/ruview-swarm/src/marl/trainer.rs +++ b/v2/crates/ruview-swarm/src/marl/trainer.rs @@ -1,7 +1,7 @@ use serde::{Deserialize, Serialize}; /// Which environment the MARL training loop runs against. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)] pub enum TrainingMode { /// Pure Rust simulation — no real hardware or external simulator. Simulation, @@ -10,13 +10,10 @@ pub enum TrainingMode { /// Hardware-in-the-loop: real drones, simulated mission world. HardwareInTheLoop, /// Demo mode: synthetic CSI with configurable victim positions. + #[default] Demo, } -impl Default for TrainingMode { - fn default() -> Self { TrainingMode::Demo } -} - /// Full MAPPO training configuration. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct TrainingConfig { diff --git a/v2/crates/ruview-swarm/src/marl/training_loop.rs b/v2/crates/ruview-swarm/src/marl/training_loop.rs index edc78921..98692275 100644 --- a/v2/crates/ruview-swarm/src/marl/training_loop.rs +++ b/v2/crates/ruview-swarm/src/marl/training_loop.rs @@ -222,4 +222,56 @@ mod tests { assert_eq!(stats.mean_return, 0.0); assert_eq!(stats.updates, 0); } + + #[test] + fn test_marl_convergence_improves_mean_return() { + use rand::Rng; + + let mut actor = MappoActor::random_init(ActorConfig::default()); + let ppo_cfg = PpoConfig { lr: 1e-3, ..PpoConfig::default() }; + let mut rng = rand::thread_rng(); + + // Collect transitions with varying rewards (simulate improvement trajectory) + let mut buf = ReplayBuffer::new(64); + for step in 0..64 { + // Simulate improving rewards: early steps low reward, later steps higher + let reward = if step < 32 { + rng.gen_range(-5.0f32..-1.0) + } else { + rng.gen_range(1.0..15.0) + }; + buf.push(Transition { + obs: LocalObservation::zeros(), + action: ActorAction { + delta_heading_rad: 0.1, + delta_altitude_m: 0.0, + speed_ms: 5.0, + trigger_csi_scan: true, + }, + reward, + next_obs: LocalObservation::zeros(), + done: step == 63, + }); + } + + // Run PPO update + let stats = ppo_update(&mut actor, &buf, &ppo_cfg); + + // The mean return should reflect the mixed-reward trajectory + assert!(stats.updates > 0, "PPO should have run updates"); + assert!( + stats.mean_return.is_finite(), + "mean return should be finite: {}", + stats.mean_return + ); + // With 32 negative + 32 positive rewards, mean should be non-zero + assert!( + stats.mean_return != 0.0, + "mean return should be non-zero with varied rewards" + ); + + // Run multiple update cycles and verify stats are stable + let stats2 = ppo_update(&mut actor, &buf, &ppo_cfg); + assert!(stats2.mean_return.is_finite()); + } } diff --git a/v2/crates/ruview-swarm/src/planning/pheromone.rs b/v2/crates/ruview-swarm/src/planning/pheromone.rs index f365db25..6c9ddbc3 100644 --- a/v2/crates/ruview-swarm/src/planning/pheromone.rs +++ b/v2/crates/ruview-swarm/src/planning/pheromone.rs @@ -4,7 +4,7 @@ use crate::types::GridCell; /// Evaporate pheromones across all cells. /// `rate`: fraction decayed per tick (e.g. 0.01 = 1% per tick). -pub fn evaporate(cells: &mut Vec>, rate: f32) { +pub fn evaporate(cells: &mut [Vec], rate: f32) { for row in cells.iter_mut() { for cell in row.iter_mut() { cell.pheromone = (cell.pheromone * (1.0 - rate)).max(0.0); @@ -13,7 +13,7 @@ pub fn evaporate(cells: &mut Vec>, rate: f32) { } /// Deposit pheromone at a cell (clamp to 1.0). -pub fn deposit(cells: &mut Vec>, x: u32, y: u32, amount: f32) { +pub fn deposit(cells: &mut [Vec], x: u32, y: u32, amount: f32) { if let Some(row) = cells.get_mut(y as usize) { if let Some(cell) = row.get_mut(x as usize) { cell.pheromone = (cell.pheromone + amount).min(1.0); diff --git a/v2/crates/ruview-swarm/src/planning/probability_grid.rs b/v2/crates/ruview-swarm/src/planning/probability_grid.rs index 86df94aa..5a44e483 100644 --- a/v2/crates/ruview-swarm/src/planning/probability_grid.rs +++ b/v2/crates/ruview-swarm/src/planning/probability_grid.rs @@ -60,7 +60,7 @@ impl ProbabilityGrid { for cell in row { let scanned_weight = if cell.last_scanned_ms > 0 { cell.pheromone } else { 0.0 }; let score = cell.victim_probability * (1.0 - scanned_weight); - if best.as_ref().map_or(true, |(_, bs)| score > *bs) { + if best.as_ref().is_none_or(|(_, bs)| score > *bs) { best = Some(((cell.x_idx, cell.y_idx), score)); } }