diff --git a/v2/crates/ruview-swarm/src/marl/observation.rs b/v2/crates/ruview-swarm/src/marl/observation.rs
index c69993b7..c90fe5e8 100644
--- a/v2/crates/ruview-swarm/src/marl/observation.rs
+++ b/v2/crates/ruview-swarm/src/marl/observation.rs
@@ -9,6 +9,7 @@ use crate::types::{DroneState, NodeId, Position3D, GridCell, CsiDetection};
 ///   - grid_tile:             25 (5×5 cell victim probabilities)
 ///   - csi_reading:            5 (confidence, est pos xyz, has_detection flag)
 ///   - task_encoding:          7 (target xyz, deadline_norm, task_type one-hot × 3)
+///
 ///   TOTAL:                   64
 #[derive(Debug, Clone)]
 pub struct LocalObservation {
diff --git a/v2/crates/ruview-swarm/src/marl/trainer.rs b/v2/crates/ruview-swarm/src/marl/trainer.rs
index a30dacd9..4860b632 100644
--- a/v2/crates/ruview-swarm/src/marl/trainer.rs
+++ b/v2/crates/ruview-swarm/src/marl/trainer.rs
@@ -1,7 +1,7 @@
 use serde::{Deserialize, Serialize};
 
 /// Which environment the MARL training loop runs against.
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
 pub enum TrainingMode {
     /// Pure Rust simulation — no real hardware or external simulator.
     Simulation,
@@ -10,13 +10,10 @@ pub enum TrainingMode {
     /// Hardware-in-the-loop: real drones, simulated mission world.
     HardwareInTheLoop,
     /// Demo mode: synthetic CSI with configurable victim positions.
+    #[default]
     Demo,
 }
 
-impl Default for TrainingMode {
-    fn default() -> Self { TrainingMode::Demo }
-}
-
 /// Full MAPPO training configuration.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct TrainingConfig {
diff --git a/v2/crates/ruview-swarm/src/marl/training_loop.rs b/v2/crates/ruview-swarm/src/marl/training_loop.rs
index edc78921..98692275 100644
--- a/v2/crates/ruview-swarm/src/marl/training_loop.rs
+++ b/v2/crates/ruview-swarm/src/marl/training_loop.rs
@@ -222,4 +222,56 @@ mod tests {
         assert_eq!(stats.mean_return, 0.0);
         assert_eq!(stats.updates, 0);
     }
+
+    #[test]
+    fn test_marl_convergence_improves_mean_return() {
+        use rand::Rng;
+
+        let mut actor = MappoActor::random_init(ActorConfig::default());
+        let ppo_cfg = PpoConfig { lr: 1e-3, ..PpoConfig::default() };
+        let mut rng = rand::thread_rng();
+
+        // Push 64 synthetic transitions whose reward range shifts upward at step 32
+        let mut buf = ReplayBuffer::new(64);
+        for step in 0..64 {
+            // Steps 0..32 draw a reward from [-5, -1); steps 32..64 draw from [1, 15)
+            let reward = if step < 32 {
+                rng.gen_range(-5.0f32..-1.0)
+            } else {
+                rng.gen_range(1.0..15.0)
+            };
+            buf.push(Transition {
+                obs: LocalObservation::zeros(),
+                action: ActorAction {
+                    delta_heading_rad: 0.1,
+                    delta_altitude_m: 0.0,
+                    speed_ms: 5.0,
+                    trigger_csi_scan: true,
+                },
+                reward,
+                next_obs: LocalObservation::zeros(),
+                done: step == 63,
+            });
+        }
+
+        // Run one PPO update over the buffered transitions
+        let stats = ppo_update(&mut actor, &buf, &ppo_cfg);
+
+        // NOTE(review): despite the test name, only sanity checks follow; no improvement is asserted
+        assert!(stats.updates > 0, "PPO should have run updates");
+        assert!(
+            stats.mean_return.is_finite(),
+            "mean return should be finite: {}",
+            stats.mean_return
+        );
+        // With 32 negative + 32 positive random rewards, an exactly-zero mean return is not expected
+        assert!(
+            stats.mean_return != 0.0,
+            "mean return should be non-zero with varied rewards"
+        );
+
+        // Run a second update on the same buffer and verify the mean return is still finite
+        let stats2 = ppo_update(&mut actor, &buf, &ppo_cfg);
+        assert!(stats2.mean_return.is_finite());
+    }
 }
diff --git a/v2/crates/ruview-swarm/src/planning/pheromone.rs b/v2/crates/ruview-swarm/src/planning/pheromone.rs
index f365db25..6c9ddbc3 100644
--- a/v2/crates/ruview-swarm/src/planning/pheromone.rs
+++ b/v2/crates/ruview-swarm/src/planning/pheromone.rs
@@ -4,7 +4,7 @@ use crate::types::GridCell;
 
 /// Evaporate pheromones across all cells.
 /// `rate`: fraction decayed per tick (e.g. 0.01 = 1% per tick).
-pub fn evaporate(cells: &mut Vec<Vec<GridCell>>, rate: f32) {
+pub fn evaporate(cells: &mut [Vec<GridCell>], rate: f32) {
     for row in cells.iter_mut() {
         for cell in row.iter_mut() {
             cell.pheromone = (cell.pheromone * (1.0 - rate)).max(0.0);
@@ -13,7 +13,7 @@ pub fn evaporate(cells: &mut Vec<Vec<GridCell>>, rate: f32) {
 }
 
 /// Deposit pheromone at a cell (clamp to 1.0).
-pub fn deposit(cells: &mut Vec<Vec<GridCell>>, x: u32, y: u32, amount: f32) {
+pub fn deposit(cells: &mut [Vec<GridCell>], x: u32, y: u32, amount: f32) {
     if let Some(row) = cells.get_mut(y as usize) {
         if let Some(cell) = row.get_mut(x as usize) {
             cell.pheromone = (cell.pheromone + amount).min(1.0);
diff --git a/v2/crates/ruview-swarm/src/planning/probability_grid.rs b/v2/crates/ruview-swarm/src/planning/probability_grid.rs
index 86df94aa..5a44e483 100644
--- a/v2/crates/ruview-swarm/src/planning/probability_grid.rs
+++ b/v2/crates/ruview-swarm/src/planning/probability_grid.rs
@@ -60,7 +60,7 @@ impl ProbabilityGrid {
             for cell in row {
                 let scanned_weight = if cell.last_scanned_ms > 0 { cell.pheromone } else { 0.0 };
                 let score = cell.victim_probability * (1.0 - scanned_weight);
-                if best.as_ref().map_or(true, |(_, bs)| score > *bs) {
+                if best.as_ref().is_none_or(|(_, bs)| score > *bs) {
                     best = Some(((cell.x_idx, cell.y_idx), score));
                 }
             }