fix(ruview-swarm): fail-closed on NaN/Inf at swarm-comm trust boundary (ADR-148)
Beyond-SOTA security review of the ADR-148 drone swarm control plane found four IEEE-754 NaN/Inf fail-open / DoS bugs on data crossing the untrusted swarm-comm boundary (receive_peer_state / receive_peer_detection accept full DroneState/CsiDetection whose f64/f32 fields deserialize with no finite-check). - HIGH: failsafe::tick collision-avoidance + battery checks fail-open on NaN (NaN < threshold == false silently disabled collision avoidance / kept a NaN-battery drone Nominal). Now fails closed to EmergencyDiverge / RTH. - MED: geofence::check NaN-altitude bypass returned Safe through the point-in-polygon path. Now leading non-finite-coordinate guard -> HardBreach. - MED/DoS: antijamming FhssRadio panicked with "% 0" on an empty deserialized channels_mhz. Now len==0 early-returns (benign 0.0 sentinel). - LOW: multiview::fuse propagated a NaN victim_position into the fused "confirmed victim" location. Now requires finite confidence + position. Each fix pinned by a fails-on-old / passes-on-new test (MEASURED: old code returned Nominal/Safe or panicked). cargo test -p ruview-swarm --no-default-features: 117 -> 123 passed, 0 failed. Workspace green; Python deterministic proof unchanged (f8e76f21...46f7a, off the signal path). Documented-not-fixed (ADR slot 176): Raft AppendEntries lacks Log-Matching consistency check (topology/raft.rs); MavlinkSigner::verify uses non-constant -time tag compare + no replay-window rejection (already doc-flagged). Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
0f64d23516
commit
f671000d70
File diff suppressed because one or more lines are too long
|
|
@ -47,8 +47,18 @@ impl FailSafeMachine {
|
|||
link_alive: bool,
|
||||
nearest_neighbor_dist: f64,
|
||||
) -> FailSafeState {
|
||||
// Collision avoidance has highest priority
|
||||
if nearest_neighbor_dist < self.collision_dist_m {
|
||||
// Collision avoidance has highest priority.
|
||||
//
|
||||
// Fail CLOSED on a non-finite neighbour distance. `nearest_neighbor_dist`
|
||||
// is derived from peer positions (see
|
||||
// `SwarmOrchestrator::nearest_peer_distance`), which arrive over the
|
||||
// untrusted swarm comm layer as `DroneState` values whose f64 position
|
||||
// fields can deserialize to NaN/Inf. A naive `NaN < collision_dist_m`
|
||||
// evaluates to `false`, silently DISABLING collision avoidance — the
|
||||
// worst possible failure for a physical drone. Treat a non-finite
|
||||
// distance as "too close" so the swarm diverges rather than trusting a
|
||||
// poisoned reading.
|
||||
if !nearest_neighbor_dist.is_finite() || nearest_neighbor_dist < self.collision_dist_m {
|
||||
self.state = FailSafeState::EmergencyDiverge;
|
||||
return self.state.clone();
|
||||
}
|
||||
|
|
@ -71,8 +81,11 @@ impl FailSafeMachine {
|
|||
}
|
||||
}
|
||||
|
||||
// Battery checks
|
||||
if state.battery_pct <= self.battery_rth_pct {
|
||||
// Battery checks. A non-finite battery reading (NaN/Inf from a corrupt or
|
||||
// forged telemetry/peer message) must fail CLOSED: `NaN <= threshold` is
|
||||
// `false`, which would otherwise let a drone with an unknown battery
|
||||
// level keep flying nominally. Treat a non-finite reading as critical.
|
||||
if !state.battery_pct.is_finite() || state.battery_pct <= self.battery_rth_pct {
|
||||
self.state = FailSafeState::ReturnToHome;
|
||||
} else if state.battery_pct <= self.battery_warn_pct {
|
||||
self.state = FailSafeState::LowBatteryWarn;
|
||||
|
|
@ -144,4 +157,35 @@ mod tests {
|
|||
let result = fsm.tick(&s, true, 0.5); // too close
|
||||
assert_eq!(result, FailSafeState::EmergencyDiverge);
|
||||
}
|
||||
|
||||
/// Security: a NaN neighbour distance (poisoned peer position over the swarm
|
||||
/// comm layer) must NOT silently disable collision avoidance. Fails on old
|
||||
/// code where `NaN < collision_dist_m` is `false` and the state stays Nominal.
|
||||
#[test]
|
||||
fn test_nan_neighbor_distance_fails_closed_to_diverge() {
|
||||
let mut fsm = FailSafeMachine::new();
|
||||
let s = good_state();
|
||||
let result = fsm.tick(&s, true, f64::NAN);
|
||||
assert_eq!(
|
||||
result,
|
||||
FailSafeState::EmergencyDiverge,
|
||||
"non-finite neighbour distance must fail closed to EmergencyDiverge"
|
||||
);
|
||||
}
|
||||
|
||||
/// Security: a NaN battery reading must fail closed to ReturnToHome rather
|
||||
/// than being treated as a healthy battery. Fails on old code where
|
||||
/// `NaN <= battery_rth_pct` is `false` and the drone stays Nominal.
|
||||
#[test]
|
||||
fn test_nan_battery_fails_closed_to_rth() {
|
||||
let mut fsm = FailSafeMachine::new();
|
||||
let mut s = good_state();
|
||||
s.battery_pct = f32::NAN;
|
||||
let result = fsm.tick(&s, true, 10.0);
|
||||
assert_eq!(
|
||||
result,
|
||||
FailSafeState::ReturnToHome,
|
||||
"non-finite battery must fail closed to ReturnToHome"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -59,8 +59,16 @@ impl FhssRadio {
|
|||
}
|
||||
|
||||
/// Returns the current active channel frequency in MHz.
|
||||
///
|
||||
/// `FhssConfig` is `Deserialize`, so `channels_mhz` can arrive empty from a
|
||||
/// malformed or hostile config. An empty channel list would make `% n`
|
||||
/// (n = 0) panic with a divide-by-zero. Guard it and return a benign `0.0`
|
||||
/// sentinel instead of crashing the radio task (DoS-resistance).
|
||||
pub fn current_channel_mhz(&self) -> f64 {
|
||||
let n = self.config.channels_mhz.len();
|
||||
if n == 0 {
|
||||
return 0.0;
|
||||
}
|
||||
// XOR node seed into hop index so each node uses a different offset
|
||||
let idx = (self.hop_index ^ (self.node_seed as usize)) % n;
|
||||
self.config.channels_mhz[idx]
|
||||
|
|
@ -68,7 +76,11 @@ impl FhssRadio {
|
|||
|
||||
/// Advance the hop sequence by one step (call at hop_rate_hz).
|
||||
pub fn next_hop(&mut self) {
|
||||
self.hop_index = (self.hop_index + 1) % self.config.channels_mhz.len();
|
||||
let n = self.config.channels_mhz.len();
|
||||
if n == 0 {
|
||||
return; // no channels configured — nothing to hop (avoid `% 0` panic)
|
||||
}
|
||||
self.hop_index = (self.hop_index + 1) % n;
|
||||
}
|
||||
|
||||
/// Update with latest RSSI measurement. Drives jamming detection.
|
||||
|
|
@ -97,9 +109,13 @@ impl FhssRadio {
|
|||
.wrapping_mul(lcg_a)
|
||||
.wrapping_add(self.evasion_count)
|
||||
.wrapping_add(lcg_c);
|
||||
let n = self.config.channels_mhz.len() as u64;
|
||||
let len = self.config.channels_mhz.len();
|
||||
if len == 0 {
|
||||
return; // no channels configured — avoid `% 0` panic
|
||||
}
|
||||
let n = len as u64;
|
||||
let offset = (seed % n / 4 + 3) as usize;
|
||||
self.hop_index = (self.hop_index + offset) % self.config.channels_mhz.len();
|
||||
self.hop_index = (self.hop_index + offset) % len;
|
||||
self.evasion_count += 1;
|
||||
self.rssi_history.clear();
|
||||
}
|
||||
|
|
@ -165,6 +181,23 @@ mod tests {
|
|||
assert_eq!(radio.hop_index, (initial_idx + 2) % 50);
|
||||
}
|
||||
|
||||
/// Security/DoS: an empty `channels_mhz` (deserialized from a malformed or
|
||||
/// hostile config) must not panic with a `% 0` divide-by-zero. Fails on old
|
||||
/// code, where `next_hop`/`current_channel_mhz`/`evasive_hop`/`tick` all do
|
||||
/// modulo / index by `channels_mhz.len()`.
|
||||
#[test]
|
||||
fn test_empty_channels_does_not_panic() {
|
||||
let cfg = FhssConfig { channels_mhz: vec![], jamming_detect_window: 1, ..Default::default() };
|
||||
let mut radio = FhssRadio::new(7, cfg);
|
||||
// None of these may panic.
|
||||
let _ = radio.current_channel_mhz();
|
||||
radio.next_hop();
|
||||
radio.observe_rssi(-99.0); // window=1 → jamming_detected() true → evasive_hop()
|
||||
radio.tick(100.0);
|
||||
radio.evasive_hop();
|
||||
assert_eq!(radio.current_channel_mhz(), 0.0, "empty channel list returns sentinel");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_channel_in_valid_range() {
|
||||
let cfg = FhssConfig::default();
|
||||
|
|
|
|||
|
|
@ -27,6 +27,16 @@ pub enum GeofenceResult {
|
|||
impl Geofence {
|
||||
/// Check a position against this geofence.
|
||||
pub fn check(&self, pos: &Position3D) -> GeofenceResult {
|
||||
// Fail CLOSED on a non-finite position. A NaN/Inf component (from a
|
||||
// corrupt GPS/EKF estimate or a forged position) makes every subsequent
|
||||
// comparison false: `NaN < min || NaN > max` is `false`, so the altitude
|
||||
// breach is skipped, and a NaN altitude with otherwise-valid x/y would
|
||||
// return `Safe` — a silent geofence bypass on a flight-safety boundary.
|
||||
// Treat any non-finite coordinate as a hard breach.
|
||||
if !pos.x.is_finite() || !pos.y.is_finite() || !pos.z.is_finite() {
|
||||
return GeofenceResult::HardBreach;
|
||||
}
|
||||
|
||||
let altitude_m = -pos.z; // NED: negative z = altitude above ground
|
||||
|
||||
// Altitude check
|
||||
|
|
@ -146,4 +156,29 @@ mod tests {
|
|||
let pos = Position3D { x: 50.0, y: 50.0, z: -200.0 }; // 200m altitude
|
||||
assert_eq!(f.check(&pos), GeofenceResult::HardBreach);
|
||||
}
|
||||
|
||||
/// Security: a NaN altitude with an otherwise in-bounds x/y must fail closed
|
||||
/// to HardBreach. Fails on old code where `NaN < min || NaN > max` is `false`,
|
||||
/// the altitude check is skipped, and the point-in-polygon path returns Safe —
|
||||
/// a silent geofence bypass.
|
||||
#[test]
|
||||
fn test_nan_altitude_fails_closed() {
|
||||
let f = square_fence();
|
||||
let pos = Position3D { x: 50.0, y: 50.0, z: f64::NAN };
|
||||
assert_eq!(f.check(&pos), GeofenceResult::HardBreach);
|
||||
}
|
||||
|
||||
/// Security: NaN/Inf horizontal coordinates must also fail closed.
|
||||
#[test]
|
||||
fn test_nonfinite_horizontal_fails_closed() {
|
||||
let f = square_fence();
|
||||
assert_eq!(
|
||||
f.check(&Position3D { x: f64::NAN, y: 50.0, z: -30.0 }),
|
||||
GeofenceResult::HardBreach
|
||||
);
|
||||
assert_eq!(
|
||||
f.check(&Position3D { x: 50.0, y: f64::INFINITY, z: -30.0 }),
|
||||
GeofenceResult::HardBreach
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -64,10 +64,25 @@ impl MultiViewFusion {
|
|||
detections: &[CsiDetection],
|
||||
drone_positions: &[(NodeId, Position3D)],
|
||||
) -> Option<FusedDetection> {
|
||||
// Filter by confidence and require estimated position
|
||||
// Filter by confidence and require a FINITE estimated position.
|
||||
//
|
||||
// A peer detection (received via `receive_peer_detection`) carries f32/f64
|
||||
// fields that can deserialize to NaN/Inf. A NaN `victim_position` passes
|
||||
// `is_some()` and would propagate through the confidence-weighted average
|
||||
// into the fused position — dispatching a NaN "confirmed victim" location
|
||||
// to the swarm. A NaN `confidence` is already rejected by `>= min_confidence`
|
||||
// (NaN comparisons are false), but we make that explicit and also require
|
||||
// the victim position components to be finite. Fail CLOSED: drop poisoned
|
||||
// detections rather than fusing them.
|
||||
let valid: Vec<(&CsiDetection, &Position3D)> = detections
|
||||
.iter()
|
||||
.filter(|d| d.confidence >= self.min_confidence && d.victim_position.is_some())
|
||||
.filter(|d| {
|
||||
d.confidence.is_finite()
|
||||
&& d.confidence >= self.min_confidence
|
||||
&& d.victim_position
|
||||
.map(|p| p.x.is_finite() && p.y.is_finite() && p.z.is_finite())
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.filter_map(|d| {
|
||||
let drone_pos = drone_positions
|
||||
.iter()
|
||||
|
|
@ -177,4 +192,46 @@ mod tests {
|
|||
result.uncertainty_m
|
||||
);
|
||||
}
|
||||
|
||||
/// Security: a detection with a NaN victim position (poisoned peer report)
|
||||
/// must be dropped, not fused. Fails on old code where the NaN propagates
|
||||
/// into the confidence-weighted average and the fused position is NaN.
|
||||
#[test]
|
||||
fn test_nan_victim_position_dropped_from_fusion() {
|
||||
let fusion = MultiViewFusion { min_viewpoints: 2, min_confidence: 0.5 };
|
||||
let detections = vec![
|
||||
CsiDetection {
|
||||
drone_id: NodeId(0),
|
||||
confidence: 0.9,
|
||||
victim_position: Some(Position3D { x: 50.0, y: 50.0, z: 0.0 }),
|
||||
timestamp_ms: 0,
|
||||
},
|
||||
CsiDetection {
|
||||
drone_id: NodeId(1),
|
||||
confidence: 0.9,
|
||||
victim_position: Some(Position3D { x: f64::NAN, y: 50.0, z: 0.0 }),
|
||||
timestamp_ms: 0,
|
||||
},
|
||||
CsiDetection {
|
||||
drone_id: NodeId(2),
|
||||
confidence: 0.9,
|
||||
victim_position: Some(Position3D { x: 50.0, y: 50.0, z: 0.0 }),
|
||||
timestamp_ms: 0,
|
||||
},
|
||||
];
|
||||
let positions = vec![
|
||||
(NodeId(0), Position3D { x: 0.0, y: 0.0, z: -30.0 }),
|
||||
(NodeId(1), Position3D { x: 100.0, y: 0.0, z: -30.0 }),
|
||||
(NodeId(2), Position3D { x: 50.0, y: 86.6, z: -30.0 }),
|
||||
];
|
||||
// Two finite viewpoints remain → still fuses, but the result must be finite.
|
||||
let result = fusion.fuse(&detections, &positions).unwrap();
|
||||
assert!(
|
||||
result.estimated_position.x.is_finite()
|
||||
&& result.estimated_position.y.is_finite()
|
||||
&& result.estimated_position.z.is_finite(),
|
||||
"fused position must be finite when a NaN detection is present"
|
||||
);
|
||||
assert!(!result.contributing_drones.contains(&NodeId(1)), "NaN detection must be excluded");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue