fix(ruview-swarm): fail-closed on NaN/Inf at swarm-comm trust boundary (ADR-148)
Beyond-SOTA security review of the ADR-148 drone swarm control plane found four IEEE-754 NaN/Inf fail-open / DoS bugs on data crossing the untrusted swarm-comm boundary (receive_peer_state / receive_peer_detection accept full DroneState/CsiDetection whose f64/f32 fields deserialize with no finite-check). - HIGH: failsafe::tick collision-avoidance + battery checks fail-open on NaN (NaN < threshold == false silently disabled collision avoidance / kept a NaN-battery drone Nominal). Now fails closed to EmergencyDiverge / RTH. - MED: geofence::check NaN-altitude bypass returned Safe through the point-in-polygon path. Now leading non-finite-coordinate guard -> HardBreach. - MED/DoS: antijamming FhssRadio panicked with "% 0" on an empty deserialized channels_mhz. Now len==0 early-returns (benign 0.0 sentinel). - LOW: multiview::fuse propagated a NaN victim_position into the fused "confirmed victim" location. Now requires finite confidence + position. Each fix pinned by a fails-on-old / passes-on-new test (MEASURED: old code returned Nominal/Safe or panicked). cargo test -p ruview-swarm --no-default-features: 117 -> 123 passed, 0 failed. Workspace green; Python deterministic proof unchanged (f8e76f21...46f7a, off the signal path). Documented-not-fixed (ADR slot 176): Raft AppendEntries lacks Log-Matching consistency check (topology/raft.rs); MavlinkSigner::verify uses non-constant -time tag compare + no replay-window rejection (already doc-flagged). Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
0f64d23516
commit
f671000d70
File diff suppressed because one or more lines are too long
|
|
@ -47,8 +47,18 @@ impl FailSafeMachine {
|
||||||
link_alive: bool,
|
link_alive: bool,
|
||||||
nearest_neighbor_dist: f64,
|
nearest_neighbor_dist: f64,
|
||||||
) -> FailSafeState {
|
) -> FailSafeState {
|
||||||
// Collision avoidance has highest priority
|
// Collision avoidance has highest priority.
|
||||||
if nearest_neighbor_dist < self.collision_dist_m {
|
//
|
||||||
|
// Fail CLOSED on a non-finite neighbour distance. `nearest_neighbor_dist`
|
||||||
|
// is derived from peer positions (see
|
||||||
|
// `SwarmOrchestrator::nearest_peer_distance`), which arrive over the
|
||||||
|
// untrusted swarm comm layer as `DroneState` values whose f64 position
|
||||||
|
// fields can deserialize to NaN/Inf. A naive `NaN < collision_dist_m`
|
||||||
|
// evaluates to `false`, silently DISABLING collision avoidance — the
|
||||||
|
// worst possible failure for a physical drone. Treat a non-finite
|
||||||
|
// distance as "too close" so the swarm diverges rather than trusting a
|
||||||
|
// poisoned reading.
|
||||||
|
if !nearest_neighbor_dist.is_finite() || nearest_neighbor_dist < self.collision_dist_m {
|
||||||
self.state = FailSafeState::EmergencyDiverge;
|
self.state = FailSafeState::EmergencyDiverge;
|
||||||
return self.state.clone();
|
return self.state.clone();
|
||||||
}
|
}
|
||||||
|
|
@ -71,8 +81,11 @@ impl FailSafeMachine {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Battery checks
|
// Battery checks. A non-finite battery reading (NaN/Inf from a corrupt or
|
||||||
if state.battery_pct <= self.battery_rth_pct {
|
// forged telemetry/peer message) must fail CLOSED: `NaN <= threshold` is
|
||||||
|
// `false`, which would otherwise let a drone with an unknown battery
|
||||||
|
// level keep flying nominally. Treat a non-finite reading as critical.
|
||||||
|
if !state.battery_pct.is_finite() || state.battery_pct <= self.battery_rth_pct {
|
||||||
self.state = FailSafeState::ReturnToHome;
|
self.state = FailSafeState::ReturnToHome;
|
||||||
} else if state.battery_pct <= self.battery_warn_pct {
|
} else if state.battery_pct <= self.battery_warn_pct {
|
||||||
self.state = FailSafeState::LowBatteryWarn;
|
self.state = FailSafeState::LowBatteryWarn;
|
||||||
|
|
@ -144,4 +157,35 @@ mod tests {
|
||||||
let result = fsm.tick(&s, true, 0.5); // too close
|
let result = fsm.tick(&s, true, 0.5); // too close
|
||||||
assert_eq!(result, FailSafeState::EmergencyDiverge);
|
assert_eq!(result, FailSafeState::EmergencyDiverge);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Security: a NaN neighbour distance (poisoned peer position over the swarm
|
||||||
|
/// comm layer) must NOT silently disable collision avoidance. Fails on old
|
||||||
|
/// code where `NaN < collision_dist_m` is `false` and the state stays Nominal.
|
||||||
|
#[test]
|
||||||
|
fn test_nan_neighbor_distance_fails_closed_to_diverge() {
|
||||||
|
let mut fsm = FailSafeMachine::new();
|
||||||
|
let s = good_state();
|
||||||
|
let result = fsm.tick(&s, true, f64::NAN);
|
||||||
|
assert_eq!(
|
||||||
|
result,
|
||||||
|
FailSafeState::EmergencyDiverge,
|
||||||
|
"non-finite neighbour distance must fail closed to EmergencyDiverge"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Security: a NaN battery reading must fail closed to ReturnToHome rather
|
||||||
|
/// than being treated as a healthy battery. Fails on old code where
|
||||||
|
/// `NaN <= battery_rth_pct` is `false` and the drone stays Nominal.
|
||||||
|
#[test]
|
||||||
|
fn test_nan_battery_fails_closed_to_rth() {
|
||||||
|
let mut fsm = FailSafeMachine::new();
|
||||||
|
let mut s = good_state();
|
||||||
|
s.battery_pct = f32::NAN;
|
||||||
|
let result = fsm.tick(&s, true, 10.0);
|
||||||
|
assert_eq!(
|
||||||
|
result,
|
||||||
|
FailSafeState::ReturnToHome,
|
||||||
|
"non-finite battery must fail closed to ReturnToHome"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -59,8 +59,16 @@ impl FhssRadio {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the current active channel frequency in MHz.
|
/// Returns the current active channel frequency in MHz.
|
||||||
|
///
|
||||||
|
/// `FhssConfig` is `Deserialize`, so `channels_mhz` can arrive empty from a
|
||||||
|
/// malformed or hostile config. An empty channel list would make `% n`
|
||||||
|
/// (n = 0) panic with a divide-by-zero. Guard it and return a benign `0.0`
|
||||||
|
/// sentinel instead of crashing the radio task (DoS-resistance).
|
||||||
pub fn current_channel_mhz(&self) -> f64 {
|
pub fn current_channel_mhz(&self) -> f64 {
|
||||||
let n = self.config.channels_mhz.len();
|
let n = self.config.channels_mhz.len();
|
||||||
|
if n == 0 {
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
// XOR node seed into hop index so each node uses a different offset
|
// XOR node seed into hop index so each node uses a different offset
|
||||||
let idx = (self.hop_index ^ (self.node_seed as usize)) % n;
|
let idx = (self.hop_index ^ (self.node_seed as usize)) % n;
|
||||||
self.config.channels_mhz[idx]
|
self.config.channels_mhz[idx]
|
||||||
|
|
@ -68,7 +76,11 @@ impl FhssRadio {
|
||||||
|
|
||||||
/// Advance the hop sequence by one step (call at hop_rate_hz).
|
/// Advance the hop sequence by one step (call at hop_rate_hz).
|
||||||
pub fn next_hop(&mut self) {
|
pub fn next_hop(&mut self) {
|
||||||
self.hop_index = (self.hop_index + 1) % self.config.channels_mhz.len();
|
let n = self.config.channels_mhz.len();
|
||||||
|
if n == 0 {
|
||||||
|
return; // no channels configured — nothing to hop (avoid `% 0` panic)
|
||||||
|
}
|
||||||
|
self.hop_index = (self.hop_index + 1) % n;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Update with latest RSSI measurement. Drives jamming detection.
|
/// Update with latest RSSI measurement. Drives jamming detection.
|
||||||
|
|
@ -97,9 +109,13 @@ impl FhssRadio {
|
||||||
.wrapping_mul(lcg_a)
|
.wrapping_mul(lcg_a)
|
||||||
.wrapping_add(self.evasion_count)
|
.wrapping_add(self.evasion_count)
|
||||||
.wrapping_add(lcg_c);
|
.wrapping_add(lcg_c);
|
||||||
let n = self.config.channels_mhz.len() as u64;
|
let len = self.config.channels_mhz.len();
|
||||||
|
if len == 0 {
|
||||||
|
return; // no channels configured — avoid `% 0` panic
|
||||||
|
}
|
||||||
|
let n = len as u64;
|
||||||
let offset = (seed % n / 4 + 3) as usize;
|
let offset = (seed % n / 4 + 3) as usize;
|
||||||
self.hop_index = (self.hop_index + offset) % self.config.channels_mhz.len();
|
self.hop_index = (self.hop_index + offset) % len;
|
||||||
self.evasion_count += 1;
|
self.evasion_count += 1;
|
||||||
self.rssi_history.clear();
|
self.rssi_history.clear();
|
||||||
}
|
}
|
||||||
|
|
@ -165,6 +181,23 @@ mod tests {
|
||||||
assert_eq!(radio.hop_index, (initial_idx + 2) % 50);
|
assert_eq!(radio.hop_index, (initial_idx + 2) % 50);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Security/DoS: an empty `channels_mhz` (deserialized from a malformed or
|
||||||
|
/// hostile config) must not panic with a `% 0` divide-by-zero. Fails on old
|
||||||
|
/// code, where `next_hop`/`current_channel_mhz`/`evasive_hop`/`tick` all do
|
||||||
|
/// modulo / index by `channels_mhz.len()`.
|
||||||
|
#[test]
|
||||||
|
fn test_empty_channels_does_not_panic() {
|
||||||
|
let cfg = FhssConfig { channels_mhz: vec![], jamming_detect_window: 1, ..Default::default() };
|
||||||
|
let mut radio = FhssRadio::new(7, cfg);
|
||||||
|
// None of these may panic.
|
||||||
|
let _ = radio.current_channel_mhz();
|
||||||
|
radio.next_hop();
|
||||||
|
radio.observe_rssi(-99.0); // window=1 → jamming_detected() true → evasive_hop()
|
||||||
|
radio.tick(100.0);
|
||||||
|
radio.evasive_hop();
|
||||||
|
assert_eq!(radio.current_channel_mhz(), 0.0, "empty channel list returns sentinel");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_channel_in_valid_range() {
|
fn test_channel_in_valid_range() {
|
||||||
let cfg = FhssConfig::default();
|
let cfg = FhssConfig::default();
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,16 @@ pub enum GeofenceResult {
|
||||||
impl Geofence {
|
impl Geofence {
|
||||||
/// Check a position against this geofence.
|
/// Check a position against this geofence.
|
||||||
pub fn check(&self, pos: &Position3D) -> GeofenceResult {
|
pub fn check(&self, pos: &Position3D) -> GeofenceResult {
|
||||||
|
// Fail CLOSED on a non-finite position. A NaN/Inf component (from a
|
||||||
|
// corrupt GPS/EKF estimate or a forged position) makes every subsequent
|
||||||
|
// comparison false: `NaN < min || NaN > max` is `false`, so the altitude
|
||||||
|
// breach is skipped, and a NaN altitude with otherwise-valid x/y would
|
||||||
|
// return `Safe` — a silent geofence bypass on a flight-safety boundary.
|
||||||
|
// Treat any non-finite coordinate as a hard breach.
|
||||||
|
if !pos.x.is_finite() || !pos.y.is_finite() || !pos.z.is_finite() {
|
||||||
|
return GeofenceResult::HardBreach;
|
||||||
|
}
|
||||||
|
|
||||||
let altitude_m = -pos.z; // NED: negative z = altitude above ground
|
let altitude_m = -pos.z; // NED: negative z = altitude above ground
|
||||||
|
|
||||||
// Altitude check
|
// Altitude check
|
||||||
|
|
@ -146,4 +156,29 @@ mod tests {
|
||||||
let pos = Position3D { x: 50.0, y: 50.0, z: -200.0 }; // 200m altitude
|
let pos = Position3D { x: 50.0, y: 50.0, z: -200.0 }; // 200m altitude
|
||||||
assert_eq!(f.check(&pos), GeofenceResult::HardBreach);
|
assert_eq!(f.check(&pos), GeofenceResult::HardBreach);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Security: a NaN altitude with an otherwise in-bounds x/y must fail closed
|
||||||
|
/// to HardBreach. Fails on old code where `NaN < min || NaN > max` is `false`,
|
||||||
|
/// the altitude check is skipped, and the point-in-polygon path returns Safe —
|
||||||
|
/// a silent geofence bypass.
|
||||||
|
#[test]
|
||||||
|
fn test_nan_altitude_fails_closed() {
|
||||||
|
let f = square_fence();
|
||||||
|
let pos = Position3D { x: 50.0, y: 50.0, z: f64::NAN };
|
||||||
|
assert_eq!(f.check(&pos), GeofenceResult::HardBreach);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Security: NaN/Inf horizontal coordinates must also fail closed.
|
||||||
|
#[test]
|
||||||
|
fn test_nonfinite_horizontal_fails_closed() {
|
||||||
|
let f = square_fence();
|
||||||
|
assert_eq!(
|
||||||
|
f.check(&Position3D { x: f64::NAN, y: 50.0, z: -30.0 }),
|
||||||
|
GeofenceResult::HardBreach
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
f.check(&Position3D { x: 50.0, y: f64::INFINITY, z: -30.0 }),
|
||||||
|
GeofenceResult::HardBreach
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -64,10 +64,25 @@ impl MultiViewFusion {
|
||||||
detections: &[CsiDetection],
|
detections: &[CsiDetection],
|
||||||
drone_positions: &[(NodeId, Position3D)],
|
drone_positions: &[(NodeId, Position3D)],
|
||||||
) -> Option<FusedDetection> {
|
) -> Option<FusedDetection> {
|
||||||
// Filter by confidence and require estimated position
|
// Filter by confidence and require a FINITE estimated position.
|
||||||
|
//
|
||||||
|
// A peer detection (received via `receive_peer_detection`) carries f32/f64
|
||||||
|
// fields that can deserialize to NaN/Inf. A NaN `victim_position` passes
|
||||||
|
// `is_some()` and would propagate through the confidence-weighted average
|
||||||
|
// into the fused position — dispatching a NaN "confirmed victim" location
|
||||||
|
// to the swarm. A NaN `confidence` is already rejected by `>= min_confidence`
|
||||||
|
// (NaN comparisons are false), but we make that explicit and also require
|
||||||
|
// the victim position components to be finite. Fail CLOSED: drop poisoned
|
||||||
|
// detections rather than fusing them.
|
||||||
let valid: Vec<(&CsiDetection, &Position3D)> = detections
|
let valid: Vec<(&CsiDetection, &Position3D)> = detections
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|d| d.confidence >= self.min_confidence && d.victim_position.is_some())
|
.filter(|d| {
|
||||||
|
d.confidence.is_finite()
|
||||||
|
&& d.confidence >= self.min_confidence
|
||||||
|
&& d.victim_position
|
||||||
|
.map(|p| p.x.is_finite() && p.y.is_finite() && p.z.is_finite())
|
||||||
|
.unwrap_or(false)
|
||||||
|
})
|
||||||
.filter_map(|d| {
|
.filter_map(|d| {
|
||||||
let drone_pos = drone_positions
|
let drone_pos = drone_positions
|
||||||
.iter()
|
.iter()
|
||||||
|
|
@ -177,4 +192,46 @@ mod tests {
|
||||||
result.uncertainty_m
|
result.uncertainty_m
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Security: a detection with a NaN victim position (poisoned peer report)
|
||||||
|
/// must be dropped, not fused. Fails on old code where the NaN propagates
|
||||||
|
/// into the confidence-weighted average and the fused position is NaN.
|
||||||
|
#[test]
|
||||||
|
fn test_nan_victim_position_dropped_from_fusion() {
|
||||||
|
let fusion = MultiViewFusion { min_viewpoints: 2, min_confidence: 0.5 };
|
||||||
|
let detections = vec![
|
||||||
|
CsiDetection {
|
||||||
|
drone_id: NodeId(0),
|
||||||
|
confidence: 0.9,
|
||||||
|
victim_position: Some(Position3D { x: 50.0, y: 50.0, z: 0.0 }),
|
||||||
|
timestamp_ms: 0,
|
||||||
|
},
|
||||||
|
CsiDetection {
|
||||||
|
drone_id: NodeId(1),
|
||||||
|
confidence: 0.9,
|
||||||
|
victim_position: Some(Position3D { x: f64::NAN, y: 50.0, z: 0.0 }),
|
||||||
|
timestamp_ms: 0,
|
||||||
|
},
|
||||||
|
CsiDetection {
|
||||||
|
drone_id: NodeId(2),
|
||||||
|
confidence: 0.9,
|
||||||
|
victim_position: Some(Position3D { x: 50.0, y: 50.0, z: 0.0 }),
|
||||||
|
timestamp_ms: 0,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
let positions = vec![
|
||||||
|
(NodeId(0), Position3D { x: 0.0, y: 0.0, z: -30.0 }),
|
||||||
|
(NodeId(1), Position3D { x: 100.0, y: 0.0, z: -30.0 }),
|
||||||
|
(NodeId(2), Position3D { x: 50.0, y: 86.6, z: -30.0 }),
|
||||||
|
];
|
||||||
|
// Two finite viewpoints remain → still fuses, but the result must be finite.
|
||||||
|
let result = fusion.fuse(&detections, &positions).unwrap();
|
||||||
|
assert!(
|
||||||
|
result.estimated_position.x.is_finite()
|
||||||
|
&& result.estimated_position.y.is_finite()
|
||||||
|
&& result.estimated_position.z.is_finite(),
|
||||||
|
"fused position must be finite when a NaN detection is present"
|
||||||
|
);
|
||||||
|
assert!(!result.contributing_drones.contains(&NodeId(1)), "NaN detection must be excluded");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue