diff --git a/CHANGELOG.md b/CHANGELOG.md index 96ac8319..f2655037 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Security +- **Path-traversal vulnerabilities patched in five sensing-server endpoints** (closes #615 — critical). New `wifi_densepose_sensing_server::path_safety::safe_id()` enforces `[A-Za-z0-9._-]` only (no leading `.`, max 64 chars) before any user-controlled identifier reaches a `format!()` building a filesystem path. Applied at: + - `POST /api/v1/recording/start` (`recording.rs` — `session_name`) + - `GET /api/v1/recording/download/:id` (`recording.rs` — `id`) + - `DELETE /api/v1/recording/delete/:id` (`recording.rs` — `id`) + - `POST /api/v1/models/load` (`model_manager.rs` — `model_id`) + - `training_api.rs` `load_recording_frames` (`dataset_id`s) + + Pre-fix, unauthenticated callers could read `../../etc/passwd`-style paths, write arbitrary JSONL files, load attacker-controlled `.rvf` model files, or delete arbitrary files the server process could touch. 9 unit tests in `path_safety::tests` exercise the rejection envelope (empty, too-long, path separators, parent-dir traversal, null byte, whitespace/specials, non-ASCII). + ### Fixed - **Proof replay (`archive/v1/data/proof/verify.py`) is now cross-platform deterministic** (closes #560). Three changes together: (1) `features_to_bytes()` now `np.round(.., HASH_QUANTIZATION_DECIMALS=6)`s each feature array before packing as little-endian f64, collapsing ULP-level drift from scipy.fft pocketfft SIMD reordering; (2) the `Verify Pipeline Determinism` workflow pins `OMP_NUM_THREADS=1`, `OPENBLAS_NUM_THREADS=1`, `MKL_NUM_THREADS=1`, `VECLIB_MAXIMUM_THREADS=1`, `NUMEXPR_NUM_THREADS=1` — multi-threaded BLAS reductions were a deeper source of non-determinism than SIMD reordering, and 6-decimal quantization alone wasn't enough across Azure VM microarchitectures; (3) `expected_features.sha256` regenerated under the new conditions. CI now passes the determinism check (same hash across consecutive runs on canonical Linux x86_64 CI runner: `667eb054c44ac510342665bf9c93d608868a8ead948ae8774b2796ebce6f8fe7`). `scripts/probe-fft-platform.py` updated to mirror `HASH_QUANTIZATION_DECIMALS=6` for cross-machine spot-checks. - **`archive/v1/src/services/pose_service.py:223` calls the right method on `PhaseSanitizer`** (closes #612). The call was `self.phase_sanitizer.sanitize(phase_data)`, but `PhaseSanitizer`'s full-pipeline entry point is named `sanitize_phase()` (`unwrap_phase` + `remove_outliers` + `smooth_phase` chained, see `archive/v1/src/core/phase_sanitizer.py:266`). The shorter `sanitize` name doesn't exist on the class, so any path that reached this branch raised `AttributeError` and crashed the pose service mid-frame. diff --git a/scripts/fix-markers.json b/scripts/fix-markers.json index 70d32374..abd53810 100644 --- a/scripts/fix-markers.json +++ b/scripts/fix-markers.json @@ -172,6 +172,22 @@ "rationale": "The per-node TDM/channel overlay intentionally omits WiFi creds (those live in the base flash image). Without --force-partial the issue #391 wifi-trio guard in provision.py rejects the call and breaks the Swarm Test (ADR-062) job. Was red on main for ~5 weeks before PR #590.", "ref": "https://github.com/ruvnet/RuView/pull/590" }, + { + "id": "RuView#615", + "title": "path_safety::safe_id gates user-controlled IDs at filesystem boundaries", + "files": [ + "v2/crates/wifi-densepose-sensing-server/src/path_safety.rs", + "v2/crates/wifi-densepose-sensing-server/src/recording.rs", + "v2/crates/wifi-densepose-sensing-server/src/model_manager.rs", + "v2/crates/wifi-densepose-sensing-server/src/training_api.rs" + ], + "require": [ + "path_safety::safe_id", + "pub fn safe_id" + ], + "rationale": "Five endpoints used to embed user-controlled identifiers (session_name, model_id, dataset_id, recording id) into format!() paths with no sanitization, allowing classic '../../etc/passwd' reads, writes, and deletes on the server filesystem. The safe_id helper enforces [A-Za-z0-9._-] only (no leading '.', max 64 chars) and must run before any user input reaches a format!() that builds a path. Removing the helper or skipping it at any of these call sites reintroduces the #615 attack surface.", + "ref": "https://github.com/ruvnet/RuView/issues/615" + }, { "id": "RuView#560", "title": "verify.py quantizes features before SHA-256 for cross-platform hash stability", diff --git a/v2/crates/wifi-densepose-sensing-server/src/lib.rs b/v2/crates/wifi-densepose-sensing-server/src/lib.rs index 41a959c1..316498b3 100644 --- a/v2/crates/wifi-densepose-sensing-server/src/lib.rs +++ b/v2/crates/wifi-densepose-sensing-server/src/lib.rs @@ -10,6 +10,7 @@ pub mod bearer_auth; pub mod host_validation; pub mod introspection; +pub mod path_safety; pub mod vital_signs; pub mod rvf_container; pub mod rvf_pipeline; diff --git a/v2/crates/wifi-densepose-sensing-server/src/model_manager.rs b/v2/crates/wifi-densepose-sensing-server/src/model_manager.rs index 4a960970..6cbeebef 100644 --- a/v2/crates/wifi-densepose-sensing-server/src/model_manager.rs +++ b/v2/crates/wifi-densepose-sensing-server/src/model_manager.rs @@ -215,6 +215,11 @@ async fn scan_models() -> Vec { /// Load a model from disk by ID and return its `LoadedModelState`. fn load_model_from_disk(model_id: &str) -> Result { + // Path-traversal guard (#615). Reject any model_id that contains '/', + // '..', null bytes, or anything outside [A-Za-z0-9._-]. The reject + // happens before format!() so the path can never escape models_dir(). + let model_id = crate::path_safety::safe_id(model_id) + .map_err(|e| format!("Invalid model_id: {e}"))?; let file_path = models_dir().join(format!("{model_id}.rvf")); let reader = RvfReader::from_file(&file_path)?; diff --git a/v2/crates/wifi-densepose-sensing-server/src/path_safety.rs b/v2/crates/wifi-densepose-sensing-server/src/path_safety.rs new file mode 100644 index 00000000..6f2fde71 --- /dev/null +++ b/v2/crates/wifi-densepose-sensing-server/src/path_safety.rs @@ -0,0 +1,203 @@ +//! Identifier sanitization for filesystem paths. +//! +//! Defense against directory traversal: the sensing-server has several REST +//! endpoints that take a user-controlled identifier and use it directly to +//! build a filesystem path: +//! +//! * `recording.rs` — `{session_name}.csi.jsonl` under `RECORDINGS_DIR` +//! * `model_manager.rs` — `{model_id}.rvf` under `models_dir()` +//! * `training_api.rs` — `{dataset_id}.csi.jsonl` under `RECORDINGS_DIR` +//! +//! Without validation, an attacker can pass `../../etc/passwd` or similar to +//! read, write, or delete arbitrary files the server process can access. See +//! issue #615 for the full exploit catalogue. +//! +//! [`safe_id`] returns the input only when it is safe to embed in a +//! `format!()` that builds a path under a fixed parent directory. + +use std::fmt; + +/// Maximum length for a safe identifier. 64 is generous for human-typed +/// session names while keeping the resulting filename well under +/// most filesystem limits. +pub const MAX_ID_LEN: usize = 64; + +/// Error returned by [`safe_id`] when the input is not safe to embed in a +/// filesystem path. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum PathSafetyError { + /// Empty string is never a valid identifier. + Empty, + /// Identifier exceeds `MAX_ID_LEN` bytes. + TooLong { len: usize, max: usize }, + /// Identifier contains a character not in the allowed set + /// `[A-Za-z0-9._-]` (and the leading character is not `.`). + /// Path separators, null bytes, parent-directory references, and any + /// non-printable or non-ASCII characters all hit this. + InvalidChar { ch: char, position: usize }, + /// Identifier is `"."` or `".."`, or any leading `.` that would + /// otherwise be interpreted as a hidden file / parent reference. + LeadingDot, +} + +impl fmt::Display for PathSafetyError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PathSafetyError::Empty => write!(f, "identifier is empty"), + PathSafetyError::TooLong { len, max } => { + write!(f, "identifier is {len} bytes (max {max})") + } + PathSafetyError::InvalidChar { ch, position } => write!( + f, + "identifier contains invalid character {ch:?} at position {position} \ + (only A-Z, a-z, 0-9, '.', '_', '-' are allowed)" + ), + PathSafetyError::LeadingDot => write!( + f, + "identifier may not start with '.' (would be a hidden file \ + or parent-directory reference)" + ), + } + } +} + +impl std::error::Error for PathSafetyError {} + +/// Return `Ok(input)` if the string is safe to embed in a filesystem path +/// built under a fixed parent directory; otherwise return a structured error. +/// +/// The allowed character set is `[A-Za-z0-9._-]`. The first character must +/// not be `.` (rules out `..`, `.`, and hidden-file shenanigans). +/// +/// Examples: +/// ```ignore +/// assert!(safe_id("my-session_42").is_ok()); +/// assert!(safe_id("session.v2").is_ok()); +/// assert!(safe_id("../../etc/passwd").is_err()); +/// assert!(safe_id("foo/bar").is_err()); +/// assert!(safe_id("..").is_err()); +/// assert!(safe_id(".env").is_err()); +/// assert!(safe_id("").is_err()); +/// ``` +pub fn safe_id(input: &str) -> Result<&str, PathSafetyError> { + if input.is_empty() { + return Err(PathSafetyError::Empty); + } + if input.len() > MAX_ID_LEN { + return Err(PathSafetyError::TooLong { + len: input.len(), + max: MAX_ID_LEN, + }); + } + // Reject leading '.' to block `.`, `..`, `.env`, etc. + if input.starts_with('.') { + return Err(PathSafetyError::LeadingDot); + } + for (position, ch) in input.chars().enumerate() { + let ok = ch.is_ascii_alphanumeric() || ch == '.' || ch == '_' || ch == '-'; + if !ok { + return Err(PathSafetyError::InvalidChar { ch, position }); + } + } + Ok(input) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn accepts_simple_alphanumeric() { + assert!(safe_id("foo").is_ok()); + assert!(safe_id("MyModel123").is_ok()); + assert!(safe_id("session-2026-05-17_v2").is_ok()); + assert!(safe_id("a.b.c").is_ok()); + } + + #[test] + fn rejects_empty() { + assert_eq!(safe_id(""), Err(PathSafetyError::Empty)); + } + + #[test] + fn rejects_path_separators() { + assert!(matches!( + safe_id("foo/bar"), + Err(PathSafetyError::InvalidChar { ch: '/', .. }) + )); + assert!(matches!( + safe_id("foo\\bar"), + Err(PathSafetyError::InvalidChar { ch: '\\', .. }) + )); + } + + #[test] + fn rejects_parent_directory_traversal() { + assert_eq!(safe_id("."), Err(PathSafetyError::LeadingDot)); + assert_eq!(safe_id(".."), Err(PathSafetyError::LeadingDot)); + assert_eq!(safe_id(".env"), Err(PathSafetyError::LeadingDot)); + // The classic attack vector — even after rejecting leading-dot, + // the InvalidChar guard catches the embedded slash. + assert!(matches!( + safe_id("../../etc/passwd"), + Err(PathSafetyError::LeadingDot) + )); + } + + #[test] + fn rejects_null_byte() { + assert!(matches!( + safe_id("foo\0bar"), + Err(PathSafetyError::InvalidChar { ch: '\0', .. }) + )); + } + + #[test] + fn rejects_whitespace_and_specials() { + assert!(matches!( + safe_id("foo bar"), + Err(PathSafetyError::InvalidChar { ch: ' ', .. }) + )); + assert!(matches!( + safe_id("foo;rm -rf /"), + Err(PathSafetyError::InvalidChar { .. }) + )); + assert!(matches!( + safe_id("foo$bar"), + Err(PathSafetyError::InvalidChar { ch: '$', .. }) + )); + } + + #[test] + fn rejects_non_ascii() { + // Reject unicode that could normalise to path separators in + // weird filesystems, or just look like ASCII. + assert!(matches!( + safe_id("café"), + Err(PathSafetyError::InvalidChar { .. }) + )); + // Fullwidth slash (U+FF0F) — visually similar to '/'. + assert!(matches!( + safe_id("foo\u{FF0F}bar"), + Err(PathSafetyError::InvalidChar { .. }) + )); + } + + #[test] + fn rejects_too_long() { + let too_long = "a".repeat(MAX_ID_LEN + 1); + assert_eq!( + safe_id(&too_long), + Err(PathSafetyError::TooLong { + len: MAX_ID_LEN + 1, + max: MAX_ID_LEN + }) + ); + } + + #[test] + fn boundary_max_len() { + let at_max = "a".repeat(MAX_ID_LEN); + assert!(safe_id(&at_max).is_ok()); + } +} diff --git a/v2/crates/wifi-densepose-sensing-server/src/recording.rs b/v2/crates/wifi-densepose-sensing-server/src/recording.rs index 4170c4ce..7a56add7 100644 --- a/v2/crates/wifi-densepose-sensing-server/src/recording.rs +++ b/v2/crates/wifi-densepose-sensing-server/src/recording.rs @@ -274,9 +274,22 @@ async fn start_recording( })); } + // Validate session_name BEFORE embedding it in a path. The legacy + // `replace(' ', "_")` only normalised whitespace, not path traversal + // (#615). Reject any session_name containing path separators or + // parent-directory references. + let safe_name = match crate::path_safety::safe_id(&body.session_name) { + Ok(n) => n, + Err(e) => { + return Json(serde_json::json!({ + "status": "error", + "message": format!("Invalid session_name: {e}"), + })); + } + }; let session_id = format!( "{}-{}", - body.session_name.replace(' ', "_"), + safe_name, chrono::Utc::now().format("%Y%m%d_%H%M%S") ); let file_name = format!("{session_id}.csi.jsonl"); @@ -346,6 +359,23 @@ async fn download_recording( State(_state): State, AxumPath(id): AxumPath, ) -> impl IntoResponse { + // Path-traversal guard (#615). Reject any id that contains '/', '..', + // null bytes, or anything outside [A-Za-z0-9._-] BEFORE building the + // path. Otherwise GET /api/v1/recording/download/../../.env leaks + // arbitrary files the server process can read. + let id = match crate::path_safety::safe_id(&id) { + Ok(s) => s.to_string(), + Err(e) => { + return ( + axum::http::StatusCode::BAD_REQUEST, + Json(serde_json::json!({ + "status": "error", + "message": format!("Invalid recording id: {e}"), + })), + ) + .into_response(); + } + }; let dir = PathBuf::from(RECORDINGS_DIR); // Find the JSONL file matching the ID. let file_path = dir.join(format!("{id}.csi.jsonl")); @@ -390,6 +420,19 @@ async fn delete_recording( State(_state): State, AxumPath(id): AxumPath, ) -> Json { + // Path-traversal guard (#615). Reject any id that contains '/', '..', + // null bytes, or anything outside [A-Za-z0-9._-] BEFORE building the + // paths. Otherwise DELETE /api/v1/recording/delete/../../config/database + // can remove arbitrary files the server process can write. + let id = match crate::path_safety::safe_id(&id) { + Ok(s) => s.to_string(), + Err(e) => { + return Json(serde_json::json!({ + "status": "error", + "message": format!("Invalid recording id: {e}"), + })); + } + }; let dir = PathBuf::from(RECORDINGS_DIR); let jsonl_path = dir.join(format!("{id}.csi.jsonl")); let meta_path = dir.join(format!("{id}.csi.meta.json")); diff --git a/v2/crates/wifi-densepose-sensing-server/src/training_api.rs b/v2/crates/wifi-densepose-sensing-server/src/training_api.rs index 1aafb13b..32b32002 100644 --- a/v2/crates/wifi-densepose-sensing-server/src/training_api.rs +++ b/v2/crates/wifi-densepose-sensing-server/src/training_api.rs @@ -239,7 +239,18 @@ async fn load_recording_frames(dataset_ids: &[String]) -> Vec { let recordings_dir = PathBuf::from(RECORDINGS_DIR); for id in dataset_ids { - let file_path = recordings_dir.join(format!("{id}.csi.jsonl")); + // Path-traversal guard (#615). Reject any dataset_id that contains + // '/', '..', null bytes, or anything outside [A-Za-z0-9._-] BEFORE + // building the format!() path. Otherwise an attacker could read any + // file the server process can access via `dataset_ids: ["../../etc/passwd"]`. + let safe = match crate::path_safety::safe_id(id) { + Ok(s) => s, + Err(e) => { + warn!("Skipping invalid dataset_id {id:?}: {e}"); + continue; + } + }; + let file_path = recordings_dir.join(format!("{safe}.csi.jsonl")); let data = match tokio::fs::read_to_string(&file_path).await { Ok(d) => d, Err(e) => {