//! Identifier sanitization for filesystem paths.
//!
//! Defense against directory traversal: the sensing-server has several REST
//! endpoints that take a user-controlled identifier and use it directly to
//! build a filesystem path:
//!
//! * `recording.rs` — `{session_name}.csi.jsonl` under `RECORDINGS_DIR`
//! * `model_manager.rs` — `{model_id}.rvf` under `models_dir()`
//! * `training_api.rs` — `{dataset_id}.csi.jsonl` under `RECORDINGS_DIR`
//!
//! Without validation, an attacker can pass `../../etc/passwd` or similar to
//! read, write, or delete arbitrary files the server process can access. See
//! issue #615 for the full exploit catalogue.
//!
//! [`safe_id`] returns the input only when it is safe to embed in a
//! `format!()` that builds a path under a fixed parent directory.

use std::fmt;

/// Maximum length, in bytes, of a safe identifier.
///
/// 64 is generous for human-typed session names while keeping the resulting
/// filename well under most filesystem limits.
pub const MAX_ID_LEN: usize = 64;

/// Error returned by [`safe_id`] when the input is not safe to embed in a
/// filesystem path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathSafetyError {
    /// Empty string is never a valid identifier.
    Empty,
    /// Identifier exceeds `MAX_ID_LEN` bytes.
    TooLong {
        /// Length of the rejected identifier, in bytes.
        len: usize,
        /// The limit that was exceeded.
        max: usize,
    },
    /// Identifier contains a character not in the allowed set
    /// `[A-Za-z0-9._-]`.
    ///
    /// Path separators, null bytes, and any non-printable or non-ASCII
    /// characters all hit this. Identifiers that *start* with `.` are
    /// reported as [`PathSafetyError::LeadingDot`] instead.
    InvalidChar {
        /// The first character that is outside the allowed set.
        ch: char,
        /// Zero-based index of `ch` within the identifier.
        position: usize,
    },
    /// Identifier is `"."` or `".."`, or has any leading `.` that would
    /// otherwise be interpreted as a hidden file / parent reference.
    LeadingDot,
}

impl fmt::Display for PathSafetyError {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self {
|
|
PathSafetyError::Empty => write!(f, "identifier is empty"),
|
|
PathSafetyError::TooLong { len, max } => {
|
|
write!(f, "identifier is {len} bytes (max {max})")
|
|
}
|
|
PathSafetyError::InvalidChar { ch, position } => write!(
|
|
f,
|
|
"identifier contains invalid character {ch:?} at position {position} \
|
|
(only A-Z, a-z, 0-9, '.', '_', '-' are allowed)"
|
|
),
|
|
PathSafetyError::LeadingDot => write!(
|
|
f,
|
|
"identifier may not start with '.' (would be a hidden file \
|
|
or parent-directory reference)"
|
|
),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl std::error::Error for PathSafetyError {}
|
|
|
|
/// Return `Ok(input)` if the string is safe to embed in a filesystem path
|
|
/// built under a fixed parent directory; otherwise return a structured error.
|
|
///
|
|
/// The allowed character set is `[A-Za-z0-9._-]`. The first character must
|
|
/// not be `.` (rules out `..`, `.`, and hidden-file shenanigans).
|
|
///
|
|
/// Examples:
|
|
/// ```ignore
|
|
/// assert!(safe_id("my-session_42").is_ok());
|
|
/// assert!(safe_id("session.v2").is_ok());
|
|
/// assert!(safe_id("../../etc/passwd").is_err());
|
|
/// assert!(safe_id("foo/bar").is_err());
|
|
/// assert!(safe_id("..").is_err());
|
|
/// assert!(safe_id(".env").is_err());
|
|
/// assert!(safe_id("").is_err());
|
|
/// ```
|
|
pub fn safe_id(input: &str) -> Result<&str, PathSafetyError> {
|
|
if input.is_empty() {
|
|
return Err(PathSafetyError::Empty);
|
|
}
|
|
if input.len() > MAX_ID_LEN {
|
|
return Err(PathSafetyError::TooLong {
|
|
len: input.len(),
|
|
max: MAX_ID_LEN,
|
|
});
|
|
}
|
|
// Reject leading '.' to block `.`, `..`, `.env`, etc.
|
|
if input.starts_with('.') {
|
|
return Err(PathSafetyError::LeadingDot);
|
|
}
|
|
for (position, ch) in input.chars().enumerate() {
|
|
let ok = ch.is_ascii_alphanumeric() || ch == '.' || ch == '_' || ch == '-';
|
|
if !ok {
|
|
return Err(PathSafetyError::InvalidChar { ch, position });
|
|
}
|
|
}
|
|
Ok(input)
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    //! Unit tests for `safe_id`: one test per rejection reason plus the
    //! accepted-input and length-boundary cases.

    use super::*;

    #[test]
    fn accepts_simple_alphanumeric() {
        assert!(safe_id("foo").is_ok());
        assert!(safe_id("MyModel123").is_ok());
        assert!(safe_id("session-2026-05-17_v2").is_ok());
        assert!(safe_id("a.b.c").is_ok());
    }

    #[test]
    fn returns_input_unchanged() {
        // Success hands back the very same string, not a normalised copy.
        assert_eq!(safe_id("foo"), Ok("foo"));
        assert_eq!(safe_id("session.v2"), Ok("session.v2"));
    }

    #[test]
    fn rejects_empty() {
        assert_eq!(safe_id(""), Err(PathSafetyError::Empty));
    }

    #[test]
    fn rejects_path_separators() {
        assert!(matches!(
            safe_id("foo/bar"),
            Err(PathSafetyError::InvalidChar { ch: '/', .. })
        ));
        assert!(matches!(
            safe_id("foo\\bar"),
            Err(PathSafetyError::InvalidChar { ch: '\\', .. })
        ));
    }

    #[test]
    fn rejects_parent_directory_traversal() {
        assert_eq!(safe_id("."), Err(PathSafetyError::LeadingDot));
        assert_eq!(safe_id(".."), Err(PathSafetyError::LeadingDot));
        assert_eq!(safe_id(".env"), Err(PathSafetyError::LeadingDot));
        // The classic attack vector. The leading-dot check runs before the
        // character scan, so this is reported as `LeadingDot` rather than
        // as `InvalidChar` for the embedded slash.
        assert!(matches!(
            safe_id("../../etc/passwd"),
            Err(PathSafetyError::LeadingDot)
        ));
    }

    #[test]
    fn rejects_traversal_without_leading_dot() {
        // No leading '.', so the allow-list scan is what stops this one.
        assert_eq!(
            safe_id("foo/../bar"),
            Err(PathSafetyError::InvalidChar {
                ch: '/',
                position: 3
            })
        );
    }

    #[test]
    fn reports_position_of_first_invalid_char() {
        assert_eq!(
            safe_id("ab cd ef"),
            Err(PathSafetyError::InvalidChar {
                ch: ' ',
                position: 2
            })
        );
        assert_eq!(
            safe_id("/abs"),
            Err(PathSafetyError::InvalidChar {
                ch: '/',
                position: 0
            })
        );
    }

    #[test]
    fn rejects_null_byte() {
        assert!(matches!(
            safe_id("foo\0bar"),
            Err(PathSafetyError::InvalidChar { ch: '\0', .. })
        ));
    }

    #[test]
    fn rejects_whitespace_and_specials() {
        assert!(matches!(
            safe_id("foo bar"),
            Err(PathSafetyError::InvalidChar { ch: ' ', .. })
        ));
        assert!(matches!(
            safe_id("foo;rm -rf /"),
            Err(PathSafetyError::InvalidChar { .. })
        ));
        assert!(matches!(
            safe_id("foo$bar"),
            Err(PathSafetyError::InvalidChar { ch: '$', .. })
        ));
    }

    #[test]
    fn rejects_non_ascii() {
        // Reject unicode that could normalise to path separators in
        // weird filesystems, or just look like ASCII.
        assert!(matches!(
            safe_id("café"),
            Err(PathSafetyError::InvalidChar { .. })
        ));
        // Fullwidth slash (U+FF0F) — visually similar to '/'.
        assert!(matches!(
            safe_id("foo\u{FF0F}bar"),
            Err(PathSafetyError::InvalidChar { .. })
        ));
    }

    #[test]
    fn rejects_too_long() {
        let too_long = "a".repeat(MAX_ID_LEN + 1);
        assert_eq!(
            safe_id(&too_long),
            Err(PathSafetyError::TooLong {
                len: MAX_ID_LEN + 1,
                max: MAX_ID_LEN
            })
        );
    }

    #[test]
    fn boundary_max_len() {
        let at_max = "a".repeat(MAX_ID_LEN);
        assert!(safe_id(&at_max).is_ok());
    }
}