From 31e7e9d285f2e62b923b3b8e311bf7919c9eccab Mon Sep 17 00:00:00 2001
From: Mikolaj Wielgus
Date: Thu, 21 Nov 2024 22:05:18 +0100
Subject: [PATCH] feat(specctra): sanitize DSN input by safelisting chars
 where possible

I have implemented sanitization for every `.next_char()` and
`.peek_char()` that I could without causing an error. It turned out
that there are only two minor places where I can do that for
`.next_char()`, and none for `.peek_char()`.

This is for demonstrative purposes. I don't expect to merge this.
---
 src/specctra/read.rs | 53 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 40 insertions(+), 13 deletions(-)

diff --git a/src/specctra/read.rs b/src/specctra/read.rs
index 067439e..81d2662 100644
--- a/src/specctra/read.rs
+++ b/src/specctra/read.rs
@@ -7,6 +7,8 @@ use utf8_chars::BufReadCharsExt;
 pub enum ParseError {
     #[error("unexpected end of file")]
     Eof,
+    #[error("forbidden character")]
+    ForbiddenChar(char),
     #[error(transparent)]
     Io(#[from] std::io::Error),
     #[error("expected {0}")]
@@ -201,7 +203,12 @@ impl ListTokenizer {
     }
 
     fn next_char(&mut self) -> Result<char, ParseErrorContext> {
-        let return_chr = self.peek_char()?;
+        let chr = self.unsanitized_next_char()?;
+        self.sanitize(chr)
+    }
+
+    fn unsanitized_next_char(&mut self) -> Result<char, ParseErrorContext> {
+        let return_chr = self.unsanitized_peek_char()?;
         self.peeked_char = None;
 
         if return_chr == '\n' {
@@ -215,6 +222,11 @@ impl ListTokenizer {
     }
 
     fn peek_char(&mut self) -> Result<char, ParseErrorContext> {
+        let chr = self.unsanitized_peek_char()?;
+        self.sanitize(chr)
+    }
+
+    fn unsanitized_peek_char(&mut self) -> Result<char, ParseErrorContext> {
         Ok(if let Some(chr) = self.peeked_char {
             chr
         } else {
@@ -229,20 +241,35 @@ impl ListTokenizer {
         })
     }
 
+    fn sanitize(&mut self, chr: char) -> Result<char, ParseErrorContext> {
+        if !chr.is_ascii_alphanumeric()
+            && !self.is_recognized_whitespace(chr)
+            && !(chr == '(' || chr == ')')
+        {
+            return Err(self.add_context(ParseError::ForbiddenChar(chr)));
+        }
+
+        Ok(chr)
+    }
+
     fn skip_whitespace(&mut self) -> Result<(), ParseErrorContext> {
         loop {
-            let chr = self.peek_char()?;
-            if chr == ' ' || chr == '\r' || chr == '\n' {
-                self.next_char().unwrap();
+            let chr = self.unsanitized_peek_char()?;
+            if self.is_recognized_whitespace(chr) {
+                self.unsanitized_next_char().unwrap();
             } else {
                 return Ok(());
             }
         }
     }
 
+    fn is_recognized_whitespace(&mut self, chr: char) -> bool {
+        chr == ' ' || chr == '\r' || chr == '\n'
+    }
+
     fn read_string(&mut self) -> Result<String, ParseErrorContext> {
         if let Some(chr) = self.quote_char {
-            if chr == self.peek_char()? {
+            if chr == self.unsanitized_peek_char()? {
                 return self.read_quoted();
             }
         }
@@ -253,11 +280,11 @@ impl ListTokenizer {
         let mut string = String::new();
 
         loop {
-            let chr = self.peek_char()?;
-            if chr == ' ' || chr == '(' || chr == ')' || chr == '\r' || chr == '\n' {
+            let chr = self.unsanitized_peek_char()?;
+            if self.is_recognized_whitespace(chr) || chr == '(' || chr == ')' {
                 break;
             }
-            string.push(self.next_char().unwrap());
+            string.push(self.unsanitized_next_char().unwrap());
         }
 
         if string.is_empty() {
@@ -270,20 +297,20 @@ impl ListTokenizer {
     fn read_quoted(&mut self) -> Result<String, ParseErrorContext> {
         let mut string = String::new();
 
-        if self.next_char().unwrap() != self.quote_char.unwrap() {
+        if self.unsanitized_next_char().unwrap() != self.quote_char.unwrap() {
             panic!();
        }
 
         loop {
-            let chr = self.peek_char()?;
+            let chr = self.unsanitized_peek_char()?;
             if !self.space_in_quoted && chr == ' ' {
                 panic!("found a space inside a quoted string, but file didn't declare this possibility");
             }
             if chr == self.quote_char.unwrap() {
-                self.next_char().unwrap();
+                self.unsanitized_next_char().unwrap();
                 break;
             }
-            string.push(self.next_char().unwrap());
+            string.push(self.unsanitized_next_char().unwrap());
         }
 
         Ok(string)
@@ -312,7 +339,7 @@ impl ListTokenizer {
         self.skip_whitespace()?;
         let context = self.context();
 
-        let chr = self.peek_char()?;
+        let chr = self.unsanitized_peek_char()?;
         Ok(InputToken::new(
             if chr == '(' {
                 self.next_char().unwrap();
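
Not part of the patch: below is a minimal standalone sketch of the safelist check that `sanitize` applies, in case it helps review. `is_allowed` is a hypothetical helper written only for this note (it does not exist in src/specctra/read.rs); the accepted set is copied from the patch: ASCII alphanumerics, the recognized whitespace (' ', '\r', '\n'), and parentheses.

// Hypothetical, note-only helper mirroring the safelist in `sanitize`.
fn is_allowed(chr: char) -> bool {
    chr.is_ascii_alphanumeric()
        || chr == ' '
        || chr == '\r'
        || chr == '\n'
        || chr == '('
        || chr == ')'
}

fn main() {
    // Characters that make up ordinary DSN structure pass the check.
    assert!(is_allowed('('));
    assert!(is_allowed('A'));
    assert!(is_allowed('\n'));
    // Anything outside the safelist is what the tokenizer would report
    // as ParseError::ForbiddenChar.
    assert!(!is_allowed('\0'));
    assert!(!is_allowed('\u{7}')); // an ASCII control character
}

As the diff shows, the check runs only where `next_char()`/`peek_char()` are still called; the string-reading paths go through the `unsanitized_*` variants, so characters inside tokens and quoted strings are not filtered by it.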