feat(specctra): sanitize DSN input by safelisting chars where possible

I have implemented sanitization for every `.next_char()` and
`.peek_char()` that I could without causing an error. It turned out that
there are only two minor places where I can do that for `.next_char()`,
and none for `.peek_char()`.

This is for demonstrative purposes. I don't expect to merge this.
This commit is contained in:
Mikolaj Wielgus 2024-11-21 22:05:18 +01:00
parent 9dad83d221
commit 31e7e9d285
1 changed files with 40 additions and 13 deletions

View File

@ -7,6 +7,8 @@ use utf8_chars::BufReadCharsExt;
pub enum ParseError { pub enum ParseError {
#[error("unexpected end of file")] #[error("unexpected end of file")]
Eof, Eof,
#[error("forbidden character")]
ForbiddenChar(char),
#[error(transparent)] #[error(transparent)]
Io(#[from] std::io::Error), Io(#[from] std::io::Error),
#[error("expected {0}")] #[error("expected {0}")]
@ -201,7 +203,12 @@ impl<R: std::io::BufRead> ListTokenizer<R> {
} }
fn next_char(&mut self) -> Result<char, ParseErrorContext> { fn next_char(&mut self) -> Result<char, ParseErrorContext> {
let return_chr = self.peek_char()?; let chr = self.unsanitized_next_char()?;
self.sanitize(chr)
}
fn unsanitized_next_char(&mut self) -> Result<char, ParseErrorContext> {
let return_chr = self.unsanitized_peek_char()?;
self.peeked_char = None; self.peeked_char = None;
if return_chr == '\n' { if return_chr == '\n' {
@ -215,6 +222,11 @@ impl<R: std::io::BufRead> ListTokenizer<R> {
} }
fn peek_char(&mut self) -> Result<char, ParseErrorContext> { fn peek_char(&mut self) -> Result<char, ParseErrorContext> {
let chr = self.unsanitized_peek_char()?;
self.sanitize(chr)
}
fn unsanitized_peek_char(&mut self) -> Result<char, ParseErrorContext> {
Ok(if let Some(chr) = self.peeked_char { Ok(if let Some(chr) = self.peeked_char {
chr chr
} else { } else {
@ -229,20 +241,35 @@ impl<R: std::io::BufRead> ListTokenizer<R> {
}) })
} }
fn sanitize(&mut self, chr: char) -> Result<char, ParseErrorContext> {
if !chr.is_ascii_alphanumeric()
&& !self.is_recognized_whitespace(chr)
&& !(chr == '(' || chr == ')')
{
return Err(self.add_context(ParseError::ForbiddenChar(chr)));
}
Ok(chr)
}
fn skip_whitespace(&mut self) -> Result<(), ParseErrorContext> { fn skip_whitespace(&mut self) -> Result<(), ParseErrorContext> {
loop { loop {
let chr = self.peek_char()?; let chr = self.unsanitized_peek_char()?;
if chr == ' ' || chr == '\r' || chr == '\n' { if self.is_recognized_whitespace(chr) {
self.next_char().unwrap(); self.unsanitized_next_char().unwrap();
} else { } else {
return Ok(()); return Ok(());
} }
} }
} }
fn is_recognized_whitespace(&mut self, chr: char) -> bool {
chr == ' ' || chr == '\r' || chr == '\n'
}
fn read_string(&mut self) -> Result<String, ParseErrorContext> { fn read_string(&mut self) -> Result<String, ParseErrorContext> {
if let Some(chr) = self.quote_char { if let Some(chr) = self.quote_char {
if chr == self.peek_char()? { if chr == self.unsanitized_peek_char()? {
return self.read_quoted(); return self.read_quoted();
} }
} }
@ -253,11 +280,11 @@ impl<R: std::io::BufRead> ListTokenizer<R> {
let mut string = String::new(); let mut string = String::new();
loop { loop {
let chr = self.peek_char()?; let chr = self.unsanitized_peek_char()?;
if chr == ' ' || chr == '(' || chr == ')' || chr == '\r' || chr == '\n' { if self.is_recognized_whitespace(chr) || chr == '(' || chr == ')' {
break; break;
} }
string.push(self.next_char().unwrap()); string.push(self.unsanitized_next_char().unwrap());
} }
if string.is_empty() { if string.is_empty() {
@ -270,20 +297,20 @@ impl<R: std::io::BufRead> ListTokenizer<R> {
fn read_quoted(&mut self) -> Result<String, ParseErrorContext> { fn read_quoted(&mut self) -> Result<String, ParseErrorContext> {
let mut string = String::new(); let mut string = String::new();
if self.next_char().unwrap() != self.quote_char.unwrap() { if self.unsanitized_next_char().unwrap() != self.quote_char.unwrap() {
panic!(); panic!();
} }
loop { loop {
let chr = self.peek_char()?; let chr = self.unsanitized_peek_char()?;
if !self.space_in_quoted && chr == ' ' { if !self.space_in_quoted && chr == ' ' {
panic!("found a space inside a quoted string, but file didn't declare this possibility"); panic!("found a space inside a quoted string, but file didn't declare this possibility");
} }
if chr == self.quote_char.unwrap() { if chr == self.quote_char.unwrap() {
self.next_char().unwrap(); self.unsanitized_next_char().unwrap();
break; break;
} }
string.push(self.next_char().unwrap()); string.push(self.unsanitized_next_char().unwrap());
} }
Ok(string) Ok(string)
@ -312,7 +339,7 @@ impl<R: std::io::BufRead> ListTokenizer<R> {
self.skip_whitespace()?; self.skip_whitespace()?;
let context = self.context(); let context = self.context();
let chr = self.peek_char()?; let chr = self.unsanitized_peek_char()?;
Ok(InputToken::new( Ok(InputToken::new(
if chr == '(' { if chr == '(' {
self.next_char().unwrap(); self.next_char().unwrap();