mirror of https://codeberg.org/topola/topola.git
feat(specctra): sanitize DSN input by safelisting chars where possible
I have implemented sanitization at every `.next_char()` and `.peek_char()` call site where I could do so without causing an error. It turned out that there are only two minor places where this is possible for `.next_char()`, and none for `.peek_char()`. This is for demonstrative purposes. I don't expect to merge this.
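The rule being safelisted is narrow: a character passes only if it is an ASCII alphanumeric, one of the whitespace characters the tokenizer recognizes (space, `'\r'`, `'\n'`), or a parenthesis. A minimal standalone sketch of that predicate, illustrative only and not part of the crate:

```rust
/// Illustrative safelist check mirroring the `sanitize` rule added in the
/// diff below: accept ASCII alphanumerics, recognized whitespace (space,
/// '\r', '\n'), and parentheses; reject everything else.
fn is_safelisted(chr: char) -> bool {
    chr.is_ascii_alphanumeric()
        || chr == ' '
        || chr == '\r'
        || chr == '\n'
        || chr == '('
        || chr == ')'
}

fn main() {
    assert!(is_safelisted('a'));
    assert!(is_safelisted('('));
    assert!(!is_safelisted('\u{0}')); // control characters are rejected
    assert!(!is_safelisted('{'));     // as is anything else off the list
}
```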
This commit is contained in:
parent 9dad83d221
commit 31e7e9d285
@@ -7,6 +7,8 @@ use utf8_chars::BufReadCharsExt;
 pub enum ParseError {
     #[error("unexpected end of file")]
     Eof,
+    #[error("forbidden character")]
+    ForbiddenChar(char),
     #[error(transparent)]
     Io(#[from] std::io::Error),
     #[error("expected {0}")]
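The new variant follows the same `thiserror` derive pattern as the existing ones. As a self-contained illustration of how such a variant behaves (the enum below is a stand-in, not the crate's `ParseError`), note that the format string has no placeholder, so the offending character shows up in `Debug` output but not in the `Display` message:

```rust
use thiserror::Error;

// Stand-in enum; not the crate's ParseError.
#[derive(Error, Debug)]
enum DemoError {
    #[error("forbidden character")]
    ForbiddenChar(char),
    #[error(transparent)]
    Io(#[from] std::io::Error),
}

fn main() {
    let err = DemoError::ForbiddenChar('\u{7}');
    println!("{err}");   // prints: forbidden character
    println!("{err:?}"); // prints: ForbiddenChar('\u{7}')
}
```

Switching the attribute to `#[error("forbidden character {0:?}")]` would include the offending character in the `Display` message as well, matching the style of the existing `expected {0}` variant.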
@@ -201,7 +203,12 @@ impl<R: std::io::BufRead> ListTokenizer<R> {
     }
 
     fn next_char(&mut self) -> Result<char, ParseErrorContext> {
-        let return_chr = self.peek_char()?;
+        let chr = self.unsanitized_next_char()?;
+        self.sanitize(chr)
+    }
+
+    fn unsanitized_next_char(&mut self) -> Result<char, ParseErrorContext> {
+        let return_chr = self.unsanitized_peek_char()?;
         self.peeked_char = None;
 
         if return_chr == '\n' {
@@ -215,6 +222,11 @@ impl<R: std::io::BufRead> ListTokenizer<R> {
     }
 
     fn peek_char(&mut self) -> Result<char, ParseErrorContext> {
+        let chr = self.unsanitized_peek_char()?;
+        self.sanitize(chr)
+    }
+
+    fn unsanitized_peek_char(&mut self) -> Result<char, ParseErrorContext> {
        Ok(if let Some(chr) = self.peeked_char {
             chr
         } else {
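After these two hunks, both public accessors share one shape: `next_char`/`peek_char` delegate to an `unsanitized_*` counterpart and pass the result through `sanitize`, while internal callers that legitimately need characters off the safelist (quoted strings, string tokens) keep using the raw accessors. A generic sketch of that wrapper pattern, with illustrative names and a deliberately simplified validation rule:

```rust
/// Illustrative only: a raw accessor does the reading, and the public
/// accessor validates what was read before handing it out.
struct Reader {
    data: Vec<char>,
    pos: usize,
}

impl Reader {
    /// Raw access: returns the next character without any validation.
    fn raw_next(&mut self) -> Option<char> {
        let chr = self.data.get(self.pos).copied();
        self.pos += 1;
        chr
    }

    /// Validating wrapper: same data, but rejects characters off a safelist
    /// (here simplified to ASCII alphanumerics).
    fn next(&mut self) -> Result<Option<char>, String> {
        match self.raw_next() {
            Some(chr) if !chr.is_ascii_alphanumeric() => {
                Err(format!("forbidden character {chr:?}"))
            }
            other => Ok(other),
        }
    }
}

fn main() {
    let mut reader = Reader { data: "ab;".chars().collect(), pos: 0 };
    assert_eq!(reader.next(), Ok(Some('a')));
    assert_eq!(reader.next(), Ok(Some('b')));
    assert!(reader.next().is_err()); // ';' is rejected by the wrapper
}
```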
@@ -229,20 +241,35 @@ impl<R: std::io::BufRead> ListTokenizer<R> {
         })
     }
 
+    fn sanitize(&mut self, chr: char) -> Result<char, ParseErrorContext> {
+        if !chr.is_ascii_alphanumeric()
+            && !self.is_recognized_whitespace(chr)
+            && !(chr == '(' || chr == ')')
+        {
+            return Err(self.add_context(ParseError::ForbiddenChar(chr)));
+        }
+
+        Ok(chr)
+    }
+
     fn skip_whitespace(&mut self) -> Result<(), ParseErrorContext> {
         loop {
-            let chr = self.peek_char()?;
-            if chr == ' ' || chr == '\r' || chr == '\n' {
-                self.next_char().unwrap();
+            let chr = self.unsanitized_peek_char()?;
+            if self.is_recognized_whitespace(chr) {
+                self.unsanitized_next_char().unwrap();
             } else {
                 return Ok(());
             }
         }
     }
 
+    fn is_recognized_whitespace(&mut self, chr: char) -> bool {
+        chr == ' ' || chr == '\r' || chr == '\n'
+    }
+
     fn read_string(&mut self) -> Result<String, ParseErrorContext> {
         if let Some(chr) = self.quote_char {
-            if chr == self.peek_char()? {
+            if chr == self.unsanitized_peek_char()? {
                 return self.read_quoted();
             }
         }
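The whitespace set is now named in one place: `is_recognized_whitespace` accepts space, `'\r'`, and `'\n'` (tabs are not on the list), and `skip_whitespace` peeks and consumes through the raw accessors. A standalone sketch of the same peek-then-consume loop over a plain character iterator, not the crate's API:

```rust
/// Same recognized set as the tokenizer: space, carriage return, line feed.
fn is_recognized_whitespace(chr: char) -> bool {
    chr == ' ' || chr == '\r' || chr == '\n'
}

/// Peek, and consume only while the peeked character is recognized whitespace.
fn skip_whitespace(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) {
    while chars.peek().copied().is_some_and(is_recognized_whitespace) {
        chars.next();
    }
}

fn main() {
    let mut chars = "  \r\n(pcb".chars().peekable();
    skip_whitespace(&mut chars);
    assert_eq!(chars.next(), Some('(')); // stops at the first non-whitespace char
}
```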
@@ -253,11 +280,11 @@ impl<R: std::io::BufRead> ListTokenizer<R> {
         let mut string = String::new();
 
         loop {
-            let chr = self.peek_char()?;
-            if chr == ' ' || chr == '(' || chr == ')' || chr == '\r' || chr == '\n' {
+            let chr = self.unsanitized_peek_char()?;
+            if self.is_recognized_whitespace(chr) || chr == '(' || chr == ')' {
                 break;
             }
-            string.push(self.next_char().unwrap());
+            string.push(self.unsanitized_next_char().unwrap());
         }
 
         if string.is_empty() {
@@ -270,20 +297,20 @@ impl<R: std::io::BufRead> ListTokenizer<R> {
     fn read_quoted(&mut self) -> Result<String, ParseErrorContext> {
         let mut string = String::new();
 
-        if self.next_char().unwrap() != self.quote_char.unwrap() {
+        if self.unsanitized_next_char().unwrap() != self.quote_char.unwrap() {
             panic!();
         }
 
         loop {
-            let chr = self.peek_char()?;
+            let chr = self.unsanitized_peek_char()?;
             if !self.space_in_quoted && chr == ' ' {
                 panic!("found a space inside a quoted string, but file didn't declare this possibility");
             }
             if chr == self.quote_char.unwrap() {
-                self.next_char().unwrap();
+                self.unsanitized_next_char().unwrap();
                 break;
             }
-            string.push(self.next_char().unwrap());
+            string.push(self.unsanitized_next_char().unwrap());
         }
 
         Ok(string)
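`read_string` and `read_quoted` illustrate why sanitization cannot be pushed everywhere: string and quoted-string content routinely includes characters outside the safelist (the quote character itself, and in practice things like `.`, `-`, or `_` in names and coordinates), so these readers consume input through the raw accessors. A quick illustration against the same narrow predicate sketched earlier, restated here so the snippet is self-contained:

```rust
// Restated illustrative predicate; not crate code.
fn is_safelisted(chr: char) -> bool {
    chr.is_ascii_alphanumeric()
        || chr == ' '
        || chr == '\r'
        || chr == '\n'
        || chr == '('
        || chr == ')'
}

fn main() {
    // Characters that legitimately occur in DSN string content but would be
    // rejected if read through the sanitized accessors.
    for chr in ['"', '.', '-', '_'] {
        assert!(!is_safelisted(chr));
    }
}
```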
@@ -312,7 +339,7 @@ impl<R: std::io::BufRead> ListTokenizer<R> {
         self.skip_whitespace()?;
         let context = self.context();
 
-        let chr = self.peek_char()?;
+        let chr = self.unsanitized_peek_char()?;
         Ok(InputToken::new(
             if chr == '(' {
                 self.next_char().unwrap();