Add support for parsing quoted strings containing escaped octals.
This commit is contained in:
parent
6c77586960
commit
896250836b
@ -1,9 +1,10 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::escaped;
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::bytes::complete::take_till1;
|
||||
use nom::character::complete::anychar;
|
||||
use nom::character::complete::digit1;
|
||||
use nom::character::complete::multispace0;
|
||||
use nom::character::complete::multispace1;
|
||||
use nom::character::complete::one_of;
|
||||
@ -11,6 +12,7 @@ use nom::combinator::map;
|
||||
use nom::combinator::not;
|
||||
use nom::combinator::opt;
|
||||
use nom::combinator::peek;
|
||||
use nom::combinator::recognize;
|
||||
use nom::multi::separated_list1;
|
||||
use nom::sequence::delimited;
|
||||
use nom::sequence::preceded;
|
||||
@ -18,6 +20,8 @@ use nom::sequence::tuple;
|
||||
|
||||
use crate::error::Res;
|
||||
|
||||
/// Upper bound on the number of octal digits collected for a single backslash
/// escape while unquoting; `unquote` stops extending the pending octal once it
/// already holds this many digits.
const MAX_OCTAL_LENGTH: usize = 3;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Token<'s> {
|
||||
Atom(&'s str),
|
||||
@ -35,6 +39,7 @@ pub struct TextWithProperties<'s> {
|
||||
/// State of the byte-by-byte state machine that `unquote` runs over the
/// interior of a quoted string.
enum ParseState {
|
||||
// Outside any escape sequence: bytes are copied to the output as-is, and a
// backslash switches to `Escape`.
Normal,
|
||||
// The previous byte was a backslash; the current byte selects the escape
// (`n`, `\`, `"`, or the first octal digit).
Escape,
|
||||
// Inside an octal escape: holds the ASCII digit bytes (`0`-`7`) collected so
// far, which are later parsed base-8 into a single output byte.
Octal(Vec<u8>),
|
||||
}
|
||||
|
||||
impl<'s> Token<'s> {
|
||||
@ -116,7 +121,7 @@ fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str {
|
||||
}
|
||||
|
||||
pub(crate) fn unquote(text: &str) -> Result<String, Box<dyn std::error::Error>> {
|
||||
let mut out = String::with_capacity(text.len());
|
||||
let mut out: Vec<u8> = Vec::with_capacity(text.len());
|
||||
if !text.starts_with(r#"""#) {
|
||||
return Err("Quoted text does not start with quote.".into());
|
||||
}
|
||||
@ -125,30 +130,53 @@ pub(crate) fn unquote(text: &str) -> Result<String, Box<dyn std::error::Error>>
|
||||
}
|
||||
let interior_text = &text[1..(text.len() - 1)];
|
||||
let mut state = ParseState::Normal;
|
||||
for current_char in interior_text.chars().into_iter() {
|
||||
for current_char in interior_text.bytes().into_iter() {
|
||||
// Check to see if octal finished
|
||||
state = match (state, current_char) {
|
||||
(ParseState::Normal, '\\') => ParseState::Escape,
|
||||
(ParseState::Octal(octal), b'0'..=b'7') if octal.len() < MAX_OCTAL_LENGTH => {
|
||||
ParseState::Octal(octal)
|
||||
}
|
||||
(ParseState::Octal(octal), _) => {
|
||||
let octal_number_string = String::from_utf8(octal)?;
|
||||
let decoded_byte = u8::from_str_radix(&octal_number_string, 8)?;
|
||||
out.push(decoded_byte);
|
||||
ParseState::Normal
|
||||
}
|
||||
(state, _) => state,
|
||||
};
|
||||
|
||||
state = match (state, current_char) {
|
||||
(ParseState::Normal, b'\\') => ParseState::Escape,
|
||||
(ParseState::Normal, _) => {
|
||||
out.push(current_char);
|
||||
ParseState::Normal
|
||||
}
|
||||
(ParseState::Escape, 'n') => {
|
||||
out.push('\n');
|
||||
(ParseState::Escape, b'n') => {
|
||||
out.push(b'\n');
|
||||
ParseState::Normal
|
||||
}
|
||||
(ParseState::Escape, '\\') => {
|
||||
out.push('\\');
|
||||
(ParseState::Escape, b'\\') => {
|
||||
out.push(b'\\');
|
||||
ParseState::Normal
|
||||
}
|
||||
(ParseState::Escape, '"') => {
|
||||
out.push('"');
|
||||
(ParseState::Escape, b'"') => {
|
||||
out.push(b'"');
|
||||
ParseState::Normal
|
||||
}
|
||||
_ => todo!(),
|
||||
(ParseState::Escape, b'0'..=b'7') => {
|
||||
let mut octal = Vec::with_capacity(MAX_OCTAL_LENGTH);
|
||||
octal.push(current_char);
|
||||
ParseState::Octal(octal)
|
||||
}
|
||||
(ParseState::Octal(mut octal), b'0'..=b'7') => {
|
||||
octal.push(current_char);
|
||||
ParseState::Octal(octal)
|
||||
}
|
||||
_ => panic!("Invalid state unquoting string."),
|
||||
};
|
||||
}
|
||||
|
||||
Ok(out)
|
||||
Ok(String::from_utf8(out)?)
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
@ -210,15 +238,30 @@ fn unquoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
|
||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||
fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||
let (remaining, _) = tag(r#"""#)(input)?;
|
||||
let (remaining, _) = escaped(
|
||||
take_till1(|c| match c {
|
||||
'\\' | '"' => true,
|
||||
_ => false,
|
||||
}),
|
||||
'\\',
|
||||
one_of(r#""n\\"#),
|
||||
)(remaining)?;
|
||||
let (mut remaining, _) = tag(r#"""#)(input)?;
|
||||
let mut in_escape = false;
|
||||
loop {
|
||||
if in_escape {
|
||||
let (remain, _) = alt((recognize(one_of(r#""n\\"#)), digit1))(remaining)?;
|
||||
remaining = remain;
|
||||
in_escape = false;
|
||||
} else {
|
||||
let end_quote = tag::<_, _, nom::error::Error<_>>(r#"""#)(remaining);
|
||||
if end_quote.is_ok() {
|
||||
break;
|
||||
}
|
||||
|
||||
let escape_backslash = tag::<_, _, nom::error::Error<_>>("\\")(remaining);
|
||||
if let Ok((remain, _)) = escape_backslash {
|
||||
remaining = remain;
|
||||
in_escape = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
let (remain, _) = anychar(remaining)?;
|
||||
remaining = remain;
|
||||
}
|
||||
}
|
||||
let (remaining, _) = tag(r#"""#)(remaining)?;
|
||||
let source = get_consumed(input, remaining);
|
||||
Ok((remaining, Token::Atom(source.into())))
|
||||
|
Loading…
Reference in New Issue
Block a user