Add support for parsing quoted strings containing escaped octals.
This commit is contained in:
parent
6c77586960
commit
896250836b
@ -1,9 +1,10 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use nom::branch::alt;
|
use nom::branch::alt;
|
||||||
use nom::bytes::complete::escaped;
|
|
||||||
use nom::bytes::complete::tag;
|
use nom::bytes::complete::tag;
|
||||||
use nom::bytes::complete::take_till1;
|
use nom::bytes::complete::take_till1;
|
||||||
|
use nom::character::complete::anychar;
|
||||||
|
use nom::character::complete::digit1;
|
||||||
use nom::character::complete::multispace0;
|
use nom::character::complete::multispace0;
|
||||||
use nom::character::complete::multispace1;
|
use nom::character::complete::multispace1;
|
||||||
use nom::character::complete::one_of;
|
use nom::character::complete::one_of;
|
||||||
@ -11,6 +12,7 @@ use nom::combinator::map;
|
|||||||
use nom::combinator::not;
|
use nom::combinator::not;
|
||||||
use nom::combinator::opt;
|
use nom::combinator::opt;
|
||||||
use nom::combinator::peek;
|
use nom::combinator::peek;
|
||||||
|
use nom::combinator::recognize;
|
||||||
use nom::multi::separated_list1;
|
use nom::multi::separated_list1;
|
||||||
use nom::sequence::delimited;
|
use nom::sequence::delimited;
|
||||||
use nom::sequence::preceded;
|
use nom::sequence::preceded;
|
||||||
@ -18,6 +20,8 @@ use nom::sequence::tuple;
|
|||||||
|
|
||||||
use crate::error::Res;
|
use crate::error::Res;
|
||||||
|
|
||||||
|
const MAX_OCTAL_LENGTH: usize = 3;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum Token<'s> {
|
pub enum Token<'s> {
|
||||||
Atom(&'s str),
|
Atom(&'s str),
|
||||||
@ -35,6 +39,7 @@ pub struct TextWithProperties<'s> {
|
|||||||
enum ParseState {
|
enum ParseState {
|
||||||
Normal,
|
Normal,
|
||||||
Escape,
|
Escape,
|
||||||
|
Octal(Vec<u8>),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Token<'s> {
|
impl<'s> Token<'s> {
|
||||||
@ -116,7 +121,7 @@ fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn unquote(text: &str) -> Result<String, Box<dyn std::error::Error>> {
|
pub(crate) fn unquote(text: &str) -> Result<String, Box<dyn std::error::Error>> {
|
||||||
let mut out = String::with_capacity(text.len());
|
let mut out: Vec<u8> = Vec::with_capacity(text.len());
|
||||||
if !text.starts_with(r#"""#) {
|
if !text.starts_with(r#"""#) {
|
||||||
return Err("Quoted text does not start with quote.".into());
|
return Err("Quoted text does not start with quote.".into());
|
||||||
}
|
}
|
||||||
@ -125,30 +130,53 @@ pub(crate) fn unquote(text: &str) -> Result<String, Box<dyn std::error::Error>>
|
|||||||
}
|
}
|
||||||
let interior_text = &text[1..(text.len() - 1)];
|
let interior_text = &text[1..(text.len() - 1)];
|
||||||
let mut state = ParseState::Normal;
|
let mut state = ParseState::Normal;
|
||||||
for current_char in interior_text.chars().into_iter() {
|
for current_char in interior_text.bytes().into_iter() {
|
||||||
|
// Check to see if octal finished
|
||||||
state = match (state, current_char) {
|
state = match (state, current_char) {
|
||||||
(ParseState::Normal, '\\') => ParseState::Escape,
|
(ParseState::Octal(octal), b'0'..=b'7') if octal.len() < MAX_OCTAL_LENGTH => {
|
||||||
|
ParseState::Octal(octal)
|
||||||
|
}
|
||||||
|
(ParseState::Octal(octal), _) => {
|
||||||
|
let octal_number_string = String::from_utf8(octal)?;
|
||||||
|
let decoded_byte = u8::from_str_radix(&octal_number_string, 8)?;
|
||||||
|
out.push(decoded_byte);
|
||||||
|
ParseState::Normal
|
||||||
|
}
|
||||||
|
(state, _) => state,
|
||||||
|
};
|
||||||
|
|
||||||
|
state = match (state, current_char) {
|
||||||
|
(ParseState::Normal, b'\\') => ParseState::Escape,
|
||||||
(ParseState::Normal, _) => {
|
(ParseState::Normal, _) => {
|
||||||
out.push(current_char);
|
out.push(current_char);
|
||||||
ParseState::Normal
|
ParseState::Normal
|
||||||
}
|
}
|
||||||
(ParseState::Escape, 'n') => {
|
(ParseState::Escape, b'n') => {
|
||||||
out.push('\n');
|
out.push(b'\n');
|
||||||
ParseState::Normal
|
ParseState::Normal
|
||||||
}
|
}
|
||||||
(ParseState::Escape, '\\') => {
|
(ParseState::Escape, b'\\') => {
|
||||||
out.push('\\');
|
out.push(b'\\');
|
||||||
ParseState::Normal
|
ParseState::Normal
|
||||||
}
|
}
|
||||||
(ParseState::Escape, '"') => {
|
(ParseState::Escape, b'"') => {
|
||||||
out.push('"');
|
out.push(b'"');
|
||||||
ParseState::Normal
|
ParseState::Normal
|
||||||
}
|
}
|
||||||
_ => todo!(),
|
(ParseState::Escape, b'0'..=b'7') => {
|
||||||
|
let mut octal = Vec::with_capacity(MAX_OCTAL_LENGTH);
|
||||||
|
octal.push(current_char);
|
||||||
|
ParseState::Octal(octal)
|
||||||
|
}
|
||||||
|
(ParseState::Octal(mut octal), b'0'..=b'7') => {
|
||||||
|
octal.push(current_char);
|
||||||
|
ParseState::Octal(octal)
|
||||||
|
}
|
||||||
|
_ => panic!("Invalid state unquoting string."),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(out)
|
Ok(String::from_utf8(out)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||||
@ -210,15 +238,30 @@ fn unquoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
|||||||
|
|
||||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||||
fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
|
||||||
let (remaining, _) = tag(r#"""#)(input)?;
|
let (mut remaining, _) = tag(r#"""#)(input)?;
|
||||||
let (remaining, _) = escaped(
|
let mut in_escape = false;
|
||||||
take_till1(|c| match c {
|
loop {
|
||||||
'\\' | '"' => true,
|
if in_escape {
|
||||||
_ => false,
|
let (remain, _) = alt((recognize(one_of(r#""n\\"#)), digit1))(remaining)?;
|
||||||
}),
|
remaining = remain;
|
||||||
'\\',
|
in_escape = false;
|
||||||
one_of(r#""n\\"#),
|
} else {
|
||||||
)(remaining)?;
|
let end_quote = tag::<_, _, nom::error::Error<_>>(r#"""#)(remaining);
|
||||||
|
if end_quote.is_ok() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let escape_backslash = tag::<_, _, nom::error::Error<_>>("\\")(remaining);
|
||||||
|
if let Ok((remain, _)) = escape_backslash {
|
||||||
|
remaining = remain;
|
||||||
|
in_escape = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let (remain, _) = anychar(remaining)?;
|
||||||
|
remaining = remain;
|
||||||
|
}
|
||||||
|
}
|
||||||
let (remaining, _) = tag(r#"""#)(remaining)?;
|
let (remaining, _) = tag(r#"""#)(remaining)?;
|
||||||
let source = get_consumed(input, remaining);
|
let source = get_consumed(input, remaining);
|
||||||
Ok((remaining, Token::Atom(source.into())))
|
Ok((remaining, Token::Atom(source.into())))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user