diff --git a/src/compare/compare_field.rs b/src/compare/compare_field.rs index b5aa2417..1632da5b 100644 --- a/src/compare/compare_field.rs +++ b/src/compare/compare_field.rs @@ -1,3 +1,5 @@ +use std::borrow::Borrow; +use std::borrow::Cow; use std::collections::BTreeSet; use std::fmt::Debug; use std::str::FromStr; @@ -262,11 +264,11 @@ pub(crate) fn compare_property_set_of_quoted_string< .iter() .map(|e| e.as_atom()) .collect::, _>>()?; - let value: Vec = value + let value: Vec> = value .into_iter() .map(unquote) .collect::, _>>()?; - let value: BTreeSet<&str> = value.iter().map(|e| e.as_str()).collect(); + let value: BTreeSet<&str> = value.iter().map(|e| e.borrow()).collect(); let mismatched: Vec<_> = value.symmetric_difference(&rust_value).copied().collect(); if !mismatched.is_empty() { let this_status = DiffStatus::Bad; diff --git a/src/compare/sexp.rs b/src/compare/sexp.rs index 85cab3f1..a45816a4 100644 --- a/src/compare/sexp.rs +++ b/src/compare/sexp.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::collections::HashMap; use nom::branch::alt; @@ -36,12 +37,6 @@ pub struct TextWithProperties<'s> { pub(crate) properties: Vec>, } -enum ParseState { - Normal, - Escape, - Octal(Vec), -} - impl<'s> Token<'s> { pub(crate) fn as_vector<'p>( &'p self, @@ -117,8 +112,27 @@ fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str { &input[..offset] } -pub(crate) fn unquote(text: &str) -> Result> { - let mut out: Vec = Vec::with_capacity(text.len()); +#[derive(Debug)] +enum UnquoteState { + Normal, + Escape, + HasEscape { + out: Vec, + }, + HasEscapeEscape { + out: Vec, + }, + Octal { + octal_begin_offset: usize, + octal: Vec, + }, + HasEscapeOctal { + out: Vec, + octal: Vec, + }, +} + +pub(crate) fn unquote(text: &str) -> Result, Box> { if !text.starts_with('"') { return Err("Quoted text does not start with quote.".into()); } @@ -126,54 +140,143 @@ pub(crate) fn unquote(text: &str) -> Result> return Err("Quoted text does not end with quote.".into()); } let interior_text = &text[1..(text.len() - 1)]; - let mut state = ParseState::Normal; - for current_char in interior_text.bytes() { + let mut state = UnquoteState::Normal; + for (offset, current_char) in interior_text.bytes().enumerate() { // Check to see if octal finished state = match (state, current_char) { - (ParseState::Octal(octal), b'0'..=b'7') if octal.len() < MAX_OCTAL_LENGTH => { - ParseState::Octal(octal) + ( + UnquoteState::Octal { + octal_begin_offset, + octal, + }, + b'0'..=b'7', + ) if octal.len() < MAX_OCTAL_LENGTH => UnquoteState::Octal { + octal_begin_offset, + octal, + }, + ( + UnquoteState::Octal { + octal_begin_offset, + octal, + }, + _, + ) => { + let octal_number_string = String::from_utf8(octal)?; + let decoded_byte = u8::from_str_radix(&octal_number_string, 8)?; + let mut out: Vec = Vec::with_capacity(interior_text.len()); + out.extend_from_slice(&interior_text.as_bytes()[..octal_begin_offset]); + out.push(decoded_byte); + UnquoteState::HasEscape { out } } - (ParseState::Octal(octal), _) => { + (UnquoteState::HasEscapeOctal { out, octal }, b'0'..=b'7') + if octal.len() < MAX_OCTAL_LENGTH => + { + UnquoteState::HasEscapeOctal { out, octal } + } + (UnquoteState::HasEscapeOctal { mut out, octal }, _) => { let octal_number_string = String::from_utf8(octal)?; let decoded_byte = u8::from_str_radix(&octal_number_string, 8)?; out.push(decoded_byte); - ParseState::Normal + UnquoteState::HasEscape { out } } (state, _) => state, }; state = match (state, current_char) { - (ParseState::Normal, b'\\') => ParseState::Escape, - (ParseState::Normal, _) => { + (UnquoteState::Normal, b'\\') => UnquoteState::Escape, + (UnquoteState::Normal, _) => UnquoteState::Normal, + (UnquoteState::HasEscape { out }, b'\\') => UnquoteState::HasEscapeEscape { out }, + (UnquoteState::HasEscape { mut out }, _) => { out.push(current_char); - ParseState::Normal + UnquoteState::HasEscape { out } } - (ParseState::Escape, b'n') => { + (UnquoteState::Escape, b'n') => { + let mut out: Vec = Vec::with_capacity(interior_text.len()); + // Subtract 1 from offset to account for backslash. + out.extend_from_slice(&interior_text.as_bytes()[..(offset - 1)]); out.push(b'\n'); - ParseState::Normal + UnquoteState::HasEscape { out } } - (ParseState::Escape, b'\\') => { + (UnquoteState::HasEscapeEscape { mut out }, b'n') => { + out.push(b'\n'); + UnquoteState::HasEscape { out } + } + (UnquoteState::Escape, b'\\') => { + let mut out: Vec = Vec::with_capacity(interior_text.len()); + // Subtract 1 from offset to account for backslash. + out.extend_from_slice(&interior_text.as_bytes()[..(offset - 1)]); out.push(b'\\'); - ParseState::Normal + UnquoteState::HasEscape { out } } - (ParseState::Escape, b'"') => { + (UnquoteState::HasEscapeEscape { mut out }, b'\\') => { + out.push(b'\\'); + UnquoteState::HasEscape { out } + } + (UnquoteState::Escape, b'"') => { + let mut out: Vec = Vec::with_capacity(interior_text.len()); + // Subtract 1 from offset to account for backslash. + out.extend_from_slice(&interior_text.as_bytes()[..(offset - 1)]); out.push(b'"'); - ParseState::Normal + UnquoteState::HasEscape { out } } - (ParseState::Escape, b'0'..=b'7') => { + (UnquoteState::HasEscapeEscape { mut out }, b'"') => { + out.push(b'"'); + UnquoteState::HasEscape { out } + } + (UnquoteState::Escape, b'0'..=b'7') => { let mut octal = Vec::with_capacity(MAX_OCTAL_LENGTH); octal.push(current_char); - ParseState::Octal(octal) + // Substract 1 from offset to account for backslash + UnquoteState::Octal { + octal_begin_offset: offset - 1, + octal, + } } - (ParseState::Octal(mut octal), b'0'..=b'7') => { + (UnquoteState::HasEscapeEscape { out }, b'0'..=b'7') => { + let mut octal = Vec::with_capacity(MAX_OCTAL_LENGTH); octal.push(current_char); - ParseState::Octal(octal) + // Substract 1 from offset to account for backslash + UnquoteState::HasEscapeOctal { out, octal } } - _ => panic!("Invalid state unquoting string."), + ( + UnquoteState::Octal { + octal_begin_offset, + mut octal, + }, + b'0'..=b'7', + ) => { + octal.push(current_char); + UnquoteState::Octal { + octal_begin_offset, + octal, + } + } + (UnquoteState::HasEscapeOctal { out, mut octal }, b'0'..=b'7') => { + octal.push(current_char); + UnquoteState::HasEscapeOctal { out, octal } + } + (state, _) => panic!( + "Invalid state unquoting string: {:?} | {} | {:?}", + state, offset, interior_text + ), }; } - Ok(String::from_utf8(out)?) + match state { + UnquoteState::Normal | UnquoteState::Escape | UnquoteState::Octal { .. } => { + Ok(Cow::Borrowed(interior_text)) + } + UnquoteState::HasEscape { out } => Ok(Cow::Owned(String::from_utf8(out)?)), + UnquoteState::HasEscapeEscape { mut out } => { + out.push(b'\\'); + Ok(Cow::Owned(String::from_utf8(out)?)) + } + UnquoteState::HasEscapeOctal { mut out, octal } => { + out.push(b'\\'); + out.extend(octal); + Ok(Cow::Owned(String::from_utf8(out)?)) + } + } } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] diff --git a/src/compare/util.rs b/src/compare/util.rs index fd85938c..b7c09fbe 100644 --- a/src/compare/util.rs +++ b/src/compare/util.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::str::FromStr; use super::compare_field::compare_property_list_of_quoted_string; @@ -206,10 +207,10 @@ pub(crate) fn get_property_unquoted_atom<'s>( /// Get a named property containing an quoted string from the emacs token. /// /// Returns None if key is not found. -pub(crate) fn get_property_quoted_string( - emacs: &Token<'_>, +pub(crate) fn get_property_quoted_string<'s>( + emacs: &Token<'s>, key: &str, -) -> Result, Box> { +) -> Result>, Box> { get_property(emacs, key)? .map(Token::as_atom) .map_or(Ok(None), |r| r.map(Some))?