Only allocate memory when unquoting sexp string that contains escapes.
All checks were successful
rust-build Build rust-build has succeeded
rust-foreign-document-test Build rust-foreign-document-test has succeeded
rust-test Build rust-test has succeeded
clippy Build clippy has succeeded
rustfmt Build rustfmt has succeeded

If the quoted string contains no escape sequences, then unquoting the string can be done by simply shaving off the leading and trailing quotation marks which can be a slice operation. By returning Cow, we can return either a borrowed slice or an owned String.
This commit is contained in:
Tom Alexander 2023-10-20 12:44:24 -04:00
parent 503db94b2c
commit acfc5e5e68
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
3 changed files with 140 additions and 34 deletions

View File

@ -1,3 +1,5 @@
use std::borrow::Borrow;
use std::borrow::Cow;
use std::collections::BTreeSet; use std::collections::BTreeSet;
use std::fmt::Debug; use std::fmt::Debug;
use std::str::FromStr; use std::str::FromStr;
@ -262,11 +264,11 @@ pub(crate) fn compare_property_set_of_quoted_string<
.iter() .iter()
.map(|e| e.as_atom()) .map(|e| e.as_atom())
.collect::<Result<Vec<_>, _>>()?; .collect::<Result<Vec<_>, _>>()?;
let value: Vec<String> = value let value: Vec<Cow<'_, str>> = value
.into_iter() .into_iter()
.map(unquote) .map(unquote)
.collect::<Result<Vec<_>, _>>()?; .collect::<Result<Vec<_>, _>>()?;
let value: BTreeSet<&str> = value.iter().map(|e| e.as_str()).collect(); let value: BTreeSet<&str> = value.iter().map(|e| e.borrow()).collect();
let mismatched: Vec<_> = value.symmetric_difference(&rust_value).copied().collect(); let mismatched: Vec<_> = value.symmetric_difference(&rust_value).copied().collect();
if !mismatched.is_empty() { if !mismatched.is_empty() {
let this_status = DiffStatus::Bad; let this_status = DiffStatus::Bad;

View File

@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::collections::HashMap; use std::collections::HashMap;
use nom::branch::alt; use nom::branch::alt;
@ -36,12 +37,6 @@ pub struct TextWithProperties<'s> {
pub(crate) properties: Vec<Token<'s>>, pub(crate) properties: Vec<Token<'s>>,
} }
enum ParseState {
Normal,
Escape,
Octal(Vec<u8>),
}
impl<'s> Token<'s> { impl<'s> Token<'s> {
pub(crate) fn as_vector<'p>( pub(crate) fn as_vector<'p>(
&'p self, &'p self,
@ -117,8 +112,27 @@ fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str {
&input[..offset] &input[..offset]
} }
pub(crate) fn unquote(text: &str) -> Result<String, Box<dyn std::error::Error>> { #[derive(Debug)]
let mut out: Vec<u8> = Vec::with_capacity(text.len()); enum UnquoteState {
Normal,
Escape,
HasEscape {
out: Vec<u8>,
},
HasEscapeEscape {
out: Vec<u8>,
},
Octal {
octal_begin_offset: usize,
octal: Vec<u8>,
},
HasEscapeOctal {
out: Vec<u8>,
octal: Vec<u8>,
},
}
pub(crate) fn unquote(text: &str) -> Result<Cow<'_, str>, Box<dyn std::error::Error>> {
if !text.starts_with('"') { if !text.starts_with('"') {
return Err("Quoted text does not start with quote.".into()); return Err("Quoted text does not start with quote.".into());
} }
@ -126,54 +140,143 @@ pub(crate) fn unquote(text: &str) -> Result<String, Box<dyn std::error::Error>>
return Err("Quoted text does not end with quote.".into()); return Err("Quoted text does not end with quote.".into());
} }
let interior_text = &text[1..(text.len() - 1)]; let interior_text = &text[1..(text.len() - 1)];
let mut state = ParseState::Normal; let mut state = UnquoteState::Normal;
for current_char in interior_text.bytes() { for (offset, current_char) in interior_text.bytes().enumerate() {
// Check to see if octal finished // Check to see if octal finished
state = match (state, current_char) { state = match (state, current_char) {
(ParseState::Octal(octal), b'0'..=b'7') if octal.len() < MAX_OCTAL_LENGTH => { (
ParseState::Octal(octal) UnquoteState::Octal {
octal_begin_offset,
octal,
},
b'0'..=b'7',
) if octal.len() < MAX_OCTAL_LENGTH => UnquoteState::Octal {
octal_begin_offset,
octal,
},
(
UnquoteState::Octal {
octal_begin_offset,
octal,
},
_,
) => {
let octal_number_string = String::from_utf8(octal)?;
let decoded_byte = u8::from_str_radix(&octal_number_string, 8)?;
let mut out: Vec<u8> = Vec::with_capacity(interior_text.len());
out.extend_from_slice(&interior_text.as_bytes()[..octal_begin_offset]);
out.push(decoded_byte);
UnquoteState::HasEscape { out }
} }
(ParseState::Octal(octal), _) => { (UnquoteState::HasEscapeOctal { out, octal }, b'0'..=b'7')
if octal.len() < MAX_OCTAL_LENGTH =>
{
UnquoteState::HasEscapeOctal { out, octal }
}
(UnquoteState::HasEscapeOctal { mut out, octal }, _) => {
let octal_number_string = String::from_utf8(octal)?; let octal_number_string = String::from_utf8(octal)?;
let decoded_byte = u8::from_str_radix(&octal_number_string, 8)?; let decoded_byte = u8::from_str_radix(&octal_number_string, 8)?;
out.push(decoded_byte); out.push(decoded_byte);
ParseState::Normal UnquoteState::HasEscape { out }
} }
(state, _) => state, (state, _) => state,
}; };
state = match (state, current_char) { state = match (state, current_char) {
(ParseState::Normal, b'\\') => ParseState::Escape, (UnquoteState::Normal, b'\\') => UnquoteState::Escape,
(ParseState::Normal, _) => { (UnquoteState::Normal, _) => UnquoteState::Normal,
(UnquoteState::HasEscape { out }, b'\\') => UnquoteState::HasEscapeEscape { out },
(UnquoteState::HasEscape { mut out }, _) => {
out.push(current_char); out.push(current_char);
ParseState::Normal UnquoteState::HasEscape { out }
} }
(ParseState::Escape, b'n') => { (UnquoteState::Escape, b'n') => {
let mut out: Vec<u8> = Vec::with_capacity(interior_text.len());
// Subtract 1 from offset to account for backslash.
out.extend_from_slice(&interior_text.as_bytes()[..(offset - 1)]);
out.push(b'\n'); out.push(b'\n');
ParseState::Normal UnquoteState::HasEscape { out }
} }
(ParseState::Escape, b'\\') => { (UnquoteState::HasEscapeEscape { mut out }, b'n') => {
out.push(b'\n');
UnquoteState::HasEscape { out }
}
(UnquoteState::Escape, b'\\') => {
let mut out: Vec<u8> = Vec::with_capacity(interior_text.len());
// Subtract 1 from offset to account for backslash.
out.extend_from_slice(&interior_text.as_bytes()[..(offset - 1)]);
out.push(b'\\'); out.push(b'\\');
ParseState::Normal UnquoteState::HasEscape { out }
} }
(ParseState::Escape, b'"') => { (UnquoteState::HasEscapeEscape { mut out }, b'\\') => {
out.push(b'\\');
UnquoteState::HasEscape { out }
}
(UnquoteState::Escape, b'"') => {
let mut out: Vec<u8> = Vec::with_capacity(interior_text.len());
// Subtract 1 from offset to account for backslash.
out.extend_from_slice(&interior_text.as_bytes()[..(offset - 1)]);
out.push(b'"'); out.push(b'"');
ParseState::Normal UnquoteState::HasEscape { out }
} }
(ParseState::Escape, b'0'..=b'7') => { (UnquoteState::HasEscapeEscape { mut out }, b'"') => {
out.push(b'"');
UnquoteState::HasEscape { out }
}
(UnquoteState::Escape, b'0'..=b'7') => {
let mut octal = Vec::with_capacity(MAX_OCTAL_LENGTH); let mut octal = Vec::with_capacity(MAX_OCTAL_LENGTH);
octal.push(current_char); octal.push(current_char);
ParseState::Octal(octal) // Substract 1 from offset to account for backslash
UnquoteState::Octal {
octal_begin_offset: offset - 1,
octal,
}
} }
(ParseState::Octal(mut octal), b'0'..=b'7') => { (UnquoteState::HasEscapeEscape { out }, b'0'..=b'7') => {
let mut octal = Vec::with_capacity(MAX_OCTAL_LENGTH);
octal.push(current_char); octal.push(current_char);
ParseState::Octal(octal) // Substract 1 from offset to account for backslash
UnquoteState::HasEscapeOctal { out, octal }
} }
_ => panic!("Invalid state unquoting string."), (
UnquoteState::Octal {
octal_begin_offset,
mut octal,
},
b'0'..=b'7',
) => {
octal.push(current_char);
UnquoteState::Octal {
octal_begin_offset,
octal,
}
}
(UnquoteState::HasEscapeOctal { out, mut octal }, b'0'..=b'7') => {
octal.push(current_char);
UnquoteState::HasEscapeOctal { out, octal }
}
(state, _) => panic!(
"Invalid state unquoting string: {:?} | {} | {:?}",
state, offset, interior_text
),
}; };
} }
Ok(String::from_utf8(out)?) match state {
UnquoteState::Normal | UnquoteState::Escape | UnquoteState::Octal { .. } => {
Ok(Cow::Borrowed(interior_text))
}
UnquoteState::HasEscape { out } => Ok(Cow::Owned(String::from_utf8(out)?)),
UnquoteState::HasEscapeEscape { mut out } => {
out.push(b'\\');
Ok(Cow::Owned(String::from_utf8(out)?))
}
UnquoteState::HasEscapeOctal { mut out, octal } => {
out.push(b'\\');
out.extend(octal);
Ok(Cow::Owned(String::from_utf8(out)?))
}
}
} }
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]

View File

@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::str::FromStr; use std::str::FromStr;
use super::compare_field::compare_property_list_of_quoted_string; use super::compare_field::compare_property_list_of_quoted_string;
@ -206,10 +207,10 @@ pub(crate) fn get_property_unquoted_atom<'s>(
/// Get a named property containing an quoted string from the emacs token. /// Get a named property containing an quoted string from the emacs token.
/// ///
/// Returns None if key is not found. /// Returns None if key is not found.
pub(crate) fn get_property_quoted_string( pub(crate) fn get_property_quoted_string<'s>(
emacs: &Token<'_>, emacs: &Token<'s>,
key: &str, key: &str,
) -> Result<Option<String>, Box<dyn std::error::Error>> { ) -> Result<Option<Cow<'s, str>>, Box<dyn std::error::Error>> {
get_property(emacs, key)? get_property(emacs, key)?
.map(Token::as_atom) .map(Token::as_atom)
.map_or(Ok(None), |r| r.map(Some))? .map_or(Ok(None), |r| r.map(Some))?