Only allocate memory when unquoting sexp string that contains escapes.
clippy Build clippy has succeeded Details
rustfmt Build rustfmt has succeeded Details
rust-build Build rust-build has succeeded Details
rust-foreign-document-test Build rust-foreign-document-test has succeeded Details
rust-test Build rust-test has succeeded Details

If the quoted string contains no escape sequences, then unquoting the string can be done by simply shaving off the leading and trailing quotation marks which can be a slice operation. By returning Cow, we can return either a borrowed slice or an owned String.
This commit is contained in:
Tom Alexander 2023-10-20 12:44:24 -04:00
parent 503db94b2c
commit acfc5e5e68
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
3 changed files with 140 additions and 34 deletions

View File

@ -1,3 +1,5 @@
use std::borrow::Borrow;
use std::borrow::Cow;
use std::collections::BTreeSet;
use std::fmt::Debug;
use std::str::FromStr;
@ -262,11 +264,11 @@ pub(crate) fn compare_property_set_of_quoted_string<
.iter()
.map(|e| e.as_atom())
.collect::<Result<Vec<_>, _>>()?;
let value: Vec<String> = value
let value: Vec<Cow<'_, str>> = value
.into_iter()
.map(unquote)
.collect::<Result<Vec<_>, _>>()?;
let value: BTreeSet<&str> = value.iter().map(|e| e.as_str()).collect();
let value: BTreeSet<&str> = value.iter().map(|e| e.borrow()).collect();
let mismatched: Vec<_> = value.symmetric_difference(&rust_value).copied().collect();
if !mismatched.is_empty() {
let this_status = DiffStatus::Bad;

View File

@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::collections::HashMap;
use nom::branch::alt;
@ -36,12 +37,6 @@ pub struct TextWithProperties<'s> {
pub(crate) properties: Vec<Token<'s>>,
}
enum ParseState {
Normal,
Escape,
Octal(Vec<u8>),
}
impl<'s> Token<'s> {
pub(crate) fn as_vector<'p>(
&'p self,
@ -117,8 +112,27 @@ fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str {
&input[..offset]
}
pub(crate) fn unquote(text: &str) -> Result<String, Box<dyn std::error::Error>> {
let mut out: Vec<u8> = Vec::with_capacity(text.len());
#[derive(Debug)]
enum UnquoteState {
Normal,
Escape,
HasEscape {
out: Vec<u8>,
},
HasEscapeEscape {
out: Vec<u8>,
},
Octal {
octal_begin_offset: usize,
octal: Vec<u8>,
},
HasEscapeOctal {
out: Vec<u8>,
octal: Vec<u8>,
},
}
pub(crate) fn unquote(text: &str) -> Result<Cow<'_, str>, Box<dyn std::error::Error>> {
if !text.starts_with('"') {
return Err("Quoted text does not start with quote.".into());
}
@ -126,54 +140,143 @@ pub(crate) fn unquote(text: &str) -> Result<String, Box<dyn std::error::Error>>
return Err("Quoted text does not end with quote.".into());
}
let interior_text = &text[1..(text.len() - 1)];
let mut state = ParseState::Normal;
for current_char in interior_text.bytes() {
let mut state = UnquoteState::Normal;
for (offset, current_char) in interior_text.bytes().enumerate() {
// Check to see if octal finished
state = match (state, current_char) {
(ParseState::Octal(octal), b'0'..=b'7') if octal.len() < MAX_OCTAL_LENGTH => {
ParseState::Octal(octal)
(
UnquoteState::Octal {
octal_begin_offset,
octal,
},
b'0'..=b'7',
) if octal.len() < MAX_OCTAL_LENGTH => UnquoteState::Octal {
octal_begin_offset,
octal,
},
(
UnquoteState::Octal {
octal_begin_offset,
octal,
},
_,
) => {
let octal_number_string = String::from_utf8(octal)?;
let decoded_byte = u8::from_str_radix(&octal_number_string, 8)?;
let mut out: Vec<u8> = Vec::with_capacity(interior_text.len());
out.extend_from_slice(&interior_text.as_bytes()[..octal_begin_offset]);
out.push(decoded_byte);
UnquoteState::HasEscape { out }
}
(ParseState::Octal(octal), _) => {
(UnquoteState::HasEscapeOctal { out, octal }, b'0'..=b'7')
if octal.len() < MAX_OCTAL_LENGTH =>
{
UnquoteState::HasEscapeOctal { out, octal }
}
(UnquoteState::HasEscapeOctal { mut out, octal }, _) => {
let octal_number_string = String::from_utf8(octal)?;
let decoded_byte = u8::from_str_radix(&octal_number_string, 8)?;
out.push(decoded_byte);
ParseState::Normal
UnquoteState::HasEscape { out }
}
(state, _) => state,
};
state = match (state, current_char) {
(ParseState::Normal, b'\\') => ParseState::Escape,
(ParseState::Normal, _) => {
(UnquoteState::Normal, b'\\') => UnquoteState::Escape,
(UnquoteState::Normal, _) => UnquoteState::Normal,
(UnquoteState::HasEscape { out }, b'\\') => UnquoteState::HasEscapeEscape { out },
(UnquoteState::HasEscape { mut out }, _) => {
out.push(current_char);
ParseState::Normal
UnquoteState::HasEscape { out }
}
(ParseState::Escape, b'n') => {
(UnquoteState::Escape, b'n') => {
let mut out: Vec<u8> = Vec::with_capacity(interior_text.len());
// Subtract 1 from offset to account for backslash.
out.extend_from_slice(&interior_text.as_bytes()[..(offset - 1)]);
out.push(b'\n');
ParseState::Normal
UnquoteState::HasEscape { out }
}
(ParseState::Escape, b'\\') => {
(UnquoteState::HasEscapeEscape { mut out }, b'n') => {
out.push(b'\n');
UnquoteState::HasEscape { out }
}
(UnquoteState::Escape, b'\\') => {
let mut out: Vec<u8> = Vec::with_capacity(interior_text.len());
// Subtract 1 from offset to account for backslash.
out.extend_from_slice(&interior_text.as_bytes()[..(offset - 1)]);
out.push(b'\\');
ParseState::Normal
UnquoteState::HasEscape { out }
}
(ParseState::Escape, b'"') => {
(UnquoteState::HasEscapeEscape { mut out }, b'\\') => {
out.push(b'\\');
UnquoteState::HasEscape { out }
}
(UnquoteState::Escape, b'"') => {
let mut out: Vec<u8> = Vec::with_capacity(interior_text.len());
// Subtract 1 from offset to account for backslash.
out.extend_from_slice(&interior_text.as_bytes()[..(offset - 1)]);
out.push(b'"');
ParseState::Normal
UnquoteState::HasEscape { out }
}
(ParseState::Escape, b'0'..=b'7') => {
(UnquoteState::HasEscapeEscape { mut out }, b'"') => {
out.push(b'"');
UnquoteState::HasEscape { out }
}
(UnquoteState::Escape, b'0'..=b'7') => {
let mut octal = Vec::with_capacity(MAX_OCTAL_LENGTH);
octal.push(current_char);
ParseState::Octal(octal)
// Substract 1 from offset to account for backslash
UnquoteState::Octal {
octal_begin_offset: offset - 1,
octal,
}
}
(ParseState::Octal(mut octal), b'0'..=b'7') => {
(UnquoteState::HasEscapeEscape { out }, b'0'..=b'7') => {
let mut octal = Vec::with_capacity(MAX_OCTAL_LENGTH);
octal.push(current_char);
ParseState::Octal(octal)
// Substract 1 from offset to account for backslash
UnquoteState::HasEscapeOctal { out, octal }
}
_ => panic!("Invalid state unquoting string."),
(
UnquoteState::Octal {
octal_begin_offset,
mut octal,
},
b'0'..=b'7',
) => {
octal.push(current_char);
UnquoteState::Octal {
octal_begin_offset,
octal,
}
}
(UnquoteState::HasEscapeOctal { out, mut octal }, b'0'..=b'7') => {
octal.push(current_char);
UnquoteState::HasEscapeOctal { out, octal }
}
(state, _) => panic!(
"Invalid state unquoting string: {:?} | {} | {:?}",
state, offset, interior_text
),
};
}
Ok(String::from_utf8(out)?)
match state {
UnquoteState::Normal | UnquoteState::Escape | UnquoteState::Octal { .. } => {
Ok(Cow::Borrowed(interior_text))
}
UnquoteState::HasEscape { out } => Ok(Cow::Owned(String::from_utf8(out)?)),
UnquoteState::HasEscapeEscape { mut out } => {
out.push(b'\\');
Ok(Cow::Owned(String::from_utf8(out)?))
}
UnquoteState::HasEscapeOctal { mut out, octal } => {
out.push(b'\\');
out.extend(octal);
Ok(Cow::Owned(String::from_utf8(out)?))
}
}
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]

View File

@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::str::FromStr;
use super::compare_field::compare_property_list_of_quoted_string;
@ -206,10 +207,10 @@ pub(crate) fn get_property_unquoted_atom<'s>(
/// Get a named property containing an quoted string from the emacs token.
///
/// Returns None if key is not found.
pub(crate) fn get_property_quoted_string(
emacs: &Token<'_>,
pub(crate) fn get_property_quoted_string<'s>(
emacs: &Token<'s>,
key: &str,
) -> Result<Option<String>, Box<dyn std::error::Error>> {
) -> Result<Option<Cow<'s, str>>, Box<dyn std::error::Error>> {
get_property(emacs, key)?
.map(Token::as_atom)
.map_or(Ok(None), |r| r.map(Some))?