Only allocate memory when unquoting sexp string that contains escapes.
If the quoted string contains no escape sequences, then unquoting the string can be done by simply shaving off the leading and trailing quotation marks which can be a slice operation. By returning Cow, we can return either a borrowed slice or an owned String.
This commit is contained in:
parent
503db94b2c
commit
acfc5e5e68
@ -1,3 +1,5 @@
|
|||||||
|
use std::borrow::Borrow;
|
||||||
|
use std::borrow::Cow;
|
||||||
use std::collections::BTreeSet;
|
use std::collections::BTreeSet;
|
||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
@ -262,11 +264,11 @@ pub(crate) fn compare_property_set_of_quoted_string<
|
|||||||
.iter()
|
.iter()
|
||||||
.map(|e| e.as_atom())
|
.map(|e| e.as_atom())
|
||||||
.collect::<Result<Vec<_>, _>>()?;
|
.collect::<Result<Vec<_>, _>>()?;
|
||||||
let value: Vec<String> = value
|
let value: Vec<Cow<'_, str>> = value
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(unquote)
|
.map(unquote)
|
||||||
.collect::<Result<Vec<_>, _>>()?;
|
.collect::<Result<Vec<_>, _>>()?;
|
||||||
let value: BTreeSet<&str> = value.iter().map(|e| e.as_str()).collect();
|
let value: BTreeSet<&str> = value.iter().map(|e| e.borrow()).collect();
|
||||||
let mismatched: Vec<_> = value.symmetric_difference(&rust_value).copied().collect();
|
let mismatched: Vec<_> = value.symmetric_difference(&rust_value).copied().collect();
|
||||||
if !mismatched.is_empty() {
|
if !mismatched.is_empty() {
|
||||||
let this_status = DiffStatus::Bad;
|
let this_status = DiffStatus::Bad;
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
use std::borrow::Cow;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use nom::branch::alt;
|
use nom::branch::alt;
|
||||||
@ -36,12 +37,6 @@ pub struct TextWithProperties<'s> {
|
|||||||
pub(crate) properties: Vec<Token<'s>>,
|
pub(crate) properties: Vec<Token<'s>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
enum ParseState {
|
|
||||||
Normal,
|
|
||||||
Escape,
|
|
||||||
Octal(Vec<u8>),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'s> Token<'s> {
|
impl<'s> Token<'s> {
|
||||||
pub(crate) fn as_vector<'p>(
|
pub(crate) fn as_vector<'p>(
|
||||||
&'p self,
|
&'p self,
|
||||||
@ -117,8 +112,27 @@ fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str {
|
|||||||
&input[..offset]
|
&input[..offset]
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn unquote(text: &str) -> Result<String, Box<dyn std::error::Error>> {
|
#[derive(Debug)]
|
||||||
let mut out: Vec<u8> = Vec::with_capacity(text.len());
|
enum UnquoteState {
|
||||||
|
Normal,
|
||||||
|
Escape,
|
||||||
|
HasEscape {
|
||||||
|
out: Vec<u8>,
|
||||||
|
},
|
||||||
|
HasEscapeEscape {
|
||||||
|
out: Vec<u8>,
|
||||||
|
},
|
||||||
|
Octal {
|
||||||
|
octal_begin_offset: usize,
|
||||||
|
octal: Vec<u8>,
|
||||||
|
},
|
||||||
|
HasEscapeOctal {
|
||||||
|
out: Vec<u8>,
|
||||||
|
octal: Vec<u8>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn unquote(text: &str) -> Result<Cow<'_, str>, Box<dyn std::error::Error>> {
|
||||||
if !text.starts_with('"') {
|
if !text.starts_with('"') {
|
||||||
return Err("Quoted text does not start with quote.".into());
|
return Err("Quoted text does not start with quote.".into());
|
||||||
}
|
}
|
||||||
@ -126,54 +140,143 @@ pub(crate) fn unquote(text: &str) -> Result<String, Box<dyn std::error::Error>>
|
|||||||
return Err("Quoted text does not end with quote.".into());
|
return Err("Quoted text does not end with quote.".into());
|
||||||
}
|
}
|
||||||
let interior_text = &text[1..(text.len() - 1)];
|
let interior_text = &text[1..(text.len() - 1)];
|
||||||
let mut state = ParseState::Normal;
|
let mut state = UnquoteState::Normal;
|
||||||
for current_char in interior_text.bytes() {
|
for (offset, current_char) in interior_text.bytes().enumerate() {
|
||||||
// Check to see if octal finished
|
// Check to see if octal finished
|
||||||
state = match (state, current_char) {
|
state = match (state, current_char) {
|
||||||
(ParseState::Octal(octal), b'0'..=b'7') if octal.len() < MAX_OCTAL_LENGTH => {
|
(
|
||||||
ParseState::Octal(octal)
|
UnquoteState::Octal {
|
||||||
|
octal_begin_offset,
|
||||||
|
octal,
|
||||||
|
},
|
||||||
|
b'0'..=b'7',
|
||||||
|
) if octal.len() < MAX_OCTAL_LENGTH => UnquoteState::Octal {
|
||||||
|
octal_begin_offset,
|
||||||
|
octal,
|
||||||
|
},
|
||||||
|
(
|
||||||
|
UnquoteState::Octal {
|
||||||
|
octal_begin_offset,
|
||||||
|
octal,
|
||||||
|
},
|
||||||
|
_,
|
||||||
|
) => {
|
||||||
|
let octal_number_string = String::from_utf8(octal)?;
|
||||||
|
let decoded_byte = u8::from_str_radix(&octal_number_string, 8)?;
|
||||||
|
let mut out: Vec<u8> = Vec::with_capacity(interior_text.len());
|
||||||
|
out.extend_from_slice(&interior_text.as_bytes()[..octal_begin_offset]);
|
||||||
|
out.push(decoded_byte);
|
||||||
|
UnquoteState::HasEscape { out }
|
||||||
}
|
}
|
||||||
(ParseState::Octal(octal), _) => {
|
(UnquoteState::HasEscapeOctal { out, octal }, b'0'..=b'7')
|
||||||
|
if octal.len() < MAX_OCTAL_LENGTH =>
|
||||||
|
{
|
||||||
|
UnquoteState::HasEscapeOctal { out, octal }
|
||||||
|
}
|
||||||
|
(UnquoteState::HasEscapeOctal { mut out, octal }, _) => {
|
||||||
let octal_number_string = String::from_utf8(octal)?;
|
let octal_number_string = String::from_utf8(octal)?;
|
||||||
let decoded_byte = u8::from_str_radix(&octal_number_string, 8)?;
|
let decoded_byte = u8::from_str_radix(&octal_number_string, 8)?;
|
||||||
out.push(decoded_byte);
|
out.push(decoded_byte);
|
||||||
ParseState::Normal
|
UnquoteState::HasEscape { out }
|
||||||
}
|
}
|
||||||
(state, _) => state,
|
(state, _) => state,
|
||||||
};
|
};
|
||||||
|
|
||||||
state = match (state, current_char) {
|
state = match (state, current_char) {
|
||||||
(ParseState::Normal, b'\\') => ParseState::Escape,
|
(UnquoteState::Normal, b'\\') => UnquoteState::Escape,
|
||||||
(ParseState::Normal, _) => {
|
(UnquoteState::Normal, _) => UnquoteState::Normal,
|
||||||
|
(UnquoteState::HasEscape { out }, b'\\') => UnquoteState::HasEscapeEscape { out },
|
||||||
|
(UnquoteState::HasEscape { mut out }, _) => {
|
||||||
out.push(current_char);
|
out.push(current_char);
|
||||||
ParseState::Normal
|
UnquoteState::HasEscape { out }
|
||||||
}
|
}
|
||||||
(ParseState::Escape, b'n') => {
|
(UnquoteState::Escape, b'n') => {
|
||||||
|
let mut out: Vec<u8> = Vec::with_capacity(interior_text.len());
|
||||||
|
// Subtract 1 from offset to account for backslash.
|
||||||
|
out.extend_from_slice(&interior_text.as_bytes()[..(offset - 1)]);
|
||||||
out.push(b'\n');
|
out.push(b'\n');
|
||||||
ParseState::Normal
|
UnquoteState::HasEscape { out }
|
||||||
}
|
}
|
||||||
(ParseState::Escape, b'\\') => {
|
(UnquoteState::HasEscapeEscape { mut out }, b'n') => {
|
||||||
|
out.push(b'\n');
|
||||||
|
UnquoteState::HasEscape { out }
|
||||||
|
}
|
||||||
|
(UnquoteState::Escape, b'\\') => {
|
||||||
|
let mut out: Vec<u8> = Vec::with_capacity(interior_text.len());
|
||||||
|
// Subtract 1 from offset to account for backslash.
|
||||||
|
out.extend_from_slice(&interior_text.as_bytes()[..(offset - 1)]);
|
||||||
out.push(b'\\');
|
out.push(b'\\');
|
||||||
ParseState::Normal
|
UnquoteState::HasEscape { out }
|
||||||
}
|
}
|
||||||
(ParseState::Escape, b'"') => {
|
(UnquoteState::HasEscapeEscape { mut out }, b'\\') => {
|
||||||
|
out.push(b'\\');
|
||||||
|
UnquoteState::HasEscape { out }
|
||||||
|
}
|
||||||
|
(UnquoteState::Escape, b'"') => {
|
||||||
|
let mut out: Vec<u8> = Vec::with_capacity(interior_text.len());
|
||||||
|
// Subtract 1 from offset to account for backslash.
|
||||||
|
out.extend_from_slice(&interior_text.as_bytes()[..(offset - 1)]);
|
||||||
out.push(b'"');
|
out.push(b'"');
|
||||||
ParseState::Normal
|
UnquoteState::HasEscape { out }
|
||||||
}
|
}
|
||||||
(ParseState::Escape, b'0'..=b'7') => {
|
(UnquoteState::HasEscapeEscape { mut out }, b'"') => {
|
||||||
|
out.push(b'"');
|
||||||
|
UnquoteState::HasEscape { out }
|
||||||
|
}
|
||||||
|
(UnquoteState::Escape, b'0'..=b'7') => {
|
||||||
let mut octal = Vec::with_capacity(MAX_OCTAL_LENGTH);
|
let mut octal = Vec::with_capacity(MAX_OCTAL_LENGTH);
|
||||||
octal.push(current_char);
|
octal.push(current_char);
|
||||||
ParseState::Octal(octal)
|
// Substract 1 from offset to account for backslash
|
||||||
|
UnquoteState::Octal {
|
||||||
|
octal_begin_offset: offset - 1,
|
||||||
|
octal,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
(ParseState::Octal(mut octal), b'0'..=b'7') => {
|
(UnquoteState::HasEscapeEscape { out }, b'0'..=b'7') => {
|
||||||
|
let mut octal = Vec::with_capacity(MAX_OCTAL_LENGTH);
|
||||||
octal.push(current_char);
|
octal.push(current_char);
|
||||||
ParseState::Octal(octal)
|
// Substract 1 from offset to account for backslash
|
||||||
|
UnquoteState::HasEscapeOctal { out, octal }
|
||||||
}
|
}
|
||||||
_ => panic!("Invalid state unquoting string."),
|
(
|
||||||
|
UnquoteState::Octal {
|
||||||
|
octal_begin_offset,
|
||||||
|
mut octal,
|
||||||
|
},
|
||||||
|
b'0'..=b'7',
|
||||||
|
) => {
|
||||||
|
octal.push(current_char);
|
||||||
|
UnquoteState::Octal {
|
||||||
|
octal_begin_offset,
|
||||||
|
octal,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(UnquoteState::HasEscapeOctal { out, mut octal }, b'0'..=b'7') => {
|
||||||
|
octal.push(current_char);
|
||||||
|
UnquoteState::HasEscapeOctal { out, octal }
|
||||||
|
}
|
||||||
|
(state, _) => panic!(
|
||||||
|
"Invalid state unquoting string: {:?} | {} | {:?}",
|
||||||
|
state, offset, interior_text
|
||||||
|
),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(String::from_utf8(out)?)
|
match state {
|
||||||
|
UnquoteState::Normal | UnquoteState::Escape | UnquoteState::Octal { .. } => {
|
||||||
|
Ok(Cow::Borrowed(interior_text))
|
||||||
|
}
|
||||||
|
UnquoteState::HasEscape { out } => Ok(Cow::Owned(String::from_utf8(out)?)),
|
||||||
|
UnquoteState::HasEscapeEscape { mut out } => {
|
||||||
|
out.push(b'\\');
|
||||||
|
Ok(Cow::Owned(String::from_utf8(out)?))
|
||||||
|
}
|
||||||
|
UnquoteState::HasEscapeOctal { mut out, octal } => {
|
||||||
|
out.push(b'\\');
|
||||||
|
out.extend(octal);
|
||||||
|
Ok(Cow::Owned(String::from_utf8(out)?))
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
use std::borrow::Cow;
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
|
||||||
use super::compare_field::compare_property_list_of_quoted_string;
|
use super::compare_field::compare_property_list_of_quoted_string;
|
||||||
@ -206,10 +207,10 @@ pub(crate) fn get_property_unquoted_atom<'s>(
|
|||||||
/// Get a named property containing an quoted string from the emacs token.
|
/// Get a named property containing an quoted string from the emacs token.
|
||||||
///
|
///
|
||||||
/// Returns None if key is not found.
|
/// Returns None if key is not found.
|
||||||
pub(crate) fn get_property_quoted_string(
|
pub(crate) fn get_property_quoted_string<'s>(
|
||||||
emacs: &Token<'_>,
|
emacs: &Token<'s>,
|
||||||
key: &str,
|
key: &str,
|
||||||
) -> Result<Option<String>, Box<dyn std::error::Error>> {
|
) -> Result<Option<Cow<'s, str>>, Box<dyn std::error::Error>> {
|
||||||
get_property(emacs, key)?
|
get_property(emacs, key)?
|
||||||
.map(Token::as_atom)
|
.map(Token::as_atom)
|
||||||
.map_or(Ok(None), |r| r.map(Some))?
|
.map_or(Ok(None), |r| r.map(Some))?
|
||||||
|
Loading…
Reference in New Issue
Block a user