Improve coalesce_whitespace_escaped to return the input borrowed (Cow::Borrowed) when the only whitespace it contains is single spaces.

This commit is contained in:
Tom Alexander 2023-10-08 16:15:49 -04:00
parent 41aa0349a0
commit 17c745ee71
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE

View File

@ -259,15 +259,33 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>(
escape_character: char, escape_character: char,
escapable_characters: C, escapable_characters: C,
) -> Cow<'s, str> { ) -> Cow<'s, str> {
let mut state = CoalesceWhitespaceEscaped::Normal; let mut state = CoalesceWhitespaceEscaped::Normal {
in_whitespace: false,
};
for (offset, c) in input.char_indices() { for (offset, c) in input.char_indices() {
state = match (state, c) { state = match (state, c) {
(CoalesceWhitespaceEscaped::Normal, c) if c == escape_character => { (CoalesceWhitespaceEscaped::Normal { in_whitespace: _ }, c)
if c == escape_character =>
{
CoalesceWhitespaceEscaped::NormalEscaping { CoalesceWhitespaceEscaped::NormalEscaping {
escape_offset: offset, escape_offset: offset,
} }
} }
(CoalesceWhitespaceEscaped::Normal, ' ' | '\t' | '\r' | '\n') => { (CoalesceWhitespaceEscaped::Normal { in_whitespace }, ' ') => {
if in_whitespace {
let mut ret = String::with_capacity(input.len());
ret.push_str(&input[..offset]);
CoalesceWhitespaceEscaped::RequiresMutation {
in_whitespace: true,
ret,
}
} else {
CoalesceWhitespaceEscaped::Normal {
in_whitespace: true,
}
}
}
(CoalesceWhitespaceEscaped::Normal { in_whitespace: _ }, '\t' | '\r' | '\n') => {
let mut ret = String::with_capacity(input.len()); let mut ret = String::with_capacity(input.len());
ret.push_str(&input[..offset]); ret.push_str(&input[..offset]);
ret.push(' '); ret.push(' ');
@ -276,7 +294,11 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>(
ret, ret,
} }
} }
(CoalesceWhitespaceEscaped::Normal, _) => CoalesceWhitespaceEscaped::Normal, (CoalesceWhitespaceEscaped::Normal { in_whitespace: _ }, _) => {
CoalesceWhitespaceEscaped::Normal {
in_whitespace: false,
}
}
(CoalesceWhitespaceEscaped::NormalEscaping { escape_offset }, c) (CoalesceWhitespaceEscaped::NormalEscaping { escape_offset }, c)
if escapable_characters(c) => if escapable_characters(c) =>
{ {
@ -290,9 +312,15 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>(
} }
} }
(CoalesceWhitespaceEscaped::NormalEscaping { escape_offset: _ }, ' ') => {
// We didn't escape the character so continue as normal.
CoalesceWhitespaceEscaped::Normal {
in_whitespace: true,
}
}
( (
CoalesceWhitespaceEscaped::NormalEscaping { escape_offset: _ }, CoalesceWhitespaceEscaped::NormalEscaping { escape_offset: _ },
' ' | '\t' | '\r' | '\n', '\t' | '\r' | '\n',
) => { ) => {
// We didn't escape the character but we hit whitespace anyway. // We didn't escape the character but we hit whitespace anyway.
let mut ret = String::with_capacity(input.len()); let mut ret = String::with_capacity(input.len());
@ -305,7 +333,9 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>(
} }
(CoalesceWhitespaceEscaped::NormalEscaping { escape_offset: _ }, _) => { (CoalesceWhitespaceEscaped::NormalEscaping { escape_offset: _ }, _) => {
// We didn't escape the character so continue as normal. // We didn't escape the character so continue as normal.
CoalesceWhitespaceEscaped::Normal CoalesceWhitespaceEscaped::Normal {
in_whitespace: false,
}
} }
( (
@ -379,7 +409,6 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>(
) => { ) => {
ret.push(matched_escape_character); ret.push(matched_escape_character);
ret.push(c); ret.push(c);
// TODO
CoalesceWhitespaceEscaped::RequiresMutation { CoalesceWhitespaceEscaped::RequiresMutation {
in_whitespace: false, in_whitespace: false,
ret, ret,
@ -388,7 +417,7 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>(
} }
} }
match state { match state {
CoalesceWhitespaceEscaped::Normal => Cow::Borrowed(input), CoalesceWhitespaceEscaped::Normal { in_whitespace: _ } => Cow::Borrowed(input),
CoalesceWhitespaceEscaped::NormalEscaping { escape_offset: _ } => Cow::Borrowed(input), CoalesceWhitespaceEscaped::NormalEscaping { escape_offset: _ } => Cow::Borrowed(input),
CoalesceWhitespaceEscaped::RequiresMutation { CoalesceWhitespaceEscaped::RequiresMutation {
in_whitespace: _, in_whitespace: _,
@ -405,7 +434,9 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>(
} }
enum CoalesceWhitespaceEscaped { enum CoalesceWhitespaceEscaped {
Normal, Normal {
in_whitespace: bool,
},
NormalEscaping { NormalEscaping {
escape_offset: usize, escape_offset: usize,
}, },
@ -437,8 +468,7 @@ mod tests {
let input = "foo bar baz"; let input = "foo bar baz";
let output = coalesce_whitespace_escaped('&', |c| "".contains(c))(input); let output = coalesce_whitespace_escaped('&', |c| "".contains(c))(input);
assert_eq!(output, "foo bar baz"); assert_eq!(output, "foo bar baz");
// TODO: Technically this should be a Borrowed but to keep the code simple for now we are treating all whitespace as causing ownership. assert!(matches!(output, Cow::Borrowed(_)));
assert!(matches!(output, Cow::Owned(_)));
Ok(()) Ok(())
} }
@ -468,4 +498,14 @@ mod tests {
assert!(matches!(output, Cow::Owned(_))); assert!(matches!(output, Cow::Owned(_)));
Ok(()) Ok(())
} }
// Checks that an escape character which is NOT followed by an escapable
// character is left untouched, even when it sits directly next to whitespace,
// and that such input does not force an allocation.
#[test]
fn coalesce_whitespace_escaped_escape_mismatch_around_whitespace(
) -> Result<(), Box<dyn std::error::Error>> {
// '&' is the escape character and only 'z' is escapable. In this input '&' is
// followed by ' ' and by 'b', so neither occurrence forms a valid escape.
// The only whitespace present is single spaces.
let input = "foo& bar &baz";
let output = coalesce_whitespace_escaped('&', |c| "z".contains(c))(input);
// The text must come back unchanged...
assert_eq!(output, "foo& bar &baz");
// ...and as a borrow of the input rather than a newly built String.
assert!(matches!(output, Cow::Borrowed(_)));
Ok(())
}
} }