Move the angle link string processing functions to a util file.

Since I bothered to do the right thing and implement these as returning Cow so I can avoid unnecessary allocations, I figure I should move them to a util file so they can be re-used.
This commit is contained in:
Tom Alexander 2023-10-08 13:36:57 -04:00
parent ba55e0df4f
commit 0e791e67ab
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
3 changed files with 82 additions and 61 deletions

View File

@ -8,6 +8,7 @@ mod macros;
mod object;
mod source;
mod standard_properties;
mod util;
pub(crate) use ast_node::AstNode;
pub use document::Document;
pub use document::DocumentElement;

View File

@ -1,6 +1,8 @@
use std::borrow::Borrow;
use std::borrow::Cow;
use super::util::remove_line_break;
use super::util::remove_whitespace_if_line_break;
use super::GetStandardProperties;
use super::StandardProperties;
@ -738,73 +740,14 @@ enum PathState {
HasLineBreak(String),
}
enum SearchOptionState {
Normal,
HasWhitespace(usize),
HasLineBreak(String),
}
impl<'s> AngleLink<'s> {
/// Remove line breaks but preserve multiple consecutive spaces.
pub fn get_path(&self) -> Cow<'s, str> {
let mut state = PathState::Normal;
for (offset, c) in self.path.char_indices() {
match (&mut state, c) {
(PathState::Normal, '\n') => {
let mut ret = String::with_capacity(self.path.len());
ret.push_str(&self.path[..offset]);
state = PathState::HasLineBreak(ret);
}
(PathState::Normal, _) => {}
(PathState::HasLineBreak(_), '\n') => {}
(PathState::HasLineBreak(ret), _) => {
ret.push(c);
}
}
}
match state {
PathState::Normal => Cow::Borrowed(self.path),
PathState::HasLineBreak(ret) => Cow::Owned(ret),
}
remove_line_break(self.path)
}
/// Remove all whitespace but only if search_option contains a line break.
pub fn get_search_option(&self) -> Option<Cow<'s, str>> {
self.search_option.map(|search_option| {
let mut state = SearchOptionState::Normal;
for (offset, c) in search_option.char_indices() {
match (&mut state, c) {
(SearchOptionState::Normal, '\n') => {
let mut ret = String::with_capacity(search_option.len());
ret.push_str(&search_option[..offset]);
state = SearchOptionState::HasLineBreak(ret);
}
(SearchOptionState::Normal, ' ' | '\t') => {
state = SearchOptionState::HasWhitespace(offset);
}
(SearchOptionState::Normal, _) => {}
(SearchOptionState::HasWhitespace(first_whitespace_offset), '\n') => {
let mut ret = String::with_capacity(search_option.len());
ret.push_str(&search_option[..*first_whitespace_offset]);
for c in search_option[*first_whitespace_offset..offset].chars() {
if !c.is_ascii_whitespace() {
ret.push(c);
}
}
state = SearchOptionState::HasLineBreak(ret);
}
(SearchOptionState::HasWhitespace(_), _) => {}
(SearchOptionState::HasLineBreak(_), ' ' | '\t' | '\r' | '\n') => {}
(SearchOptionState::HasLineBreak(ret), _) => {
ret.push(c);
}
}
}
match state {
SearchOptionState::Normal => Cow::Borrowed(search_option),
SearchOptionState::HasWhitespace(_) => Cow::Borrowed(search_option),
SearchOptionState::HasLineBreak(ret) => Cow::Owned(ret),
}
})
self.search_option.map(remove_whitespace_if_line_break)
}
}

77
src/types/util.rs Normal file
View File

@ -0,0 +1,77 @@
use std::borrow::Cow;
/// Removes all ASCII whitespace from a string if any line breaks are present.
///
/// If `input` contains no `'\n'` it is returned untouched as [`Cow::Borrowed`], so the common
/// single-line case performs no allocation. Otherwise every ASCII whitespace character (space,
/// tab, carriage return, line feed, form feed) is dropped and the result is returned as
/// [`Cow::Owned`].
///
/// Whitespace is classified with the same predicate no matter where it sits relative to the
/// first line break, so Windows-style `"\r\n"` line endings are stripped completely instead of
/// leaving a stray `'\r'` behind.
///
/// Example: `"foo bar"` => `"foo bar"` but `"foo \n bar"` => `"foobar"` and
/// `"foo\r\nbar"` => `"foobar"`.
pub(crate) fn remove_whitespace_if_line_break<'s>(input: &'s str) -> Cow<'s, str> {
    // Without a line break the input is returned verbatim, interior whitespace included.
    if !input.contains('\n') {
        return Cow::Borrowed(input);
    }

    // The output can only shrink, so reserving the input length avoids any regrowth.
    let mut stripped = String::with_capacity(input.len());
    stripped.extend(input.chars().filter(|c| !c.is_ascii_whitespace()));
    Cow::Owned(stripped)
}
/// Strips every line feed (`'\n'`) from a string, leaving all other characters alone.
///
/// Spaces, tabs and carriage returns are preserved exactly, including runs of consecutive
/// spaces. When `input` has no line feed it is handed back as [`Cow::Borrowed`] without
/// allocating; otherwise a new string is built and returned as [`Cow::Owned`].
///
/// Example: `"foo bar"` => `"foo bar"` but `"foo \n bar"` => `"foo  bar"` (both spaces kept).
pub(crate) fn remove_line_break<'s>(input: &'s str) -> Cow<'s, str> {
    match input.find('\n') {
        // Nothing to remove: reuse the caller's slice.
        None => Cow::Borrowed(input),
        Some(first_break) => {
            // Everything before the first line feed is copied wholesale; only the remainder
            // needs to be inspected character by character.
            let (head, tail) = input.split_at(first_break);
            let mut joined = String::with_capacity(input.len());
            joined.push_str(head);
            joined.extend(tail.chars().filter(|&c| c != '\n'));
            Cow::Owned(joined)
        }
    }
}