Switch to using a similar optimized Cow function for regular links.

This commit is contained in:
Tom Alexander 2023-10-08 14:11:46 -04:00
parent 0e791e67ab
commit 42dbda494a
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
2 changed files with 131 additions and 58 deletions

View File

@ -1,6 +1,7 @@
use std::borrow::Borrow;
use std::borrow::Cow;
use super::util::coalesce_whitespace_if_line_break;
use super::util::remove_line_break;
use super::util::remove_whitespace_if_line_break;
use super::GetStandardProperties;
@ -665,67 +666,22 @@ pub enum LinkType<'s> {
Fuzzy,
}
/// Remembers whether the most recently consumed character was whitespace.
#[derive(Debug)]
enum ParserState {
    /// The previous character (if any) was not whitespace.
    Normal,
    /// The previous character was whitespace and a space has already been emitted for the run.
    InWhitespace,
}

/// Collapse every run of spaces, tabs, carriage returns and line feeds into one space.
///
/// All other characters are copied through unchanged, and the result is always a freshly
/// allocated `String`.
///
/// Example: `orgify_text("foo \t\n bar") == "foo bar"`
pub(crate) fn orgify_text<T: AsRef<str>>(raw_text: T) -> String {
    let source = raw_text.as_ref();
    // The output can never be longer than the input, so one allocation is enough.
    let mut collapsed = String::with_capacity(source.len());
    let mut state = ParserState::Normal;
    for ch in source.chars() {
        let is_blank = matches!(ch, ' ' | '\t' | '\r' | '\n');
        state = if is_blank {
            // Only the first character of a whitespace run contributes a space.
            if matches!(state, ParserState::Normal) {
                collapsed.push(' ');
            }
            ParserState::InWhitespace
        } else {
            collapsed.push(ch);
            ParserState::Normal
        };
    }
    collapsed
}
impl<'s> RegularLink<'s> {
// NOTE(review): every accessor in this listing appears twice: a `String`-returning form that calls `orgify_text`, immediately followed by a `Cow`-returning form that calls `coalesce_whitespace_if_line_break`. Presumably these are the removed and added lines of one change shown together; confirm against the real file before editing.
/// Returns the raw link text; if it contains a line break, runs of whitespace are collapsed to a single space.
pub fn get_raw_link(&self) -> String {
if self.raw_link.contains('\n') {
orgify_text(Borrow::<str>::borrow(&self.raw_link))
} else {
self.raw_link.clone().into_owned()
}
// `Cow` form: the helper hands back the input borrowed when it finds no line break, so nothing is copied in that case.
pub fn get_raw_link(&'s self) -> Cow<'s, str> {
coalesce_whitespace_if_line_break(&self.raw_link)
}
/// Returns the link path; if it contains a line break, runs of whitespace are collapsed to a single space.
pub fn get_path(&self) -> String {
if self.path.contains('\n') {
orgify_text(Borrow::<str>::borrow(&self.path))
} else {
self.path.clone().into_owned()
}
// `Cow` form: the helper hands back the input borrowed when it finds no line break, so nothing is copied in that case.
pub fn get_path(&'s self) -> Cow<'s, str> {
coalesce_whitespace_if_line_break(&self.path)
}
/// Returns the search option, or `None` when the link has none; if it contains a line break, runs of whitespace are collapsed to a single space.
pub fn get_search_option(&self) -> Option<String> {
self.search_option.as_ref().map(|search_option| {
if search_option.contains('\n') {
orgify_text(search_option)
} else {
search_option.clone().into_owned()
}
})
// `Cow` form: same whitespace handling as above, applied only when a search option is present.
pub fn get_search_option(&'s self) -> Option<Cow<'s, str>> {
self.search_option
.as_ref()
.map(|search_option| coalesce_whitespace_if_line_break(search_option.borrow()))
}
}
@ -735,11 +691,6 @@ impl<'s> RadioLink<'s> {
}
}
/// Two-state marker: either `Normal`, or `HasLineBreak` carrying an owned `String`.
///
/// NOTE(review): no use of this enum is visible in this excerpt. Presumably `HasLineBreak`
/// holds the output accumulated once a line break has been seen — confirm against its users.
enum PathState {
Normal,
HasLineBreak(String),
}
impl<'s> AngleLink<'s> {
/// Remove line breaks but preserve multiple consecutive spaces.
pub fn get_path(&self) -> Cow<'s, str> {

View File

@ -75,3 +75,125 @@ enum RemoveLineBreakState {
Normal,
HasLineBreak(String),
}
/// Collapses each run of whitespace into a single space, but only when the input contains a
/// line break.
///
/// Whitespace means space, tab, carriage return and line feed. Input without a line feed is
/// returned as `Cow::Borrowed` and nothing is allocated; otherwise a new `String` is built in
/// which every whitespace run (including runs that precede the first line break) is replaced
/// by exactly one space.
///
/// Example: "foo  bar" => "foo  bar" (no line break, untouched) but "foo \n bar" => "foo bar".
pub(crate) fn coalesce_whitespace_if_line_break<'s>(input: &'s str) -> Cow<'s, str> {
    let mut state = CoalesceWhitespaceIfLineBreakState::Normal;
    for (offset, c) in input.char_indices() {
        match (&mut state, c) {
            (CoalesceWhitespaceIfLineBreakState::Normal, '\n') => {
                // Line break with no whitespace anywhere before it: the prefix can be copied
                // verbatim and the line break itself becomes the single space.
                let mut ret = String::with_capacity(input.len());
                ret.push_str(&input[..offset]);
                ret.push(' ');
                state = CoalesceWhitespaceIfLineBreakState::HasLineBreak {
                    in_whitespace: true,
                    ret,
                };
            }
            (CoalesceWhitespaceIfLineBreakState::Normal, ' ' | '\t' | '\r') => {
                // Remember where whitespace first appeared so that, if a line break shows up
                // later, only the text from this point on has to be re-scanned.
                state = CoalesceWhitespaceIfLineBreakState::HasWhitespace {
                    in_whitespace: true,
                    first_whitespace_offset: offset,
                };
            }
            (CoalesceWhitespaceIfLineBreakState::Normal, _) => {}
            (
                CoalesceWhitespaceIfLineBreakState::HasWhitespace {
                    in_whitespace,
                    first_whitespace_offset,
                },
                '\n',
            ) => {
                // Line break after earlier whitespace: copy everything before the first
                // whitespace verbatim, then replay the rest up to here while coalescing.
                let mut ret = String::with_capacity(input.len());
                ret.push_str(&input[..*first_whitespace_offset]);
                let mut replay_in_whitespace = false;
                for replayed in input[*first_whitespace_offset..offset].chars() {
                    // Uses the same whitespace class as the state machine so the replay and
                    // the `in_whitespace` flag can never disagree.
                    if is_coalescible_whitespace(replayed) {
                        if !replay_in_whitespace {
                            ret.push(' ');
                        }
                        replay_in_whitespace = true;
                    } else {
                        ret.push(replayed);
                        replay_in_whitespace = false;
                    }
                }
                if !*in_whitespace {
                    // The line break starts a new whitespace run, so it needs its own space.
                    ret.push(' ');
                }
                state = CoalesceWhitespaceIfLineBreakState::HasLineBreak {
                    in_whitespace: true, // The line break itself is whitespace.
                    ret,
                };
            }
            (
                CoalesceWhitespaceIfLineBreakState::HasWhitespace { in_whitespace, .. },
                ' ' | '\t' | '\r',
            ) => {
                *in_whitespace = true;
            }
            (CoalesceWhitespaceIfLineBreakState::HasWhitespace { in_whitespace, .. }, _) => {
                *in_whitespace = false;
            }
            (
                CoalesceWhitespaceIfLineBreakState::HasLineBreak { in_whitespace, ret },
                ' ' | '\t' | '\r' | '\n',
            ) => {
                // Only the first character of a whitespace run emits a space.
                if !*in_whitespace {
                    ret.push(' ');
                }
                *in_whitespace = true;
            }
            (CoalesceWhitespaceIfLineBreakState::HasLineBreak { in_whitespace, ret }, _) => {
                *in_whitespace = false;
                ret.push(c);
            }
        }
    }
    match state {
        // No line break was ever seen, so the input is handed back untouched.
        CoalesceWhitespaceIfLineBreakState::Normal
        | CoalesceWhitespaceIfLineBreakState::HasWhitespace { .. } => Cow::Borrowed(input),
        CoalesceWhitespaceIfLineBreakState::HasLineBreak { ret, .. } => Cow::Owned(ret),
    }
}

/// The characters that are coalesced: space, tab, carriage return and line feed.
fn is_coalescible_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\r' | '\n')
}

/// Scanner state for `coalesce_whitespace_if_line_break`.
enum CoalesceWhitespaceIfLineBreakState {
    /// Neither whitespace nor a line break has been seen yet.
    Normal,
    /// Whitespace has been seen but no line break, so no output has been allocated.
    HasWhitespace {
        /// Whether the most recently scanned character was whitespace.
        in_whitespace: bool,
        /// Byte offset of the first whitespace character in the input.
        first_whitespace_offset: usize,
    },
    /// A line break has been seen; output is being accumulated in `ret`.
    HasLineBreak {
        /// Whether the most recently scanned character was whitespace.
        in_whitespace: bool,
        /// The coalesced output built so far.
        ret: String,
    },
}