Switch to using a similar optimized Cow function for regular link.
This commit is contained in:
parent
0e791e67ab
commit
42dbda494a
@ -1,6 +1,7 @@
|
||||
use std::borrow::Borrow;
|
||||
use std::borrow::Cow;
|
||||
|
||||
use super::util::coalesce_whitespace_if_line_break;
|
||||
use super::util::remove_line_break;
|
||||
use super::util::remove_whitespace_if_line_break;
|
||||
use super::GetStandardProperties;
|
||||
@ -665,67 +666,22 @@ pub enum LinkType<'s> {
|
||||
Fuzzy,
|
||||
}
|
||||
|
||||
/// Scanner state for [`orgify_text`]: tracks whether the previously seen
/// character belonged to a run of whitespace.
#[derive(Debug)]
enum ParserState {
    /// The previous character (if any) was not whitespace.
    Normal,
    /// The previous character was whitespace, so a space has already been emitted for this run.
    InWhitespace,
}

/// Org-mode treats multiple consecutive whitespace characters as a single space. This function performs that transformation.
///
/// Whitespace here means space, tab, carriage return and line feed. The result is
/// always a freshly allocated `String`, even when the input needs no changes.
///
/// Example: `orgify_text("foo \t\n bar") == "foo bar"`
pub(crate) fn orgify_text<T: AsRef<str>>(raw_text: T) -> String {
    let raw_text = raw_text.as_ref();
    // The output can only be shorter than or equal to the input.
    let mut ret = String::with_capacity(raw_text.len());
    let mut state = ParserState::Normal;
    for c in raw_text.chars() {
        let is_whitespace = matches!(c, ' ' | '\t' | '\r' | '\n');
        state = match (state, is_whitespace) {
            (ParserState::Normal, true) => {
                // First whitespace character of a run: emit the single replacement space.
                ret.push(' ');
                ParserState::InWhitespace
            }
            // Subsequent whitespace characters of the same run are dropped.
            (ParserState::InWhitespace, true) => ParserState::InWhitespace,
            (ParserState::Normal | ParserState::InWhitespace, false) => {
                ret.push(c);
                ParserState::Normal
            }
        };
    }
    ret
}
|
||||
|
||||
impl<'s> RegularLink<'s> {
|
||||
/// Orgify the raw_link if it contains line breaks.
|
||||
pub fn get_raw_link(&self) -> String {
|
||||
if self.raw_link.contains('\n') {
|
||||
orgify_text(Borrow::<str>::borrow(&self.raw_link))
|
||||
} else {
|
||||
self.raw_link.clone().into_owned()
|
||||
}
|
||||
pub fn get_raw_link(&'s self) -> Cow<'s, str> {
|
||||
coalesce_whitespace_if_line_break(&self.raw_link)
|
||||
}
|
||||
|
||||
/// Orgify the path if it contains line breaks.
|
||||
pub fn get_path(&self) -> String {
|
||||
if self.path.contains('\n') {
|
||||
orgify_text(Borrow::<str>::borrow(&self.path))
|
||||
} else {
|
||||
self.path.clone().into_owned()
|
||||
}
|
||||
pub fn get_path(&'s self) -> Cow<'s, str> {
|
||||
coalesce_whitespace_if_line_break(&self.path)
|
||||
}
|
||||
|
||||
/// Orgify the search_option if it contains line breaks.
|
||||
pub fn get_search_option(&self) -> Option<String> {
|
||||
self.search_option.as_ref().map(|search_option| {
|
||||
if search_option.contains('\n') {
|
||||
orgify_text(search_option)
|
||||
} else {
|
||||
search_option.clone().into_owned()
|
||||
}
|
||||
})
|
||||
pub fn get_search_option(&'s self) -> Option<Cow<'s, str>> {
|
||||
self.search_option
|
||||
.as_ref()
|
||||
.map(|search_option| coalesce_whitespace_if_line_break(search_option.borrow()))
|
||||
}
|
||||
}
|
||||
|
||||
@ -735,11 +691,6 @@ impl<'s> RadioLink<'s> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Two-state marker carrying an owned buffer once a line break has been seen.
///
/// NOTE(review): the code that drives this enum is not visible in this view;
/// presumably it backed the `AngleLink` path handling below — confirm before relying on it.
enum PathState {
    /// No owned buffer has been started.
    Normal,
    /// Holds the owned `String` built so far.
    HasLineBreak(String),
}
|
||||
|
||||
impl<'s> AngleLink<'s> {
|
||||
/// Remove line breaks but preserve multiple consecutive spaces.
|
||||
pub fn get_path(&self) -> Cow<'s, str> {
|
||||
|
@ -75,3 +75,125 @@ enum RemoveLineBreakState {
|
||||
Normal,
|
||||
HasLineBreak(String),
|
||||
}
|
||||
|
||||
/// Collapses every run of whitespace into a single space if any line breaks are present.
///
/// Whitespace means space, tab, carriage return and line feed. Input that contains no
/// line feed is returned as `Cow::Borrowed` without allocating, even if it contains
/// runs of other whitespace.
///
/// Example: "foo  bar" => "foo  bar" but "foo \n bar" => "foo bar".
pub(crate) fn coalesce_whitespace_if_line_break<'s>(input: &'s str) -> Cow<'s, str> {
    let mut state = CoalesceWhitespaceIfLineBreakState::Normal;
    for (offset, c) in input.char_indices() {
        match (&mut state, c) {
            (CoalesceWhitespaceIfLineBreakState::Normal, '\n') => {
                // Hit line break without any preceding whitespace, so everything before it
                // can be copied verbatim.
                let mut ret = String::with_capacity(input.len());
                ret.push_str(&input[..offset]);
                ret.push(' ');
                state = CoalesceWhitespaceIfLineBreakState::HasLineBreak {
                    in_whitespace: true,
                    ret,
                };
            }
            (CoalesceWhitespaceIfLineBreakState::Normal, ' ' | '\t' | '\r') => {
                // Remember where whitespace first appeared: if a line break shows up later,
                // coalescing has to restart from this offset.
                state = CoalesceWhitespaceIfLineBreakState::HasWhitespace {
                    in_whitespace: true,
                    first_whitespace_offset: offset,
                };
            }
            (CoalesceWhitespaceIfLineBreakState::Normal, _) => {}

            (
                CoalesceWhitespaceIfLineBreakState::HasWhitespace {
                    in_whitespace,
                    first_whitespace_offset,
                },
                '\n',
            ) => {
                // Hit line break with preceding whitespace so we add all the text up to the
                // first whitespace and then re-process the remaining text, coalescing the
                // whitespace.
                let mut ret = String::with_capacity(input.len());
                ret.push_str(&input[..*first_whitespace_offset]);
                let mut replay_in_whitespace = false;
                for replayed in input[*first_whitespace_offset..offset].chars() {
                    push_coalesced_char(&mut ret, &mut replay_in_whitespace, replayed);
                }
                if !*in_whitespace {
                    // This line break starts a new whitespace run, so it needs its own space.
                    ret.push(' ');
                }
                state = CoalesceWhitespaceIfLineBreakState::HasLineBreak {
                    in_whitespace: true, // This was triggered by a line break which is whitespace.
                    ret,
                };
            }
            (
                CoalesceWhitespaceIfLineBreakState::HasWhitespace {
                    in_whitespace,
                    first_whitespace_offset: _,
                },
                _,
            ) => {
                // Line feeds are handled by the arm above, so this only tracks whether the
                // most recent character was whitespace (using the same definition as the
                // replay loop, so the two can never disagree).
                *in_whitespace = is_coalescible_whitespace(c);
            }
            (CoalesceWhitespaceIfLineBreakState::HasLineBreak { in_whitespace, ret }, _) => {
                push_coalesced_char(ret, in_whitespace, c);
            }
        }
    }
    match state {
        // No line break was ever seen, so the input is handed back untouched.
        CoalesceWhitespaceIfLineBreakState::Normal => Cow::Borrowed(input),
        CoalesceWhitespaceIfLineBreakState::HasWhitespace {
            in_whitespace: _,
            first_whitespace_offset: _,
        } => Cow::Borrowed(input),
        CoalesceWhitespaceIfLineBreakState::HasLineBreak {
            in_whitespace: _,
            ret,
        } => Cow::Owned(ret),
    }
}

/// Returns true for the characters that are collapsed when coalescing whitespace.
fn is_coalescible_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\r' | '\n')
}

/// Appends `c` to `ret`, emitting a single space for the first character of each
/// whitespace run and dropping the rest of the run.
fn push_coalesced_char(ret: &mut String, in_whitespace: &mut bool, c: char) {
    if is_coalescible_whitespace(c) {
        if !*in_whitespace {
            ret.push(' ');
        }
        *in_whitespace = true;
    } else {
        ret.push(c);
        *in_whitespace = false;
    }
}

/// Scanner state for [`coalesce_whitespace_if_line_break`].
enum CoalesceWhitespaceIfLineBreakState {
    /// Neither whitespace nor a line break has been seen yet.
    Normal,
    /// Whitespace has been seen but no line break, so nothing has been copied yet.
    HasWhitespace {
        /// Whether the most recently scanned character was whitespace.
        in_whitespace: bool,
        /// Byte offset of the first whitespace character in the input.
        first_whitespace_offset: usize,
    },
    /// A line break has been seen; the output is being built in `ret`.
    HasLineBreak {
        /// Whether the most recently scanned character was whitespace.
        in_whitespace: bool,
        /// The coalesced output accumulated so far.
        ret: String,
    },
}
|
||||
|
Loading…
Reference in New Issue
Block a user