Switch to using a similar optimized Cow function for regular link.

2023-10-08 14:11:46 -04:00
parent 0e791e67ab
commit 42dbda494a
2 changed files with 131 additions and 58 deletions
--- a/src/types/object.rs
+++ b/src/types/object.rs
@@ -1,6 +1,7 @@
 use std::borrow::Borrow;
 use std::borrow::Cow;

+use super::util::coalesce_whitespace_if_line_break;
 use super::util::remove_line_break;
 use super::util::remove_whitespace_if_line_break;
 use super::GetStandardProperties;
@@ -665,67 +666,22 @@ pub enum LinkType<'s> {
    Fuzzy,
 }

-#[derive(Debug)]
-enum ParserState {
-    Normal,
-    InWhitespace,
-}
-
-/// Org-mode treats multiple consecutive whitespace characters as a single space. This function performs that transformation.
-///
-/// Example: `orgify_text("foo \t\n bar") == "foo bar"`
-pub(crate) fn orgify_text<T: AsRef<str>>(raw_text: T) -> String {
-    let raw_text = raw_text.as_ref();
-    let mut ret = String::with_capacity(raw_text.len());
-    let mut state = ParserState::Normal;
-    for c in raw_text.chars() {
-        state = match (&state, c) {
-            (ParserState::Normal, _) if " \t\r\n".contains(c) => {
-                ret.push(' ');
-                ParserState::InWhitespace
-            }
-            (ParserState::InWhitespace, _) if " \t\r\n".contains(c) => ParserState::InWhitespace,
-            (ParserState::Normal, _) => {
-                ret.push(c);
-                ParserState::Normal
-            }
-            (ParserState::InWhitespace, _) => {
-                ret.push(c);
-                ParserState::Normal
-            }
-        };
-    }
-    ret
-}
-
 impl<'s> RegularLink<'s> {
    /// Orgify the raw_link if it contains line breaks.
-    pub fn get_raw_link(&self) -> String {
-        if self.raw_link.contains('\n') {
-            orgify_text(Borrow::<str>::borrow(&self.raw_link))
-        } else {
-            self.raw_link.clone().into_owned()
-        }
+    pub fn get_raw_link(&'s self) -> Cow<'s, str> {
+        coalesce_whitespace_if_line_break(&self.raw_link)
    }

    /// Orgify the path if it contains line breaks.
-    pub fn get_path(&self) -> String {
-        if self.path.contains('\n') {
-            orgify_text(Borrow::<str>::borrow(&self.path))
-        } else {
-            self.path.clone().into_owned()
-        }
+    pub fn get_path(&'s self) -> Cow<'s, str> {
+        coalesce_whitespace_if_line_break(&self.path)
    }

    /// Orgify the search_option if it contains line breaks.
-    pub fn get_search_option(&self) -> Option<String> {
-        self.search_option.as_ref().map(|search_option| {
-            if search_option.contains('\n') {
-                orgify_text(search_option)
-            } else {
-                search_option.clone().into_owned()
-            }
-        })
+    pub fn get_search_option(&'s self) -> Option<Cow<'s, str>> {
+        self.search_option
+            .as_ref()
+            .map(|search_option| coalesce_whitespace_if_line_break(search_option.borrow()))
    }
 }

@@ -735,11 +691,6 @@ impl<'s> RadioLink<'s> {
    }
 }

-enum PathState {
-    Normal,
-    HasLineBreak(String),
-}
-
 impl<'s> AngleLink<'s> {
    /// Remove line breaks but preserve multiple consecutive spaces.
    pub fn get_path(&self) -> Cow<'s, str> {
--- a/src/types/util.rs
+++ b/src/types/util.rs
@@ -75,3 +75,125 @@ enum RemoveLineBreakState {
    Normal,
    HasLineBreak(String),
 }
+
+/// Coalesces each run of whitespace into a single space if any line breaks are present.
+///
+/// Example: "foo  bar" => "foo  bar" but "foo \n bar" => "foo bar".
+pub(crate) fn coalesce_whitespace_if_line_break<'s>(input: &'s str) -> Cow<'s, str> {
+    let mut state = CoalesceWhitespaceIfLineBreakState::Normal;
+    for (offset, c) in input.char_indices() {
+        match (&mut state, c) {
+            (CoalesceWhitespaceIfLineBreakState::Normal, '\n') => {
+                // Hit line break without any preceding whitespace
+                let mut ret = String::with_capacity(input.len());
+                ret.push_str(&input[..offset]);
+                ret.push(' ');
+                state = CoalesceWhitespaceIfLineBreakState::HasLineBreak {
+                    in_whitespace: true,
+                    ret,
+                };
+            }
+            (CoalesceWhitespaceIfLineBreakState::Normal, ' ' | '\t') => {
+                state = CoalesceWhitespaceIfLineBreakState::HasWhitespace {
+                    in_whitespace: true,
+                    first_whitespace_offset: offset,
+                };
+            }
+            (CoalesceWhitespaceIfLineBreakState::Normal, _) => {}
+
+            (
+                CoalesceWhitespaceIfLineBreakState::HasWhitespace {
+                    in_whitespace,
+                    first_whitespace_offset,
+                },
+                '\n',
+            ) => {
+                // Hit line break with preceding whitespace so we add all the text up to the first whitespace and then process the remaining text coalescing the whitespace.
+                let mut ret = String::with_capacity(input.len());
+                ret.push_str(&input[..*first_whitespace_offset]);
+                let mut sub_loop_in_whitespace = false;
+                for c in input[*first_whitespace_offset..offset].chars() {
+                    if sub_loop_in_whitespace {
+                        if !c.is_ascii_whitespace() {
+                            // Preceding character was whitespace but this is not.
+                            sub_loop_in_whitespace = false;
+                            ret.push(c);
+                        }
+                        // Do nothing if preceding character was whitespace and this character also is whitespace.
+                    } else {
+                        if c.is_ascii_whitespace() {
+                            // Preceding character was not whitespace but this is.
+                            sub_loop_in_whitespace = true;
+                            ret.push(' ');
+                        } else {
+                            // Preceding character was not whitespace and this is not either.
+                            ret.push(c);
+                        }
+                    }
+                }
+                if !*in_whitespace {
+                    // If this line break was the start of whitespace then we need to inject a space character for it.
+                    ret.push(' ');
+                }
+                state = CoalesceWhitespaceIfLineBreakState::HasLineBreak {
+                    in_whitespace: true, // This was triggered by a line break which is whitespace.
+                    ret,
+                };
+            }
+            (
+                CoalesceWhitespaceIfLineBreakState::HasWhitespace {
+                    in_whitespace,
+                    first_whitespace_offset: _,
+                },
+                ' ' | '\t',
+            ) => {
+                *in_whitespace = true;
+            }
+            (
+                CoalesceWhitespaceIfLineBreakState::HasWhitespace {
+                    in_whitespace,
+                    first_whitespace_offset: _,
+                },
+                _,
+            ) => {
+                *in_whitespace = false;
+            }
+            (
+                CoalesceWhitespaceIfLineBreakState::HasLineBreak { in_whitespace, ret },
+                ' ' | '\t' | '\r' | '\n',
+            ) => {
+                if !*in_whitespace {
+                    ret.push(' ');
+                }
+                *in_whitespace = true;
+            }
+            (CoalesceWhitespaceIfLineBreakState::HasLineBreak { in_whitespace, ret }, _) => {
+                *in_whitespace = false;
+                ret.push(c);
+            }
+        }
+    }
+    match state {
+        CoalesceWhitespaceIfLineBreakState::Normal => Cow::Borrowed(input),
+        CoalesceWhitespaceIfLineBreakState::HasWhitespace {
+            in_whitespace: _,
+            first_whitespace_offset: _,
+        } => Cow::Borrowed(input),
+        CoalesceWhitespaceIfLineBreakState::HasLineBreak {
+            in_whitespace: _,
+            ret,
+        } => Cow::Owned(ret),
+    }
+}
+
+enum CoalesceWhitespaceIfLineBreakState {
+    Normal,
+    HasWhitespace {
+        in_whitespace: bool,
+        first_whitespace_offset: usize,
+    },
+    HasLineBreak {
+        in_whitespace: bool,
+        ret: String,
+    },
+}