Update getting the previous character and previous line.

This can be done a lot more efficiently now that we are keeping track of this information in the wrapped input type instead of having to fetch to the original document out of the context tree.
2023-08-24 16:55:56 -04:00 · 2023-08-24 16:55:56 -04:00 · 720afa5d32
commit 720afa5d32
parent dab598e5e7
9 changed files with 59 additions and 114 deletions
--- a/src/parser/latex_fragment.rs
+++ b/src/parser/latex_fragment.rs
@ -21,7 +21,6 @@ use crate::error::Res;
 use crate::parser::parser_with_context::parser_with_context;
 use crate::parser::util::exit_matcher_parser;
 use crate::parser::util::get_consumed;
-use crate::parser::util::get_one_before;
 use crate::parser::LatexFragment;

 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -175,10 +174,7 @@ fn dollar_char_fragment<'r, 's>(

 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
 pub fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
-    let document_root = context.get_document_root().unwrap();
-    let preceding_character = get_one_before(document_root, input)
-        .map(|slice| slice.chars().next())
-        .flatten();
+    let preceding_character = input.get_preceding_character();
    if let Some('$') = preceding_character {
        return Err(nom::Err::Error(CustomError::MyError(MyError(
            "Not a valid pre character for dollar char fragment.".into(),
@ -239,10 +235,7 @@ pub fn close_border<'r, 's>(
    context: Context<'r, 's>,
    input: OrgSource<'s>,
 ) -> Res<OrgSource<'s>, ()> {
-    let document_root = context.get_document_root().unwrap();
-    let preceding_character = get_one_before(document_root, input)
-        .map(|slice| slice.chars().next())
-        .flatten();
+    let preceding_character = input.get_preceding_character();
    match preceding_character {
        Some(c) if !c.is_whitespace() && !".,;$".contains(c) => Ok((input, ())),
        _ => {
--- a/src/parser/line_break.rs
+++ b/src/parser/line_break.rs
@ -10,8 +10,6 @@ use crate::error::CustomError;
 use crate::error::MyError;
 use crate::error::Res;
 use crate::parser::util::get_consumed;
-use crate::parser::util::get_current_line_before_position;
-use crate::parser::util::get_one_before;
 use crate::parser::LineBreak;

 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -34,10 +32,7 @@ pub fn line_break<'r, 's>(

 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
 fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
-    let document_root = context.get_document_root().unwrap();
-    let preceding_character = get_one_before(document_root, input)
-        .map(|slice| slice.chars().next())
-        .flatten();
+    let preceding_character = input.get_preceding_character();
    match preceding_character {
        // If None, we are at the start of the file
        None | Some('\\') => {
@ -47,21 +42,13 @@ fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res<OrgSource<
        }
        _ => {}
    };
-    let current_line = get_current_line_before_position(document_root, input);
-    match current_line {
-        Some(line) => {
-            let is_non_empty_line = Into::<&str>::into(line).chars().any(|c| !c.is_whitespace());
-            if !is_non_empty_line {
-                return Err(nom::Err::Error(CustomError::MyError(MyError(
-                    "Not a valid pre line for line break.".into(),
-                ))));
-            }
-        }
-        None => {
-            return Err(nom::Err::Error(CustomError::MyError(MyError(
-                "No preceding line for line break.".into(),
-            ))));
-        }
+
+    let current_line = input.text_since_line_break();
+    let is_non_empty_line = current_line.chars().any(|c| !c.is_whitespace());
+    if !is_non_empty_line {
+        return Err(nom::Err::Error(CustomError::MyError(MyError(
+            "Not a valid pre line for line break.".into(),
+        ))));
    }

    Ok((input, ()))
--- a/src/parser/org_source.rs
+++ b/src/parser/org_source.rs
@ -17,6 +17,7 @@ pub struct OrgSource<'s> {
    start: usize,
    end: usize, // exclusive
    preceding_line_break: Option<usize>,
+    preceding_character: Option<char>,
 }

 impl<'s> OrgSource<'s> {
@ -29,6 +30,7 @@ impl<'s> OrgSource<'s> {
            start: 0,
            end: input.len(),
            preceding_line_break: None,
+            preceding_character: None,
        }
    }

@ -41,6 +43,17 @@ impl<'s> OrgSource<'s> {
    pub fn len(&self) -> usize {
        self.end - self.start
    }
+
+    pub fn get_preceding_character(&self) -> Option<char> {
+        todo!()
+    }
+
+    pub fn is_at_start_of_line(&self) -> bool {
+        self.start == 0 || match self.preceding_line_break {
+            Some(offset) => offset == self.start - 1,
+            None => false,
+        }
+    }
 }

 impl<'s> InputTake for OrgSource<'s> {
@ -114,6 +127,7 @@ where
            start: new_start,
            end: new_end,
            preceding_line_break: last_line_feed,
+            preceding_character: skipped_text.chars().last()
        }
    }
 }
@ -325,4 +339,22 @@ mod tests {
        assert_eq!(input.slice(6..).text_since_line_break(), "");
        assert_eq!(input.slice(6..).slice(10..).text_since_line_break(), "ba");
    }
+
+    #[test]
+    fn preceding_character() {
+        let input = OrgSource::new("lorem\nfoo\nbar\nbaz\nipsum");
+        assert_eq!(input.get_preceding_character(), None);
+        assert_eq!(input.slice(5..).get_preceding_character(), Some('m'));
+        assert_eq!(input.slice(6..).get_preceding_character(), Some('\n'));
+        assert_eq!(input.slice(6..).slice(10..).get_preceding_character(), Some('a'));
+    }
+
+    #[test]
+    fn is_at_start_of_line() {
+        let input = OrgSource::new("lorem\nfoo\nbar\nbaz\nipsum");
+        assert_eq!(input.is_at_start_of_line(), true);
+        assert_eq!(input.slice(5..).is_at_start_of_line(), false);
+        assert_eq!(input.slice(6..).is_at_start_of_line(), true);
+        assert_eq!(input.slice(6..).slice(10..).is_at_start_of_line(), false);
+    }
 }
--- a/src/parser/plain_link.rs
+++ b/src/parser/plain_link.rs
@ -21,7 +21,6 @@ use crate::parser::parser_context::ExitMatcherNode;
 use crate::parser::parser_with_context::parser_with_context;
 use crate::parser::util::exit_matcher_parser;
 use crate::parser::util::get_consumed;
-use crate::parser::util::get_one_before;
 use crate::parser::util::WORD_CONSTITUENT_CHARACTERS;

 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -47,10 +46,7 @@ pub fn plain_link<'r, 's>(

 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
 fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
-    let document_root = context.get_document_root().unwrap();
-    let preceding_character = get_one_before(document_root, input)
-        .map(|slice| slice.chars().next())
-        .flatten();
+    let preceding_character = input.get_preceding_character();
    match preceding_character {
        // If None, we are at the start of the file which is fine
        None => {}
--- a/src/parser/radio_link.rs
+++ b/src/parser/radio_link.rs
@ -156,7 +156,7 @@ mod tests {
            crate::parser::Element::Paragraph(paragraph) => paragraph,
            _ => panic!("Should be a paragraph!"),
        };
-        assert_eq!(remaining, "");
+        assert_eq!(Into::<&str>::into(remaining), "");
        assert_eq!(first_paragraph.get_source(), "foo bar baz");
        assert_eq!(first_paragraph.children.len(), 3);
        assert_eq!(
@ -183,12 +183,13 @@ mod tests {
            .with_additional_node(ContextElement::DocumentRoot(input))
            .with_additional_node(ContextElement::RadioTarget(vec![&radio_target_match]));
        let paragraph_matcher = parser_with_context!(element(true))(&document_context);
-        let (remaining, first_paragraph) = paragraph_matcher(input).expect("Parse first paragraph");
+        let (remaining, first_paragraph) =
+            paragraph_matcher(input.into()).expect("Parse first paragraph");
        let first_paragraph = match first_paragraph {
            crate::parser::Element::Paragraph(paragraph) => paragraph,
            _ => panic!("Should be a paragraph!"),
        };
-        assert_eq!(remaining, "");
+        assert_eq!(Into::<&str>::into(remaining), "");
        assert_eq!(first_paragraph.get_source(), "foo *bar* baz");
        assert_eq!(first_paragraph.children.len(), 3);
        assert_eq!(
--- a/src/parser/subscript_and_superscript.rs
+++ b/src/parser/subscript_and_superscript.rs
@ -25,7 +25,6 @@ use crate::parser::parser_context::SubscriptSuperscriptBrace;
 use crate::parser::parser_with_context::parser_with_context;
 use crate::parser::util::exit_matcher_parser;
 use crate::parser::util::get_consumed;
-use crate::parser::util::get_one_before;
 use crate::parser::Subscript;
 use crate::parser::Superscript;

@ -69,10 +68,7 @@ pub fn superscript<'r, 's>(

 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
 fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
-    let document_root = context.get_document_root().unwrap();
-    let preceding_character = get_one_before(document_root, input)
-        .map(|slice| slice.chars().next())
-        .flatten();
+    let preceding_character = input.get_preceding_character();
    match preceding_character {
        Some(c) if !c.is_whitespace() => {}
        _ => {
--- a/src/parser/target.rs
+++ b/src/parser/target.rs
@ -18,7 +18,6 @@ use crate::parser::parser_context::ExitMatcherNode;
 use crate::parser::parser_with_context::parser_with_context;
 use crate::parser::util::exit_matcher_parser;
 use crate::parser::util::get_consumed;
-use crate::parser::util::get_one_before;
 use crate::parser::Target;

 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -41,10 +40,8 @@ pub fn target<'r, 's>(
        parser_with_context!(exit_matcher_parser)(&parser_context),
    ))(remaining)?;

-    let document_root = context.get_document_root().unwrap();
-    let preceding_character = get_one_before(document_root, remaining)
-        .map(|slice| slice.chars().next())
-        .flatten()
+    let preceding_character = remaining
+        .get_preceding_character()
        .expect("We cannot be at the start of the file because we are inside a target.");
    if preceding_character.is_whitespace() {
        return Err(nom::Err::Error(CustomError::MyError(MyError(
--- a/src/parser/text_markup.rs
+++ b/src/parser/text_markup.rs
@ -29,7 +29,6 @@ use crate::parser::parser_with_context::parser_with_context;
 use crate::parser::radio_link::rematch_target;
 use crate::parser::util::exit_matcher_parser;
 use crate::parser::util::get_consumed;
-use crate::parser::util::get_one_before;
 use crate::parser::util::preceded_by_whitespace;
 use crate::parser::Bold;
 use crate::parser::Code;
@ -261,10 +260,7 @@ fn _text_markup_string<'r, 's, 'x>(

 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
 pub fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
-    let document_root = context.get_document_root().unwrap();
-    let preceding_character = get_one_before(document_root, input)
-        .map(|slice| slice.chars().next())
-        .flatten();
+    let preceding_character = input.get_preceding_character();
    match preceding_character {
        // If None, we are at the start of the file which is technically the beginning of a line.
        None | Some('\r') | Some('\n') | Some(' ') | Some('\t') | Some('-') | Some('(')
--- a/src/parser/util.rs
+++ b/src/parser/util.rs
@ -49,47 +49,6 @@ pub fn immediate_in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name:
    false
 }

-/// Get one character from before the current position.
-pub fn get_one_before<'s>(document: &'s str, current_position: OrgSource<'s>) -> Option<&'s str> {
-    // TODO: This should be replaced with new logic now that we are wrapping the input type.
-    let current_position = Into::<&str>::into(&current_position);
-    assert!(is_slice_of(document, current_position));
-    if document.as_ptr() as usize == current_position.as_ptr() as usize {
-        return None;
-    }
-    let offset = current_position.as_ptr() as usize - document.as_ptr() as usize;
-    let previous_character_offset = document.floor_char_boundary(offset - 1);
-    Some(&document[previous_character_offset..offset])
-}
-
-/// Get the line current_position is on up until current_position
-pub fn get_current_line_before_position<'s>(
-    document: &'s str,
-    current_position: OrgSource<'s>,
-) -> Option<OrgSource<'s>> {
-    // TODO: This should be replaced with new logic now that we are wrapping the input type.
-    let current_position = Into::<&str>::into(&current_position);
-    assert!(is_slice_of(document, current_position));
-    if document.as_ptr() as usize == current_position.as_ptr() as usize {
-        return None;
-    }
-    let offset = current_position.as_ptr() as usize - document.as_ptr() as usize;
-    let mut previous_character_offset = offset;
-    loop {
-        let new_offset = document.floor_char_boundary(previous_character_offset - 1);
-        let new_line = &document[new_offset..offset];
-        let leading_char = new_line
-            .chars()
-            .next()
-            .expect("Impossible to not have at least 1 character to read.");
-        if "\r\n".contains(leading_char) || new_offset == 0 {
-            break;
-        }
-        previous_character_offset = new_offset;
-    }
-    Some((&document[previous_character_offset..offset]).into())
-}
-
 /// Check if the child string slice is a slice of the parent string slice.
 fn is_slice_of(parent: &str, child: &str) -> bool {
    let parent_start = parent.as_ptr() as usize;
@ -170,22 +129,13 @@ pub fn start_of_line<'r, 's>(
    context: Context<'r, 's>,
    input: OrgSource<'s>,
 ) -> Res<OrgSource<'s>, ()> {
-    let document_root = context.get_document_root().unwrap();
-    let preceding_character = get_one_before(document_root, input)
-        .map(|slice| slice.chars().next())
-        .flatten();
-    match preceding_character {
-        Some('\n') => {}
-        Some(_) => {
-            // Not at start of line, cannot be a heading
-            return Err(nom::Err::Error(CustomError::MyError(MyError(
-                "Not at start of line".into(),
-            ))));
-        }
-        // If None, we are at the start of the file which allows for headings
-        None => {}
-    };
-    Ok((input, ()))
+    if input.is_at_start_of_line() {
+        Ok((input, ()))
+    } else {
+        Err(nom::Err::Error(CustomError::MyError(MyError(
+            "Not at start of line".into(),
+        ))))
+    }
 }

 /// Check that we are at the start of a line
@ -194,10 +144,7 @@ pub fn preceded_by_whitespace<'r, 's>(
    context: Context<'r, 's>,
    input: OrgSource<'s>,
 ) -> Res<OrgSource<'s>, ()> {
-    let document_root = context.get_document_root().unwrap();
-    let preceding_character = get_one_before(document_root, input)
-        .map(|slice| slice.chars().next())
-        .flatten();
+    let preceding_character = input.get_preceding_character();
    match preceding_character {
        Some('\n') | Some('\r') | Some(' ') | Some('\t') => {}
        // If None, we are at the start of the file which is not allowed