From 720afa5d3227d90d0958a9a829484f7ea540ea44 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 24 Aug 2023 16:55:56 -0400 Subject: [PATCH] Update getting the previous character and previous line. This can be done a lot more efficiently now that we are keeping track of this information in the wrapped input type instead of having to fetch to the original document out of the context tree. --- src/parser/latex_fragment.rs | 11 +--- src/parser/line_break.rs | 29 +++-------- src/parser/org_source.rs | 32 ++++++++++++ src/parser/plain_link.rs | 6 +-- src/parser/radio_link.rs | 7 +-- src/parser/subscript_and_superscript.rs | 6 +-- src/parser/target.rs | 7 +-- src/parser/text_markup.rs | 6 +-- src/parser/util.rs | 69 +++---------------------- 9 files changed, 59 insertions(+), 114 deletions(-) diff --git a/src/parser/latex_fragment.rs b/src/parser/latex_fragment.rs index 75073eb..65ef846 100644 --- a/src/parser/latex_fragment.rs +++ b/src/parser/latex_fragment.rs @@ -21,7 +21,6 @@ use crate::error::Res; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; -use crate::parser::util::get_one_before; use crate::parser::LatexFragment; #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] @@ -175,10 +174,7 @@ fn dollar_char_fragment<'r, 's>( #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); + let preceding_character = input.get_preceding_character(); if let Some('$') = preceding_character { return Err(nom::Err::Error(CustomError::MyError(MyError( "Not a valid pre character for dollar char fragment.".into(), @@ -239,10 +235,7 @@ pub fn close_border<'r, 's>( context: Context<'r, 's>, input: OrgSource<'s>, ) -> Res, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); + let preceding_character = input.get_preceding_character(); match preceding_character { Some(c) if !c.is_whitespace() && !".,;$".contains(c) => Ok((input, ())), _ => { diff --git a/src/parser/line_break.rs b/src/parser/line_break.rs index 6d359e5..cc08400 100644 --- a/src/parser/line_break.rs +++ b/src/parser/line_break.rs @@ -10,8 +10,6 @@ use crate::error::CustomError; use crate::error::MyError; use crate::error::Res; use crate::parser::util::get_consumed; -use crate::parser::util::get_current_line_before_position; -use crate::parser::util::get_one_before; use crate::parser::LineBreak; #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] @@ -34,10 +32,7 @@ pub fn line_break<'r, 's>( #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); + let preceding_character = input.get_preceding_character(); match preceding_character { // If None, we are at the start of the file None | Some('\\') => { @@ -47,21 +42,13 @@ fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res {} }; - let current_line = get_current_line_before_position(document_root, input); - match current_line { - Some(line) => { - let is_non_empty_line = Into::<&str>::into(line).chars().any(|c| !c.is_whitespace()); - if !is_non_empty_line { - return Err(nom::Err::Error(CustomError::MyError(MyError( - "Not a valid pre line for line break.".into(), - )))); - } - } - None => { - return Err(nom::Err::Error(CustomError::MyError(MyError( - "No preceding line for line break.".into(), - )))); - } + + let current_line = input.text_since_line_break(); + let is_non_empty_line = current_line.chars().any(|c| !c.is_whitespace()); + if !is_non_empty_line { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Not a valid pre line for line break.".into(), + )))); } Ok((input, ())) diff --git a/src/parser/org_source.rs b/src/parser/org_source.rs index a3ac2ac..5ebc037 100644 --- a/src/parser/org_source.rs +++ b/src/parser/org_source.rs @@ -17,6 +17,7 @@ pub struct OrgSource<'s> { start: usize, end: usize, // exclusive preceding_line_break: Option, + preceding_character: Option, } impl<'s> OrgSource<'s> { @@ -29,6 +30,7 @@ impl<'s> OrgSource<'s> { start: 0, end: input.len(), preceding_line_break: None, + preceding_character: None, } } @@ -41,6 +43,17 @@ impl<'s> OrgSource<'s> { pub fn len(&self) -> usize { self.end - self.start } + + pub fn get_preceding_character(&self) -> Option { + todo!() + } + + pub fn is_at_start_of_line(&self) -> bool { + self.start == 0 || match self.preceding_line_break { + Some(offset) => offset == self.start - 1, + None => false, + } + } } impl<'s> InputTake for OrgSource<'s> { @@ -114,6 +127,7 @@ where start: new_start, end: new_end, preceding_line_break: last_line_feed, + preceding_character: skipped_text.chars().last() } } } @@ -325,4 +339,22 @@ mod tests { assert_eq!(input.slice(6..).text_since_line_break(), ""); assert_eq!(input.slice(6..).slice(10..).text_since_line_break(), "ba"); } + + #[test] + fn preceding_character() { + let input = OrgSource::new("lorem\nfoo\nbar\nbaz\nipsum"); + assert_eq!(input.get_preceding_character(), None); + assert_eq!(input.slice(5..).get_preceding_character(), Some('m')); + assert_eq!(input.slice(6..).get_preceding_character(), Some('\n')); + assert_eq!(input.slice(6..).slice(10..).get_preceding_character(), Some('a')); + } + + #[test] + fn is_at_start_of_line() { + let input = OrgSource::new("lorem\nfoo\nbar\nbaz\nipsum"); + assert_eq!(input.is_at_start_of_line(), true); + assert_eq!(input.slice(5..).is_at_start_of_line(), false); + assert_eq!(input.slice(6..).is_at_start_of_line(), true); + assert_eq!(input.slice(6..).slice(10..).is_at_start_of_line(), false); + } } diff --git a/src/parser/plain_link.rs b/src/parser/plain_link.rs index 8c99a5d..534d6cb 100644 --- a/src/parser/plain_link.rs +++ b/src/parser/plain_link.rs @@ -21,7 +21,6 @@ use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; -use crate::parser::util::get_one_before; use crate::parser::util::WORD_CONSTITUENT_CHARACTERS; #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] @@ -47,10 +46,7 @@ pub fn plain_link<'r, 's>( #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); + let preceding_character = input.get_preceding_character(); match preceding_character { // If None, we are at the start of the file which is fine None => {} diff --git a/src/parser/radio_link.rs b/src/parser/radio_link.rs index 8f72205..1bef234 100644 --- a/src/parser/radio_link.rs +++ b/src/parser/radio_link.rs @@ -156,7 +156,7 @@ mod tests { crate::parser::Element::Paragraph(paragraph) => paragraph, _ => panic!("Should be a paragraph!"), }; - assert_eq!(remaining, ""); + assert_eq!(Into::<&str>::into(remaining), ""); assert_eq!(first_paragraph.get_source(), "foo bar baz"); assert_eq!(first_paragraph.children.len(), 3); assert_eq!( @@ -183,12 +183,13 @@ mod tests { .with_additional_node(ContextElement::DocumentRoot(input)) .with_additional_node(ContextElement::RadioTarget(vec![&radio_target_match])); let paragraph_matcher = parser_with_context!(element(true))(&document_context); - let (remaining, first_paragraph) = paragraph_matcher(input).expect("Parse first paragraph"); + let (remaining, first_paragraph) = + paragraph_matcher(input.into()).expect("Parse first paragraph"); let first_paragraph = match first_paragraph { crate::parser::Element::Paragraph(paragraph) => paragraph, _ => panic!("Should be a paragraph!"), }; - assert_eq!(remaining, ""); + assert_eq!(Into::<&str>::into(remaining), ""); assert_eq!(first_paragraph.get_source(), "foo *bar* baz"); assert_eq!(first_paragraph.children.len(), 3); assert_eq!( diff --git a/src/parser/subscript_and_superscript.rs b/src/parser/subscript_and_superscript.rs index 529dc76..fa94468 100644 --- a/src/parser/subscript_and_superscript.rs +++ b/src/parser/subscript_and_superscript.rs @@ -25,7 +25,6 @@ use crate::parser::parser_context::SubscriptSuperscriptBrace; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; -use crate::parser::util::get_one_before; use crate::parser::Subscript; use crate::parser::Superscript; @@ -69,10 +68,7 @@ pub fn superscript<'r, 's>( #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); + let preceding_character = input.get_preceding_character(); match preceding_character { Some(c) if !c.is_whitespace() => {} _ => { diff --git a/src/parser/target.rs b/src/parser/target.rs index 02c5aa1..026b31f 100644 --- a/src/parser/target.rs +++ b/src/parser/target.rs @@ -18,7 +18,6 @@ use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; -use crate::parser::util::get_one_before; use crate::parser::Target; #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] @@ -41,10 +40,8 @@ pub fn target<'r, 's>( parser_with_context!(exit_matcher_parser)(&parser_context), ))(remaining)?; - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, remaining) - .map(|slice| slice.chars().next()) - .flatten() + let preceding_character = remaining + .get_preceding_character() .expect("We cannot be at the start of the file because we are inside a target."); if preceding_character.is_whitespace() { return Err(nom::Err::Error(CustomError::MyError(MyError( diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index fc340be..3dd98c7 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -29,7 +29,6 @@ use crate::parser::parser_with_context::parser_with_context; use crate::parser::radio_link::rematch_target; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; -use crate::parser::util::get_one_before; use crate::parser::util::preceded_by_whitespace; use crate::parser::Bold; use crate::parser::Code; @@ -261,10 +260,7 @@ fn _text_markup_string<'r, 's, 'x>( #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); + let preceding_character = input.get_preceding_character(); match preceding_character { // If None, we are at the start of the file which is technically the beginning of a line. None | Some('\r') | Some('\n') | Some(' ') | Some('\t') | Some('-') | Some('(') diff --git a/src/parser/util.rs b/src/parser/util.rs index 64ba6bf..a62197b 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -49,47 +49,6 @@ pub fn immediate_in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: false } -/// Get one character from before the current position. -pub fn get_one_before<'s>(document: &'s str, current_position: OrgSource<'s>) -> Option<&'s str> { - // TODO: This should be replaced with new logic now that we are wrapping the input type. - let current_position = Into::<&str>::into(¤t_position); - assert!(is_slice_of(document, current_position)); - if document.as_ptr() as usize == current_position.as_ptr() as usize { - return None; - } - let offset = current_position.as_ptr() as usize - document.as_ptr() as usize; - let previous_character_offset = document.floor_char_boundary(offset - 1); - Some(&document[previous_character_offset..offset]) -} - -/// Get the line current_position is on up until current_position -pub fn get_current_line_before_position<'s>( - document: &'s str, - current_position: OrgSource<'s>, -) -> Option> { - // TODO: This should be replaced with new logic now that we are wrapping the input type. - let current_position = Into::<&str>::into(¤t_position); - assert!(is_slice_of(document, current_position)); - if document.as_ptr() as usize == current_position.as_ptr() as usize { - return None; - } - let offset = current_position.as_ptr() as usize - document.as_ptr() as usize; - let mut previous_character_offset = offset; - loop { - let new_offset = document.floor_char_boundary(previous_character_offset - 1); - let new_line = &document[new_offset..offset]; - let leading_char = new_line - .chars() - .next() - .expect("Impossible to not have at least 1 character to read."); - if "\r\n".contains(leading_char) || new_offset == 0 { - break; - } - previous_character_offset = new_offset; - } - Some((&document[previous_character_offset..offset]).into()) -} - /// Check if the child string slice is a slice of the parent string slice. fn is_slice_of(parent: &str, child: &str) -> bool { let parent_start = parent.as_ptr() as usize; @@ -170,22 +129,13 @@ pub fn start_of_line<'r, 's>( context: Context<'r, 's>, input: OrgSource<'s>, ) -> Res, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); - match preceding_character { - Some('\n') => {} - Some(_) => { - // Not at start of line, cannot be a heading - return Err(nom::Err::Error(CustomError::MyError(MyError( - "Not at start of line".into(), - )))); - } - // If None, we are at the start of the file which allows for headings - None => {} - }; - Ok((input, ())) + if input.is_at_start_of_line() { + Ok((input, ())) + } else { + Err(nom::Err::Error(CustomError::MyError(MyError( + "Not at start of line".into(), + )))) + } } /// Check that we are at the start of a line @@ -194,10 +144,7 @@ pub fn preceded_by_whitespace<'r, 's>( context: Context<'r, 's>, input: OrgSource<'s>, ) -> Res, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); + let preceding_character = input.get_preceding_character(); match preceding_character { Some('\n') | Some('\r') | Some(' ') | Some('\t') => {} // If None, we are at the start of the file which is not allowed