diff --git a/src/parser/latex_fragment.rs b/src/parser/latex_fragment.rs index 75073eb..65ef846 100644 --- a/src/parser/latex_fragment.rs +++ b/src/parser/latex_fragment.rs @@ -21,7 +21,6 @@ use crate::error::Res; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; -use crate::parser::util::get_one_before; use crate::parser::LatexFragment; #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] @@ -175,10 +174,7 @@ fn dollar_char_fragment<'r, 's>( #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); + let preceding_character = input.get_preceding_character(); if let Some('$') = preceding_character { return Err(nom::Err::Error(CustomError::MyError(MyError( "Not a valid pre character for dollar char fragment.".into(), @@ -239,10 +235,7 @@ pub fn close_border<'r, 's>( context: Context<'r, 's>, input: OrgSource<'s>, ) -> Res, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); + let preceding_character = input.get_preceding_character(); match preceding_character { Some(c) if !c.is_whitespace() && !".,;$".contains(c) => Ok((input, ())), _ => { diff --git a/src/parser/line_break.rs b/src/parser/line_break.rs index 6d359e5..cc08400 100644 --- a/src/parser/line_break.rs +++ b/src/parser/line_break.rs @@ -10,8 +10,6 @@ use crate::error::CustomError; use crate::error::MyError; use crate::error::Res; use crate::parser::util::get_consumed; -use crate::parser::util::get_current_line_before_position; -use crate::parser::util::get_one_before; use crate::parser::LineBreak; #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] @@ -34,10 +32,7 @@ pub fn line_break<'r, 's>( #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); + let preceding_character = input.get_preceding_character(); match preceding_character { // If None, we are at the start of the file None | Some('\\') => { @@ -47,21 +42,13 @@ fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res {} }; - let current_line = get_current_line_before_position(document_root, input); - match current_line { - Some(line) => { - let is_non_empty_line = Into::<&str>::into(line).chars().any(|c| !c.is_whitespace()); - if !is_non_empty_line { - return Err(nom::Err::Error(CustomError::MyError(MyError( - "Not a valid pre line for line break.".into(), - )))); - } - } - None => { - return Err(nom::Err::Error(CustomError::MyError(MyError( - "No preceding line for line break.".into(), - )))); - } + + let current_line = input.text_since_line_break(); + let is_non_empty_line = current_line.chars().any(|c| !c.is_whitespace()); + if !is_non_empty_line { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Not a valid pre line for line break.".into(), + )))); } Ok((input, ())) diff --git a/src/parser/org_source.rs b/src/parser/org_source.rs index a3ac2ac..5ebc037 100644 --- a/src/parser/org_source.rs +++ b/src/parser/org_source.rs @@ -17,6 +17,7 @@ pub struct OrgSource<'s> { start: usize, end: usize, // exclusive preceding_line_break: Option, + preceding_character: Option, } impl<'s> OrgSource<'s> { @@ -29,6 +30,7 @@ impl<'s> OrgSource<'s> { start: 0, end: input.len(), preceding_line_break: None, + preceding_character: None, } } @@ -41,6 +43,17 @@ impl<'s> OrgSource<'s> { pub fn len(&self) -> usize { self.end - self.start } + + pub fn get_preceding_character(&self) -> Option { + todo!() + } + + pub fn is_at_start_of_line(&self) -> bool { + self.start == 0 || match self.preceding_line_break { + Some(offset) => offset == self.start - 1, + None => false, + } + } } impl<'s> InputTake for OrgSource<'s> { @@ -114,6 +127,7 @@ where start: new_start, end: new_end, preceding_line_break: last_line_feed, + preceding_character: skipped_text.chars().last() } } } @@ -325,4 +339,22 @@ mod tests { assert_eq!(input.slice(6..).text_since_line_break(), ""); assert_eq!(input.slice(6..).slice(10..).text_since_line_break(), "ba"); } + + #[test] + fn preceding_character() { + let input = OrgSource::new("lorem\nfoo\nbar\nbaz\nipsum"); + assert_eq!(input.get_preceding_character(), None); + assert_eq!(input.slice(5..).get_preceding_character(), Some('m')); + assert_eq!(input.slice(6..).get_preceding_character(), Some('\n')); + assert_eq!(input.slice(6..).slice(10..).get_preceding_character(), Some('a')); + } + + #[test] + fn is_at_start_of_line() { + let input = OrgSource::new("lorem\nfoo\nbar\nbaz\nipsum"); + assert_eq!(input.is_at_start_of_line(), true); + assert_eq!(input.slice(5..).is_at_start_of_line(), false); + assert_eq!(input.slice(6..).is_at_start_of_line(), true); + assert_eq!(input.slice(6..).slice(10..).is_at_start_of_line(), false); + } } diff --git a/src/parser/plain_link.rs b/src/parser/plain_link.rs index 8c99a5d..534d6cb 100644 --- a/src/parser/plain_link.rs +++ b/src/parser/plain_link.rs @@ -21,7 +21,6 @@ use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; -use crate::parser::util::get_one_before; use crate::parser::util::WORD_CONSTITUENT_CHARACTERS; #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] @@ -47,10 +46,7 @@ pub fn plain_link<'r, 's>( #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); + let preceding_character = input.get_preceding_character(); match preceding_character { // If None, we are at the start of the file which is fine None => {} diff --git a/src/parser/radio_link.rs b/src/parser/radio_link.rs index 8f72205..1bef234 100644 --- a/src/parser/radio_link.rs +++ b/src/parser/radio_link.rs @@ -156,7 +156,7 @@ mod tests { crate::parser::Element::Paragraph(paragraph) => paragraph, _ => panic!("Should be a paragraph!"), }; - assert_eq!(remaining, ""); + assert_eq!(Into::<&str>::into(remaining), ""); assert_eq!(first_paragraph.get_source(), "foo bar baz"); assert_eq!(first_paragraph.children.len(), 3); assert_eq!( @@ -183,12 +183,13 @@ mod tests { .with_additional_node(ContextElement::DocumentRoot(input)) .with_additional_node(ContextElement::RadioTarget(vec![&radio_target_match])); let paragraph_matcher = parser_with_context!(element(true))(&document_context); - let (remaining, first_paragraph) = paragraph_matcher(input).expect("Parse first paragraph"); + let (remaining, first_paragraph) = + paragraph_matcher(input.into()).expect("Parse first paragraph"); let first_paragraph = match first_paragraph { crate::parser::Element::Paragraph(paragraph) => paragraph, _ => panic!("Should be a paragraph!"), }; - assert_eq!(remaining, ""); + assert_eq!(Into::<&str>::into(remaining), ""); assert_eq!(first_paragraph.get_source(), "foo *bar* baz"); assert_eq!(first_paragraph.children.len(), 3); assert_eq!( diff --git a/src/parser/subscript_and_superscript.rs b/src/parser/subscript_and_superscript.rs index 529dc76..fa94468 100644 --- a/src/parser/subscript_and_superscript.rs +++ b/src/parser/subscript_and_superscript.rs @@ -25,7 +25,6 @@ use crate::parser::parser_context::SubscriptSuperscriptBrace; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; -use crate::parser::util::get_one_before; use crate::parser::Subscript; use crate::parser::Superscript; @@ -69,10 +68,7 @@ pub fn superscript<'r, 's>( #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); + let preceding_character = input.get_preceding_character(); match preceding_character { Some(c) if !c.is_whitespace() => {} _ => { diff --git a/src/parser/target.rs b/src/parser/target.rs index 02c5aa1..026b31f 100644 --- a/src/parser/target.rs +++ b/src/parser/target.rs @@ -18,7 +18,6 @@ use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; -use crate::parser::util::get_one_before; use crate::parser::Target; #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] @@ -41,10 +40,8 @@ pub fn target<'r, 's>( parser_with_context!(exit_matcher_parser)(&parser_context), ))(remaining)?; - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, remaining) - .map(|slice| slice.chars().next()) - .flatten() + let preceding_character = remaining + .get_preceding_character() .expect("We cannot be at the start of the file because we are inside a target."); if preceding_character.is_whitespace() { return Err(nom::Err::Error(CustomError::MyError(MyError( diff --git a/src/parser/text_markup.rs b/src/parser/text_markup.rs index fc340be..3dd98c7 100644 --- a/src/parser/text_markup.rs +++ b/src/parser/text_markup.rs @@ -29,7 +29,6 @@ use crate::parser::parser_with_context::parser_with_context; use crate::parser::radio_link::rematch_target; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; -use crate::parser::util::get_one_before; use crate::parser::util::preceded_by_whitespace; use crate::parser::Bold; use crate::parser::Code; @@ -261,10 +260,7 @@ fn _text_markup_string<'r, 's, 'x>( #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); + let preceding_character = input.get_preceding_character(); match preceding_character { // If None, we are at the start of the file which is technically the beginning of a line. None | Some('\r') | Some('\n') | Some(' ') | Some('\t') | Some('-') | Some('(') diff --git a/src/parser/util.rs b/src/parser/util.rs index 64ba6bf..a62197b 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -49,47 +49,6 @@ pub fn immediate_in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: false } -/// Get one character from before the current position. -pub fn get_one_before<'s>(document: &'s str, current_position: OrgSource<'s>) -> Option<&'s str> { - // TODO: This should be replaced with new logic now that we are wrapping the input type. - let current_position = Into::<&str>::into(¤t_position); - assert!(is_slice_of(document, current_position)); - if document.as_ptr() as usize == current_position.as_ptr() as usize { - return None; - } - let offset = current_position.as_ptr() as usize - document.as_ptr() as usize; - let previous_character_offset = document.floor_char_boundary(offset - 1); - Some(&document[previous_character_offset..offset]) -} - -/// Get the line current_position is on up until current_position -pub fn get_current_line_before_position<'s>( - document: &'s str, - current_position: OrgSource<'s>, -) -> Option> { - // TODO: This should be replaced with new logic now that we are wrapping the input type. - let current_position = Into::<&str>::into(¤t_position); - assert!(is_slice_of(document, current_position)); - if document.as_ptr() as usize == current_position.as_ptr() as usize { - return None; - } - let offset = current_position.as_ptr() as usize - document.as_ptr() as usize; - let mut previous_character_offset = offset; - loop { - let new_offset = document.floor_char_boundary(previous_character_offset - 1); - let new_line = &document[new_offset..offset]; - let leading_char = new_line - .chars() - .next() - .expect("Impossible to not have at least 1 character to read."); - if "\r\n".contains(leading_char) || new_offset == 0 { - break; - } - previous_character_offset = new_offset; - } - Some((&document[previous_character_offset..offset]).into()) -} - /// Check if the child string slice is a slice of the parent string slice. fn is_slice_of(parent: &str, child: &str) -> bool { let parent_start = parent.as_ptr() as usize; @@ -170,22 +129,13 @@ pub fn start_of_line<'r, 's>( context: Context<'r, 's>, input: OrgSource<'s>, ) -> Res, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); - match preceding_character { - Some('\n') => {} - Some(_) => { - // Not at start of line, cannot be a heading - return Err(nom::Err::Error(CustomError::MyError(MyError( - "Not at start of line".into(), - )))); - } - // If None, we are at the start of the file which allows for headings - None => {} - }; - Ok((input, ())) + if input.is_at_start_of_line() { + Ok((input, ())) + } else { + Err(nom::Err::Error(CustomError::MyError(MyError( + "Not at start of line".into(), + )))) + } } /// Check that we are at the start of a line @@ -194,10 +144,7 @@ pub fn preceded_by_whitespace<'r, 's>( context: Context<'r, 's>, input: OrgSource<'s>, ) -> Res, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); + let preceding_character = input.get_preceding_character(); match preceding_character { Some('\n') | Some('\r') | Some(' ') | Some('\t') => {} // If None, we are at the start of the file which is not allowed