diff --git a/src/parser/document.rs b/src/parser/document.rs index 33f683d5..216b7690 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -100,10 +100,11 @@ pub fn document(input: &str) -> Res<&str, Document> { fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Section<'s>> { // TODO: The zeroth section is specialized so it probably needs its own parser let parser_context = context + .with_additional_node(ContextElement::ConsumeTrailingWhitespace(true)) + .with_additional_node(ContextElement::Context("section")) .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { exit_matcher: ChainBehavior::AndParent(Some(&section_end)), - })) - .with_additional_node(ContextElement::Context("section")); + })); let element_matcher = parser_with_context!(element)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); let trailing_matcher = parser_with_context!(element_trailing_whitespace)(&parser_context); diff --git a/src/parser/element.rs b/src/parser/element.rs index b38505c8..5a1b379e 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -9,6 +9,7 @@ use super::paragraph::paragraph; use super::plain_list::plain_list; use super::source::Source; use super::Context; +use crate::parser::parser_context::ContextElement; use crate::parser::parser_with_context::parser_with_context; use nom::branch::alt; use nom::combinator::map; @@ -37,6 +38,26 @@ impl<'s> Source<'s> for Element<'s> { } } +#[tracing::instrument(ret, level = "debug")] +pub fn element_with_whitespace<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, Element<'s>> { + let parser_context = + context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true)); + element(&parser_context, input) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn element_without_whitespace<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, Element<'s>> { + let 
parser_context = + context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false)); + element(&parser_context, input) +} + #[tracing::instrument(ret, level = "debug")] pub fn element<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Element<'s>> { let non_paragraph_matcher = parser_with_context!(non_paragraph_element)(context); diff --git a/src/parser/footnote_definition.rs b/src/parser/footnote_definition.rs index f4c6b7f2..5924c810 100644 --- a/src/parser/footnote_definition.rs +++ b/src/parser/footnote_definition.rs @@ -34,8 +34,9 @@ pub fn footnote_definition<'r, 's>( // Cannot be indented. let (remaining, (_lead_in, lbl, _lead_out, _ws)) = tuple((tag_no_case("[fn:"), label, tag("]"), space0))(input)?; - let parser_context = - context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + let parser_context = context + .with_additional_node(ContextElement::ConsumeTrailingWhitespace(true)) + .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { exit_matcher: ChainBehavior::IgnoreParent(Some(&footnote_definition_end)), })); // TODO: The problem is we are not accounting for trailing whitespace like we do in section. Maybe it would be easier if we passed down whether or not to parse trailing whitespace into the element matcher similar to how tag takes in parameters. 
diff --git a/src/parser/greater_block.rs b/src/parser/greater_block.rs index 3fce48cb..a3f3915a 100644 --- a/src/parser/greater_block.rs +++ b/src/parser/greater_block.rs @@ -41,10 +41,11 @@ pub fn greater_block<'r, 's>( let (remaining, parameters) = opt(tuple((space1, parameters)))(remaining)?; let (remaining, _nl) = line_ending(remaining)?; let parser_context = context + .with_additional_node(ContextElement::ConsumeTrailingWhitespace(true)) + .with_additional_node(ContextElement::GreaterBlock(name)) .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { exit_matcher: ChainBehavior::IgnoreParent(Some(&greater_block_end)), - })) - .with_additional_node(ContextElement::GreaterBlock(name)); + })); let element_matcher = parser_with_context!(element)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index 5bbb8a72..9c2a411f 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -115,6 +115,26 @@ impl<'r, 's> ContextTree<'r, 's> { } None } + + /// Indicates if elements should consume the whitespace after them. + /// + /// Defaults to true. + pub fn should_consume_trailing_whitespace(&self) -> bool { + self._should_consume_trailing_whitespace().unwrap_or(true) + } + + fn _should_consume_trailing_whitespace(&self) -> Option<bool> { + for current_node in self.iter() { + let context_element = current_node.get_data(); + match context_element { + ContextElement::ConsumeTrailingWhitespace(should) => { + return Some(*should); + } + _ => {} + } + } + None + } } #[derive(Debug)] @@ -123,14 +143,19 @@ pub enum ContextElement<'r, 's> { /// /// This is used for look-behind. DocumentRoot(&'s str), + + /// Stores a parser that indicates that children should exit upon matching an exit matcher. 
ExitMatcherNode(ExitMatcherNode<'r>), Context(&'r str), - /// Stores the indentation level of the current list item + /// Stores the indentation level of the current list item. ListItem(usize), - /// Stores the name of the greater block + /// Stores the name of the greater block. GreaterBlock(&'s str), + + /// Indicates if elements should consume the whitespace after them. + ConsumeTrailingWhitespace(bool), } #[derive(Debug)] diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 215280a6..d68aa9b2 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -63,10 +63,11 @@ pub fn plain_list_item<'r, 's>( // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) let indent_level = leading_whitespace.len(); let parser_context = context + .with_additional_node(ContextElement::ConsumeTrailingWhitespace(false)) + .with_additional_node(ContextElement::ListItem(indent_level)) .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)), - })) - .with_additional_node(ContextElement::ListItem(indent_level)); + })); let element_matcher = parser_with_context!(element)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);