Add a context element for tracking whether or not elements should consume their trailing whitespace.

This commit is contained in:
Tom Alexander 2023-04-10 10:36:16 -04:00
parent 6813c7c991
commit 9a0172e1a4
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
6 changed files with 60 additions and 10 deletions

View File

@ -100,10 +100,11 @@ pub fn document(input: &str) -> Res<&str, Document> {
fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Section<'s>> {
// TODO: The zeroth section is specialized so it probably needs its own parser
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::Context("section"))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&section_end)),
}))
.with_additional_node(ContextElement::Context("section"));
}));
let element_matcher = parser_with_context!(element)(&parser_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
let trailing_matcher = parser_with_context!(element_trailing_whitespace)(&parser_context);

View File

@ -9,6 +9,7 @@ use super::paragraph::paragraph;
use super::plain_list::plain_list;
use super::source::Source;
use super::Context;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_with_context::parser_with_context;
use nom::branch::alt;
use nom::combinator::map;
@ -37,6 +38,26 @@ impl<'s> Source<'s> for Element<'s> {
}
}
/// Parses a single element with trailing-whitespace consumption switched on.
///
/// Pushes a `ContextElement::ConsumeTrailingWhitespace(true)` node onto the
/// supplied context and then delegates to [`element`] with the extended
/// context, returning whatever [`element`] returns.
#[tracing::instrument(ret, level = "debug")]
pub fn element_with_whitespace<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, Element<'s>> {
    let consume_marker = ContextElement::ConsumeTrailingWhitespace(true);
    let whitespace_context = context.with_additional_node(consume_marker);
    element(&whitespace_context, input)
}
/// Parses a single element with trailing-whitespace consumption switched off.
///
/// Pushes a `ContextElement::ConsumeTrailingWhitespace(false)` node onto the
/// supplied context and then delegates to [`element`] with the extended
/// context, returning whatever [`element`] returns.
#[tracing::instrument(ret, level = "debug")]
pub fn element_without_whitespace<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, Element<'s>> {
    let skip_marker = ContextElement::ConsumeTrailingWhitespace(false);
    let no_whitespace_context = context.with_additional_node(skip_marker);
    element(&no_whitespace_context, input)
}
#[tracing::instrument(ret, level = "debug")]
pub fn element<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Element<'s>> {
let non_paragraph_matcher = parser_with_context!(non_paragraph_element)(context);

View File

@ -34,8 +34,9 @@ pub fn footnote_definition<'r, 's>(
// Cannot be indented.
let (remaining, (_lead_in, lbl, _lead_out, _ws)) =
tuple((tag_no_case("[fn:"), label, tag("]"), space0))(input)?;
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::IgnoreParent(Some(&footnote_definition_end)),
}));
// TODO: The problem is we are not accounting for trailing whitespace like we do in section. Maybe it would be easier if we passed down whether or not to parse trailing whitespace into the element matcher similar to how tag takes in parameters.

View File

@ -41,10 +41,11 @@ pub fn greater_block<'r, 's>(
let (remaining, parameters) = opt(tuple((space1, parameters)))(remaining)?;
let (remaining, _nl) = line_ending(remaining)?;
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::GreaterBlock(name))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::IgnoreParent(Some(&greater_block_end)),
}))
.with_additional_node(ContextElement::GreaterBlock(name));
}));
let element_matcher = parser_with_context!(element)(&parser_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);

View File

@ -115,6 +115,26 @@ impl<'r, 's> ContextTree<'r, 's> {
}
None
}
/// Reports whether elements parsed under this context should consume the
/// whitespace that follows them.
///
/// Falls back to `true` when no `ConsumeTrailingWhitespace` node is found
/// in the context.
pub fn should_consume_trailing_whitespace(&self) -> bool {
    let explicit_setting = self._should_consume_trailing_whitespace();
    explicit_setting.unwrap_or(true)
}
/// Looks up the first `ConsumeTrailingWhitespace` node yielded by
/// `self.iter()` and returns its flag.
///
/// Returns `None` when iteration finishes without encountering such a node,
/// leaving the choice of default to the caller.
fn _should_consume_trailing_whitespace(&self) -> Option<bool> {
    self.iter().find_map(|node| match node.get_data() {
        ContextElement::ConsumeTrailingWhitespace(flag) => Some(*flag),
        _ => None,
    })
}
}
#[derive(Debug)]
@ -123,14 +143,19 @@ pub enum ContextElement<'r, 's> {
///
/// This is used for look-behind.
DocumentRoot(&'s str),
/// Stores a parser that indicates that children should exit upon matching an exit matcher.
ExitMatcherNode(ExitMatcherNode<'r>),
Context(&'r str),
/// Stores the indentation level of the current list item
/// Stores the indentation level of the current list item.
ListItem(usize),
/// Stores the name of the greater block
/// Stores the name of the greater block.
GreaterBlock(&'s str),
/// Indicates if elements should consume the whitespace after them.
ConsumeTrailingWhitespace(bool),
}
#[derive(Debug)]

View File

@ -63,10 +63,11 @@ pub fn plain_list_item<'r, 's>(
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
let indent_level = leading_whitespace.len();
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false))
.with_additional_node(ContextElement::ListItem(indent_level))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)),
}))
.with_additional_node(ContextElement::ListItem(indent_level));
}));
let element_matcher = parser_with_context!(element)(&parser_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);