Fix plain list parser to not consume trailing whitespace on the last item.

This commit is contained in:
Tom Alexander 2023-04-14 19:24:05 -04:00
parent 665c1d70fb
commit 08fed1301e
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
3 changed files with 85 additions and 22 deletions

View File

@ -6,5 +6,7 @@
lorem lorem
ipsum
ipsum
dolar

View File

@ -23,6 +23,7 @@ use nom::character::complete::one_of;
use nom::character::complete::space0; use nom::character::complete::space0;
use nom::character::complete::space1; use nom::character::complete::space1;
use nom::combinator::eof; use nom::combinator::eof;
use nom::combinator::peek;
use nom::combinator::recognize; use nom::combinator::recognize;
use nom::combinator::verify; use nom::combinator::verify;
use nom::multi::many1; use nom::multi::many1;
@ -35,26 +36,73 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&plain_list_end)), exit_matcher: ChainBehavior::AndParent(Some(&plain_list_end)),
})); }));
let (mut remaining, first_item) = plain_list_item(&parser_context, input)?; let without_consume_context =
let first_item_indentation = first_item.indentation; parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
let plain_list_item_matcher = parser_with_context!(plain_list_item)(&parser_context); let with_consume_context =
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true));
let without_consume_matcher = parser_with_context!(plain_list_item)(&without_consume_context);
let with_consume_matcher = parser_with_context!(plain_list_item)(&with_consume_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
let mut children = Vec::new(); let mut children = Vec::new();
children.push(first_item); let mut first_item_indentation: Option<usize> = None;
loop { let mut remaining = input;
let exit_contents = exit_matcher(remaining);
if exit_contents.is_ok() {
break;
}
let next_list_item = plain_list_item_matcher(remaining); loop {
match next_list_item { /*
Ok((remain, next_child)) if next_child.indentation == first_item_indentation => { Trailing whitespace belongs to the plain list, not the plain list item
children.push(next_child);
Possible outcomes:
Don't consume, yes exit matcher
Don't consume, no additional item
Consume, additional item
*/
let last_item_then_exit = tuple((without_consume_matcher, exit_matcher))(remaining);
match last_item_then_exit {
Ok((remain, (item, _exit)))
if item.indentation == *first_item_indentation.get_or_insert(item.indentation) =>
{
remaining = remain; remaining = remain;
children.push(item);
break;
} }
Ok(_) | Err(_) => break, Ok(_) | Err(_) => {}
}; };
let not_last_item = tuple((with_consume_matcher, peek(without_consume_matcher)))(remaining);
match not_last_item {
Ok((remain, (item, _future_item)))
if item.indentation == *first_item_indentation.get_or_insert(item.indentation) =>
{
remaining = remain;
children.push(item);
continue;
}
Ok(_) | Err(_) => {}
};
// If it's not (don't consume, exit) and it's not (consume, see another item), then it must be (don't consume, no additional item)
let last_item_then_exit = without_consume_matcher(remaining);
match last_item_then_exit {
Ok((remain, item))
if item.indentation == *first_item_indentation.get_or_insert(item.indentation) =>
{
remaining = remain;
children.push(item);
break;
}
Ok(_) | Err(_) => {
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Should be unreachable.",
))));
unreachable!();
}
};
}
if children.is_empty() {
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Plain lists require at least one element.",
))));
} }
let (remaining, _trailing_ws) = let (remaining, _trailing_ws) =
@ -73,20 +121,27 @@ pub fn plain_list_item<'r, 's>(
let (remaining, leading_whitespace) = space0(input)?; let (remaining, leading_whitespace) = space0(input)?;
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
let indent_level = leading_whitespace.len(); let indent_level = leading_whitespace.len();
let parser_context = context let with_consume_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::ListItem(indent_level))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)),
}));
let without_consume_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false)) .with_additional_node(ContextElement::ConsumeTrailingWhitespace(false))
.with_additional_node(ContextElement::ListItem(indent_level)) .with_additional_node(ContextElement::ListItem(indent_level))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)), exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)),
})); }));
let element_matcher = parser_with_context!(element)(&parser_context); let element_matcher = parser_with_context!(element)(&with_consume_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
let (remaining, bull) = let (remaining, bull) =
verify(bullet, |bull: &str| bull != "*" || indent_level > 0)(remaining)?; verify(bullet, |bull: &str| bull != "*" || indent_level > 0)(remaining)?;
let maybe_contentless_item: Res<&str, &str> = alt((eof, line_ending))(remaining); let maybe_contentless_item: Res<&str, &str> = alt((eof, line_ending))(remaining);
match maybe_contentless_item { match maybe_contentless_item {
Ok((rem, _ws)) => { Ok((rem, _ws)) => {
// TODO: do we need to consume if this isn't the last item?
let source = get_consumed(input, rem); let source = get_consumed(input, rem);
return Ok(( return Ok((
rem, rem,
@ -329,15 +384,17 @@ baz"#;
lorem lorem
ipsum
ipsum"#;
dolar"#;
let initial_context: ContextTree<'_, '_> = ContextTree::new(); let initial_context: ContextTree<'_, '_> = ContextTree::new();
let document_context = let document_context =
initial_context.with_additional_node(ContextElement::DocumentRoot(input)); initial_context.with_additional_node(ContextElement::DocumentRoot(input));
let plain_list_matcher = parser_with_context!(plain_list)(&document_context); let plain_list_matcher = parser_with_context!(plain_list)(&document_context);
let (remaining, result) = let (remaining, result) =
plain_list_matcher(input).expect("Should parse the plain list successfully."); plain_list_matcher(input).expect("Should parse the plain list successfully.");
assert_eq!(remaining, "ipsum"); assert_eq!(remaining, "dolar");
assert_eq!( assert_eq!(
result.get_source(), result.get_source(),
r#"1. foo r#"1. foo
@ -348,6 +405,8 @@ ipsum"#;
lorem lorem
ipsum
"# "#
); );

View File

@ -6,5 +6,7 @@
lorem lorem
ipsum
ipsum
dolar