From 08fed1301effbe119d1050aae2572d6e7c62beba Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 14 Apr 2023 19:24:05 -0400 Subject: [PATCH] Fix plain list parser to not consume trailing whitespace on the last item. --- .../list_paragraph_nesting.org | 4 +- src/parser/plain_list.rs | 99 +++++++++++++++---- toy_language.txt | 4 +- 3 files changed, 85 insertions(+), 22 deletions(-) diff --git a/org_mode_samples/exit_matcher_investigation/list_paragraph_nesting.org b/org_mode_samples/exit_matcher_investigation/list_paragraph_nesting.org index 843a1e0..6497a29 100644 --- a/org_mode_samples/exit_matcher_investigation/list_paragraph_nesting.org +++ b/org_mode_samples/exit_matcher_investigation/list_paragraph_nesting.org @@ -6,5 +6,7 @@ lorem + ipsum -ipsum + +dolar diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 6bd455e..045b924 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -23,6 +23,7 @@ use nom::character::complete::one_of; use nom::character::complete::space0; use nom::character::complete::space1; use nom::combinator::eof; +use nom::combinator::peek; use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many1; @@ -35,26 +36,73 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { exit_matcher: ChainBehavior::AndParent(Some(&plain_list_end)), })); - let (mut remaining, first_item) = plain_list_item(&parser_context, input)?; - let first_item_indentation = first_item.indentation; - let plain_list_item_matcher = parser_with_context!(plain_list_item)(&parser_context); - let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); + let without_consume_context = + parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false)); + let with_consume_context = + parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true)); + let 
without_consume_matcher = parser_with_context!(plain_list_item)(&without_consume_context); + let with_consume_matcher = parser_with_context!(plain_list_item)(&with_consume_context); + let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context); let mut children = Vec::new(); - children.push(first_item); - loop { - let exit_contents = exit_matcher(remaining); - if exit_contents.is_ok() { - break; - } + let mut first_item_indentation: Option = None; + let mut remaining = input; - let next_list_item = plain_list_item_matcher(remaining); - match next_list_item { - Ok((remain, next_child)) if next_child.indentation == first_item_indentation => { - children.push(next_child); + loop { + /* + Trailing whitespace belongs to the plain list, not the plain list item + + Possible outcomes: + Don't consume, yes exit matcher + Don't consume, no additional item + Consume, additional item + */ + let last_item_then_exit = tuple((without_consume_matcher, exit_matcher))(remaining); + match last_item_then_exit { + Ok((remain, (item, _exit))) + if item.indentation == *first_item_indentation.get_or_insert(item.indentation) => + { remaining = remain; + children.push(item); + break; } - Ok(_) | Err(_) => break, + Ok(_) | Err(_) => {} }; + + let not_last_item = tuple((with_consume_matcher, peek(without_consume_matcher)))(remaining); + match not_last_item { + Ok((remain, (item, _future_item))) + if item.indentation == *first_item_indentation.get_or_insert(item.indentation) => + { + remaining = remain; + children.push(item); + continue; + } + Ok(_) | Err(_) => {} + }; + + // If it's not (don't consume, exit) and it's not (consume, see another item) then it must be (don't consume, no additional item) + let last_item_then_exit = without_consume_matcher(remaining); + match last_item_then_exit { + Ok((remain, item)) + if item.indentation == *first_item_indentation.get_or_insert(item.indentation) => + { + remaining = remain; + children.push(item); + break; + } + Ok(_) | 
Err(_) => { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Should be unreachable.", + )))); + unreachable!(); + } + }; + } + + if children.is_empty() { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Plain lists require at least one element.", + )))); } let (remaining, _trailing_ws) = @@ -73,20 +121,27 @@ pub fn plain_list_item<'r, 's>( let (remaining, leading_whitespace) = space0(input)?; // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) let indent_level = leading_whitespace.len(); - let parser_context = context + let with_consume_context = context + .with_additional_node(ContextElement::ConsumeTrailingWhitespace(true)) + .with_additional_node(ContextElement::ListItem(indent_level)) + .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)), + })); + let without_consume_context = context .with_additional_node(ContextElement::ConsumeTrailingWhitespace(false)) .with_additional_node(ContextElement::ListItem(indent_level)) .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)), })); - let element_matcher = parser_with_context!(element)(&parser_context); - let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); + let element_matcher = parser_with_context!(element)(&with_consume_context); + let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context); let (remaining, bull) = verify(bullet, |bull: &str| bull != "*" || indent_level > 0)(remaining)?; let maybe_contentless_item: Res<&str, &str> = alt((eof, line_ending))(remaining); match maybe_contentless_item { Ok((rem, _ws)) => { + // TODO: do we need to consume if this isn't the last item? 
let source = get_consumed(input, rem); return Ok(( rem, @@ -329,15 +384,17 @@ baz"#; lorem + ipsum -ipsum"#; + +dolar"#; let initial_context: ContextTree<'_, '_> = ContextTree::new(); let document_context = initial_context.with_additional_node(ContextElement::DocumentRoot(input)); let plain_list_matcher = parser_with_context!(plain_list)(&document_context); let (remaining, result) = plain_list_matcher(input).expect("Should parse the plain list successfully."); - assert_eq!(remaining, "ipsum"); + assert_eq!(remaining, "dolar"); assert_eq!( result.get_source(), r#"1. foo @@ -348,6 +405,8 @@ ipsum"#; lorem + ipsum + "# ); diff --git a/toy_language.txt b/toy_language.txt index 843a1e0..6497a29 100644 --- a/toy_language.txt +++ b/toy_language.txt @@ -6,5 +6,7 @@ lorem + ipsum -ipsum + +dolar