Merge branch 'plain_list_fix'

This commit is contained in:
Tom Alexander 2023-04-15 00:04:18 -04:00
commit 313898ea48
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
4 changed files with 126 additions and 28 deletions

View File

@ -6,5 +6,7 @@
lorem
ipsum
ipsum
dolar

View File

@ -9,7 +9,7 @@ cd "$DIR"
: ${org_dir:="$DIR/../org_mode_samples"}
: ${compare_bin:="$DIR/../target/debug/org_compare"}
test_files=$(find $org_dir -type f -name '*.org')
test_files=$(find $org_dir -type f -name '*.org' | sort)
cargo build --bin org_compare

View File

@ -23,11 +23,15 @@ use nom::character::complete::one_of;
use nom::character::complete::space0;
use nom::character::complete::space1;
use nom::combinator::eof;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::preceded;
use nom::sequence::terminated;
use nom::sequence::tuple;
use tracing::span;
#[tracing::instrument(ret, level = "debug")]
pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainList<'s>> {
@ -35,26 +39,97 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&plain_list_end)),
}));
let (mut remaining, first_item) = plain_list_item(&parser_context, input)?;
let first_item_indentation = first_item.indentation;
let plain_list_item_matcher = parser_with_context!(plain_list_item)(&parser_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
let without_consume_context =
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
let with_consume_context =
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true));
let without_consume_matcher = parser_with_context!(plain_list_item)(&without_consume_context);
let with_consume_matcher = parser_with_context!(plain_list_item)(&with_consume_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
let mut children = Vec::new();
children.push(first_item);
let mut first_item_indentation: Option<usize> = None;
let mut remaining = input;
loop {
let exit_contents = exit_matcher(remaining);
if exit_contents.is_ok() {
break;
/*
Trailing whitespace belongs to the plain list, not the plain list item
Possible outcomes:
Don't consume, yes exit matcher
Don't consume, no additional item
Consume, additional item
*/
{
// Don't consume, yes exit matcher
let span = span!(tracing::Level::DEBUG, "first");
let _enter = span.enter();
let last_item_then_exit = tuple((without_consume_matcher, exit_matcher))(remaining);
match last_item_then_exit {
Ok((remain, (item, _exit)))
if item.indentation
== *first_item_indentation.get_or_insert(item.indentation) =>
{
remaining = remain;
children.push(item);
break;
}
Ok(_) | Err(_) => {}
};
}
let next_list_item = plain_list_item_matcher(remaining);
match next_list_item {
Ok((remain, next_child)) if next_child.indentation == first_item_indentation => {
children.push(next_child);
remaining = remain;
}
Ok(_) | Err(_) => break,
};
{
// Consume, additional item
let span = span!(tracing::Level::DEBUG, "second");
let _enter = span.enter();
let not_last_item =
tuple((with_consume_matcher, peek(without_consume_matcher)))(remaining);
match not_last_item {
Ok((remain, (item, future_item)))
if item.indentation
== *first_item_indentation.get_or_insert(item.indentation)
&& future_item.indentation
== *first_item_indentation.get_or_insert(item.indentation) =>
{
remaining = remain;
children.push(item);
continue;
}
Ok(_) | Err(_) => {}
};
}
{
// Don't consume, no additional item
let span = span!(tracing::Level::DEBUG, "third");
let _enter = span.enter();
let last_item_then_exit = without_consume_matcher(remaining);
match last_item_then_exit {
Ok((remain, item))
if item.indentation
== *first_item_indentation.get_or_insert(item.indentation) =>
{
remaining = remain;
children.push(item);
break;
}
Ok(_) | Err(_) => {
// TODO: Maybe this is reachable when there are no items at all.
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Should be unreachable.",
))));
unreachable!();
}
};
}
}
if children.is_empty() {
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Plain lists require at least one element.",
))));
}
let (remaining, _trailing_ws) =
@ -73,20 +148,27 @@ pub fn plain_list_item<'r, 's>(
let (remaining, leading_whitespace) = space0(input)?;
// It is fine to measure the indent level in bytes rather than characters: nom's space0 only matches space and tab (0x20 and 0x09), each of which is a single byte in UTF-8, so the byte count equals the character count.
let indent_level = leading_whitespace.len();
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false))
let with_consume_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::ListItem(indent_level))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)),
}));
let without_consume_context = context
.with_additional_node(ContextElement::ListItem(indent_level))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)),
}));
let element_matcher = parser_with_context!(element)(&parser_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
let with_consume_matcher = parser_with_context!(element)(&with_consume_context);
let without_consume_matcher = parser_with_context!(element)(&without_consume_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
let (remaining, bull) =
verify(bullet, |bull: &str| bull != "*" || indent_level > 0)(remaining)?;
let maybe_contentless_item: Res<&str, &str> = alt((eof, line_ending))(remaining);
match maybe_contentless_item {
Ok((rem, _ws)) => {
// TODO: do we need to consume if this isn't the last item?
let source = get_consumed(input, rem);
return Ok((
rem,
@ -100,9 +182,17 @@ pub fn plain_list_item<'r, 's>(
}
Err(_) => {
let (remaining, _ws) = space1(remaining)?;
// TODO: The problem is we are not capturing trailing whitespace for elements that are before the last element.
let (remaining, (contents, _exit_contents)) =
many_till(element_matcher, exit_matcher)(remaining)?;
let (remaining, (mut contents, final_element)) = many_till(
with_consume_matcher,
alt((
terminated(without_consume_matcher, exit_matcher),
preceded(
peek(tuple((with_consume_matcher, exit_matcher))),
without_consume_matcher,
),
)),
)(remaining)?;
contents.push(final_element);
let source = get_consumed(input, remaining);
return Ok((
remaining,
@ -329,15 +419,17 @@ baz"#;
lorem
ipsum
ipsum"#;
dolar"#;
let initial_context: ContextTree<'_, '_> = ContextTree::new();
let document_context =
initial_context.with_additional_node(ContextElement::DocumentRoot(input));
let plain_list_matcher = parser_with_context!(plain_list)(&document_context);
let (remaining, result) =
plain_list_matcher(input).expect("Should parse the plain list successfully.");
assert_eq!(remaining, "ipsum");
assert_eq!(remaining, "dolar");
assert_eq!(
result.get_source(),
r#"1. foo
@ -348,6 +440,8 @@ ipsum"#;
lorem
ipsum
"#
);

View File

@ -6,5 +6,7 @@
lorem
ipsum
ipsum
dolar