diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 4ffef25..91aeceb 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -3,6 +3,7 @@ use nom::bytes::complete::tag; use nom::character::complete::anychar; use nom::character::complete::digit1; use nom::character::complete::line_ending; +use nom::character::complete::multispace1; use nom::character::complete::one_of; use nom::character::complete::space0; use nom::character::complete::space1; @@ -17,6 +18,7 @@ use nom::multi::many0; use nom::multi::many1; use nom::multi::many_till; use nom::sequence::tuple; +use nom::InputTake; use super::affiliated_keyword::parse_affiliated_keywords; use super::element_parser::element; @@ -25,6 +27,7 @@ use super::org_source::OrgSource; use super::util::include_input; use super::util::indentation_level; use super::util::non_whitespace_character; +use crate::context::bind_context; use crate::context::parser_with_context; use crate::context::ContextElement; use crate::context::ContextMatcher; @@ -80,6 +83,46 @@ where return Err(nom::Err::Error(CustomError::Static("No element detected."))); } +#[cfg_attr( + feature = "tracing", + tracing::instrument(ret, level = "debug", skip(context)) +)] +pub(crate) fn detect_not_plain_list_item_indent<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, (u16, OrgSource<'s>)> { + if let Ok((_remaining, (_, indent, _))) = tuple(( + start_of_line, + parser_with_context!(indentation_level)(context), + not(tuple(( + parser_with_context!(bullet)(context), + alt((space1, line_ending, eof)), + ))), + ))(input) + { + return Ok((input, indent)); + } + + // Headlines are not plain list items. + if let Ok((_remaining, (_, indent, _))) = verify( + tuple(( + start_of_line, + parser_with_context!(indentation_level)(context), + tuple(( + parser_with_context!(bullet)(context), + alt((space1, line_ending, eof)), + )), + )), + |(_, (depth, _), ((_, bullet), _))| { + *depth == 0 && Into::<&str>::into(bullet).starts_with('*') + }, + )(input) + { + return Ok((input, indent)); + } + return Err(nom::Err::Error(CustomError::Static("No element detected."))); +} + #[cfg_attr( feature = "tracing", tracing::instrument(ret, level = "debug", skip(context, affiliated_keywords)) @@ -120,7 +163,7 @@ where // While #3 is the most slow, it also seems to cleanest and involves the least manual mutation of already-parsed objects so I am going with #3 for now, but we should revisit #1 or #2 when the parser is more developed. loop { - let list_item = parser_with_context!(plain_list_item)(&parser_context)(remaining); + let list_item = plain_list_item(&parser_context, remaining); match (&first_item_list_type, &list_item) { (None, Ok((_remain, (list_type, _item)))) => { let _ = first_item_list_type.insert(*list_type); @@ -140,25 +183,17 @@ where } }; - let maybe_exit = parser_with_context!(exit_matcher_parser)(&parser_context)(remaining); + let maybe_exit = exit_matcher_parser(&parser_context, remaining); if maybe_exit.is_ok() { break; } } - let (final_child_start, _final_item_first_parse) = match children.pop() { - Some(final_child) => final_child, - None => { - return Err(nom::Err::Error(CustomError::Static( - "Plain lists require at least one element.", - ))); - } - }; - let final_item_context = ContextElement::ConsumeTrailingWhitespace(false); - let final_item_context = parser_context.with_additional_node(&final_item_context); - let (remaining, (_, reparsed_final_item)) = - parser_with_context!(plain_list_item)(&final_item_context)(final_child_start)?; - children.push((final_child_start, reparsed_final_item)); + if children.is_empty() { + return Err(nom::Err::Error(CustomError::Static( + "Plain lists require at least one element.", + ))); + } let (remaining, _trailing_ws) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; @@ -187,10 +222,10 @@ fn plain_list_item<'b, 'g, 'r, 's>( ) -> Res, (PlainListType, PlainListItem<'s>)> { start_of_line(input)?; let (remaining, (indent_level, _leading_whitespace)) = indentation_level(context, input)?; - let (remaining, (bullet_type, bull)) = verify( - parser_with_context!(bullet)(context), - |(_bullet_type, bull)| !Into::<&str>::into(bull).starts_with('*') || indent_level > 0, - )(remaining)?; + let (remaining, (bullet_type, bull)) = + verify(bind_context!(bullet, context), |(_bullet_type, bull)| { + !Into::<&str>::into(bull).starts_with('*') || indent_level > 0 + })(remaining)?; let (remaining, maybe_counter_set) = opt(tuple((space1, tag("[@"), counter_set_value, tag("]"))))(remaining)?; @@ -199,7 +234,7 @@ fn plain_list_item<'b, 'g, 'r, 's>( let (remaining, maybe_checkbox) = opt(tuple((space1, item_checkbox)))(remaining)?; let (remaining, maybe_tag) = if let BulletType::Unordered = bullet_type { - opt(tuple((space1, parser_with_context!(item_tag)(context))))(remaining)? + opt(tuple((space1, bind_context!(item_tag, context))))(remaining)? } else { (remaining, None) }; @@ -211,6 +246,12 @@ fn plain_list_item<'b, 'g, 'r, 's>( }; let exit_matcher = plain_list_item_end(indent_level); + let final_item_whitespace_cutoff = final_item_whitespace_cutoff(indent_level); + let final_whitespace_context = ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Beta, + exit_matcher: &final_item_whitespace_cutoff, + }); + let final_whitespace_context = context.with_additional_node(&final_whitespace_context); let contexts = [ ContextElement::ConsumeTrailingWhitespace(true), ContextElement::ExitMatcherNode(ExitMatcherNode { @@ -218,17 +259,21 @@ fn plain_list_item<'b, 'g, 'r, 's>( exit_matcher: &exit_matcher, }), ]; - let parser_context = context.with_additional_node(&contexts[0]); + let parser_context = final_whitespace_context.with_additional_node(&contexts[0]); let parser_context = parser_context.with_additional_node(&contexts[1]); - let maybe_contentless_item: Res, ()> = peek(parser_with_context!( - detect_contentless_item_contents - )(&parser_context))(remaining); + let maybe_contentless_item: Res, ()> = + detect_contentless_item_contents(&parser_context, remaining); if let Ok((_rem, _ws)) = maybe_contentless_item { - let (remaining, _trailing_ws) = if context.should_consume_trailing_whitespace() { - recognize(alt((recognize(many1(blank_line)), eof)))(remaining)? - } else { + let (remaining, _trailing_ws) = if tuple(( + blank_line, + bind_context!(final_item_whitespace_cutoff, context), + ))(remaining) + .is_ok() + { recognize(alt((blank_line, eof)))(remaining)? + } else { + recognize(alt((recognize(many1(blank_line)), eof)))(remaining)? }; let source = get_consumed(input, remaining); return Ok(( @@ -256,26 +301,14 @@ fn plain_list_item<'b, 'g, 'r, 's>( .filter(|b| *b == b'\n') .count(); - let (mut remaining, (mut children, _exit_contents)) = many_till( - include_input(parser_with_context!(element(true))(&parser_context)), - parser_with_context!(exit_matcher_parser)(&parser_context), + let (remaining, (children, _exit_contents)) = many_till( + include_input(bind_context!(element(true), &parser_context)), + bind_context!(exit_matcher_parser, &parser_context), )(remaining)?; - if !children.is_empty() && !context.should_consume_trailing_whitespace() { - let final_item_context = ContextElement::ConsumeTrailingWhitespace(false); - let final_item_context = parser_context.with_additional_node(&final_item_context); - let (final_child_start, _original_final_child) = children - .pop() - .expect("if-statement already checked that children was non-empty."); - let (remain, reparsed_final_element) = include_input(parser_with_context!(element(true))( - &final_item_context, - ))(final_child_start)?; - remaining = remain; - children.push(reparsed_final_element); - } - + // We have to use the parser_context here to include the whitespace cut-off let (remaining, _trailing_ws) = - maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; + maybe_consume_trailing_whitespace_if_not_exiting(&final_whitespace_context, remaining)?; let source = get_consumed(input, remaining); return Ok(( @@ -322,7 +355,7 @@ fn bullet<'b, 'g, 'r, 's>( map(tag("+"), |bull| (BulletType::Unordered, bull)), map( recognize(tuple(( - parser_with_context!(counter)(context), + bind_context!(counter, context), alt((tag("."), tag(")"))), ))), |bull| (BulletType::Ordered, bull), @@ -377,6 +410,52 @@ fn counter_set_value<'s>(input: OrgSource<'s>) -> Res, PlainListIt ))(input) } +const fn final_item_whitespace_cutoff(indent_level: IndentationLevel) -> impl ContextMatcher { + move |context, input: OrgSource<'_>| { + impl_final_item_whitespace_cutoff(context, input, indent_level) + } +} + +#[cfg_attr( + feature = "tracing", + tracing::instrument(ret, level = "debug", skip(context)) +)] +fn impl_final_item_whitespace_cutoff<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, + indent_level: IndentationLevel, +) -> Res, OrgSource<'s>> { + start_of_line(input)?; + // element!(plain_list_end, context, input); + + if let Ok((_remaining, _)) = verify( + tuple(( + opt(blank_line), + bind_context!(indentation_level, context), + not(multispace1), + )), + |(_, (depth, _stars), _not_whitespace)| *depth < indent_level, + )(input) + { + return Ok((input, input.take(0))); + } + + if let Ok((_remaining, _)) = tuple(( + opt(blank_line), + verify( + bind_context!(detect_not_plain_list_item_indent, context), + |(depth, _)| *depth == indent_level, + ), + ))(input) + { + return Ok((input, input.take(0))); + } + + Err(nom::Err::Error(CustomError::Static( + "No whitespace cut-off.", + ))) +} + #[cfg_attr( feature = "tracing", tracing::instrument(ret, level = "debug", skip(_context)) @@ -412,7 +491,7 @@ fn _plain_list_item_end<'b, 'g, 'r, 's>( start_of_line(input)?; recognize(tuple(( opt(blank_line), - parser_with_context!(line_indented_lte_matcher)(context), + bind_context!(line_indented_lte_matcher, context), )))(input) } @@ -431,7 +510,7 @@ fn _line_indented_lte<'b, 'g, 'r, 's>( ) -> Res, OrgSource<'s>> { let matched = recognize(verify( tuple(( - parser_with_context!(indentation_level)(context), + bind_context!(indentation_level, context), non_whitespace_character, )), // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) @@ -457,8 +536,8 @@ fn item_tag<'b, 'g, 'r, 's>( let (remaining, (children, _exit_contents)) = verify( many_till( // TODO: Should this be using a different set like the minimal set? - parser_with_context!(standard_set_object)(&parser_context), - parser_with_context!(exit_matcher_parser)(&parser_context), + bind_context!(standard_set_object, &parser_context), + bind_context!(exit_matcher_parser, &parser_context), ), |(children, _exit_contents)| !children.is_empty(), )(input)?; @@ -508,7 +587,7 @@ fn item_tag_post_gap<'b, 'g, 'r, 's>( alt(( peek(recognize(not(blank_line))), peek(recognize(tuple((many0(blank_line), eof)))), - parser_with_context!(exit_matcher_parser)(context), + bind_context!(exit_matcher_parser, context), )), ), ))), @@ -538,7 +617,7 @@ fn detect_contentless_item_contents<'b, 'g, 'r, 's>( ) -> Res, ()> { let (remaining, _) = recognize(many_till( blank_line, - parser_with_context!(exit_matcher_parser)(context), + bind_context!(exit_matcher_parser, context), ))(input)?; Ok((remaining, ())) } diff --git a/src/parser/util.rs b/src/parser/util.rs index 538f2b9..82f1250 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -243,6 +243,10 @@ pub(crate) fn org_line_ending(input: OrgSource<'_>) -> Res, OrgSou } /// Match the whitespace at the beginning of a line and give it an indentation level. +#[cfg_attr( + feature = "tracing", + tracing::instrument(ret, level = "debug", skip(context)) +)] pub(crate) fn indentation_level<'s>( context: RefContext<'_, '_, '_, 's>, input: OrgSource<'s>,