Merge branch 'list_perf_improvement'

Fix empty content items with final item whitespace cut-off before headlines.
Do not match headlines as plain list items.
2023-10-18 08:40:19 -04:00 · 2023-10-17 15:56:02 -04:00 · 2023-10-17 15:35:43 -04:00 · 2023-10-17 15:22:31 -04:00 · 2023-10-17 15:08:36 -04:00 · 2023-10-17 14:17:47 -04:00
2 changed files with 134 additions and 51 deletions
--- a/src/parser/plain_list.rs
+++ b/src/parser/plain_list.rs
@@ -3,6 +3,7 @@ use nom::bytes::complete::tag;
 use nom::character::complete::anychar;
 use nom::character::complete::digit1;
 use nom::character::complete::line_ending;
+use nom::character::complete::multispace1;
 use nom::character::complete::one_of;
 use nom::character::complete::space0;
 use nom::character::complete::space1;
@@ -17,6 +18,7 @@ use nom::multi::many0;
 use nom::multi::many1;
 use nom::multi::many_till;
 use nom::sequence::tuple;
+use nom::InputTake;

 use super::affiliated_keyword::parse_affiliated_keywords;
 use super::element_parser::element;
@@ -25,6 +27,7 @@ use super::org_source::OrgSource;
 use super::util::include_input;
 use super::util::indentation_level;
 use super::util::non_whitespace_character;
+use crate::context::bind_context;
 use crate::context::parser_with_context;
 use crate::context::ContextElement;
 use crate::context::ContextMatcher;
@@ -80,6 +83,46 @@ where
    return Err(nom::Err::Error(CustomError::Static("No element detected.")));
 }

+#[cfg_attr(
+    feature = "tracing",
+    tracing::instrument(ret, level = "debug", skip(context))
+)]
+pub(crate) fn detect_not_plain_list_item_indent<'b, 'g, 'r, 's>( // Look-ahead check: Ok when the line at `input` does NOT begin a plain list item.
+    context: RefContext<'b, 'g, 'r, 's>,
+    input: OrgSource<'s>,
+) -> Res<OrgSource<'s>, (u16, OrgSource<'s>)> { // Ok carries the unconsumed `input` and the value produced by `indentation_level`.
+    if let Ok((_remaining, (_, indent, _))) = tuple(( // Case 1: after the indentation there is no bullet followed by space, line ending, or EOF.
+        start_of_line,
+        parser_with_context!(indentation_level)(context),
+        not(tuple((
+            parser_with_context!(bullet)(context),
+            alt((space1, line_ending, eof)),
+        ))),
+    ))(input)
+    {
+        return Ok((input, indent)); // Return `input`, not `_remaining`, so nothing is consumed.
+    }
+
+    // Case 2: headlines are not plain list items. A bullet starting with '*' at indentation depth 0 is accepted as "not a list item".
+    if let Ok((_remaining, (_, indent, _))) = verify(
+        tuple((
+            start_of_line,
+            parser_with_context!(indentation_level)(context),
+            tuple((
+                parser_with_context!(bullet)(context),
+                alt((space1, line_ending, eof)),
+            )),
+        )),
+        |(_, (depth, _), ((_, bullet), _))| {
+            *depth == 0 && Into::<&str>::into(bullet).starts_with('*')
+        },
+    )(input)
+    {
+        return Ok((input, indent)); // As in case 1, nothing is consumed.
+    }
+    return Err(nom::Err::Error(CustomError::Static("No element detected."))); // Neither case matched: report a recoverable nom error.
+}
+
 #[cfg_attr(
    feature = "tracing",
    tracing::instrument(ret, level = "debug", skip(context, affiliated_keywords))
@@ -120,7 +163,7 @@ where
    // While #3 is the most slow, it also seems to cleanest and involves the least manual mutation of already-parsed objects so I am going with #3 for now, but we should revisit #1 or #2 when the parser is more developed.

    loop {
-        let list_item = parser_with_context!(plain_list_item)(&parser_context)(remaining);
+        let list_item = plain_list_item(&parser_context, remaining);
        match (&first_item_list_type, &list_item) {
            (None, Ok((_remain, (list_type, _item)))) => {
                let _ = first_item_list_type.insert(*list_type);
@@ -140,25 +183,17 @@ where
            }
        };

-        let maybe_exit = parser_with_context!(exit_matcher_parser)(&parser_context)(remaining);
+        let maybe_exit = exit_matcher_parser(&parser_context, remaining);
        if maybe_exit.is_ok() {
            break;
        }
    }

-    let (final_child_start, _final_item_first_parse) = match children.pop() {
-        Some(final_child) => final_child,
-        None => {
+    if children.is_empty() {
        return Err(nom::Err::Error(CustomError::Static(
            "Plain lists require at least one element.",
        )));
    }
-    };
-    let final_item_context = ContextElement::ConsumeTrailingWhitespace(false);
-    let final_item_context = parser_context.with_additional_node(&final_item_context);
-    let (remaining, (_, reparsed_final_item)) =
-        parser_with_context!(plain_list_item)(&final_item_context)(final_child_start)?;
-    children.push((final_child_start, reparsed_final_item));

    let (remaining, _trailing_ws) =
        maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
@@ -187,10 +222,10 @@ fn plain_list_item<'b, 'g, 'r, 's>(
 ) -> Res<OrgSource<'s>, (PlainListType, PlainListItem<'s>)> {
    start_of_line(input)?;
    let (remaining, (indent_level, _leading_whitespace)) = indentation_level(context, input)?;
-    let (remaining, (bullet_type, bull)) = verify(
-        parser_with_context!(bullet)(context),
-        |(_bullet_type, bull)| !Into::<&str>::into(bull).starts_with('*') || indent_level > 0,
-    )(remaining)?;
+    let (remaining, (bullet_type, bull)) =
+        verify(bind_context!(bullet, context), |(_bullet_type, bull)| {
+            !Into::<&str>::into(bull).starts_with('*') || indent_level > 0
+        })(remaining)?;

    let (remaining, maybe_counter_set) =
        opt(tuple((space1, tag("[@"), counter_set_value, tag("]"))))(remaining)?;
@@ -199,7 +234,7 @@ fn plain_list_item<'b, 'g, 'r, 's>(
    let (remaining, maybe_checkbox) = opt(tuple((space1, item_checkbox)))(remaining)?;

    let (remaining, maybe_tag) = if let BulletType::Unordered = bullet_type {
-        opt(tuple((space1, parser_with_context!(item_tag)(context))))(remaining)?
+        opt(tuple((space1, bind_context!(item_tag, context))))(remaining)?
    } else {
        (remaining, None)
    };
@@ -211,6 +246,12 @@ fn plain_list_item<'b, 'g, 'r, 's>(
    };

    let exit_matcher = plain_list_item_end(indent_level);
+    let final_item_whitespace_cutoff = final_item_whitespace_cutoff(indent_level);
+    let final_whitespace_context = ContextElement::ExitMatcherNode(ExitMatcherNode {
+        class: ExitClass::Beta,
+        exit_matcher: &final_item_whitespace_cutoff,
+    });
+    let final_whitespace_context = context.with_additional_node(&final_whitespace_context);
    let contexts = [
        ContextElement::ConsumeTrailingWhitespace(true),
        ContextElement::ExitMatcherNode(ExitMatcherNode {
@@ -218,17 +259,21 @@ fn plain_list_item<'b, 'g, 'r, 's>(
            exit_matcher: &exit_matcher,
        }),
    ];
-    let parser_context = context.with_additional_node(&contexts[0]);
+    let parser_context = final_whitespace_context.with_additional_node(&contexts[0]);
    let parser_context = parser_context.with_additional_node(&contexts[1]);

-    let maybe_contentless_item: Res<OrgSource<'_>, ()> = peek(parser_with_context!(
-        detect_contentless_item_contents
-    )(&parser_context))(remaining);
+    let maybe_contentless_item: Res<OrgSource<'_>, ()> =
+        detect_contentless_item_contents(&parser_context, remaining);
    if let Ok((_rem, _ws)) = maybe_contentless_item {
-        let (remaining, _trailing_ws) = if context.should_consume_trailing_whitespace() {
-            recognize(alt((recognize(many1(blank_line)), eof)))(remaining)?
-        } else {
+        let (remaining, _trailing_ws) = if tuple((
+            blank_line,
+            bind_context!(final_item_whitespace_cutoff, context),
+        ))(remaining)
+        .is_ok()
+        {
            recognize(alt((blank_line, eof)))(remaining)?
+        } else {
+            recognize(alt((recognize(many1(blank_line)), eof)))(remaining)?
        };
        let source = get_consumed(input, remaining);
        return Ok((
@@ -256,26 +301,14 @@ fn plain_list_item<'b, 'g, 'r, 's>(
        .filter(|b| *b == b'\n')
        .count();

-    let (mut remaining, (mut children, _exit_contents)) = many_till(
-        include_input(parser_with_context!(element(true))(&parser_context)),
-        parser_with_context!(exit_matcher_parser)(&parser_context),
+    let (remaining, (children, _exit_contents)) = many_till(
+        include_input(bind_context!(element(true), &parser_context)),
+        bind_context!(exit_matcher_parser, &parser_context),
    )(remaining)?;

-    if !children.is_empty() && !context.should_consume_trailing_whitespace() {
-        let final_item_context = ContextElement::ConsumeTrailingWhitespace(false);
-        let final_item_context = parser_context.with_additional_node(&final_item_context);
-        let (final_child_start, _original_final_child) = children
-            .pop()
-            .expect("if-statement already checked that children was non-empty.");
-        let (remain, reparsed_final_element) = include_input(parser_with_context!(element(true))(
-            &final_item_context,
-        ))(final_child_start)?;
-        remaining = remain;
-        children.push(reparsed_final_element);
-    }
-
+    // Use final_whitespace_context here (not the bare context) so the whitespace cut-off exit matcher is included.
    let (remaining, _trailing_ws) =
-        maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
+        maybe_consume_trailing_whitespace_if_not_exiting(&final_whitespace_context, remaining)?;

    let source = get_consumed(input, remaining);
    return Ok((
@@ -322,7 +355,7 @@ fn bullet<'b, 'g, 'r, 's>(
            map(tag("+"), |bull| (BulletType::Unordered, bull)),
            map(
                recognize(tuple((
-                    parser_with_context!(counter)(context),
+                    bind_context!(counter, context),
                    alt((tag("."), tag(")"))),
                ))),
                |bull| (BulletType::Ordered, bull),
@@ -377,6 +410,52 @@ fn counter_set_value<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, PlainListIt
    ))(input)
 }

+const fn final_item_whitespace_cutoff(indent_level: IndentationLevel) -> impl ContextMatcher { // Builds a matcher closure bound to `indent_level`; `plain_list_item` installs it as an exit matcher.
+    move |context, input: OrgSource<'_>| {
+        impl_final_item_whitespace_cutoff(context, input, indent_level) // All matching logic lives in the impl function.
+    }
+}
+
+#[cfg_attr(
+    feature = "tracing",
+    tracing::instrument(ret, level = "debug", skip(context))
+)]
+fn impl_final_item_whitespace_cutoff<'b, 'g, 'r, 's>( // Zero-width matcher: Ok when one of the two cut-off conditions below holds at `input`.
+    context: RefContext<'b, 'g, 'r, 's>,
+    input: OrgSource<'s>,
+    indent_level: IndentationLevel, // Indentation depth that the upcoming line is compared against.
+) -> Res<OrgSource<'s>, OrgSource<'s>> {
+    start_of_line(input)?; // Propagate the error unless `start_of_line` accepts this position.
+    // element!(plain_list_end, context, input); // NOTE(review): disabled code left in place; its purpose is not visible from here.
+
+    if let Ok((_remaining, _)) = verify( // Condition 1: optional blank line, then indentation shallower than `indent_level` that is not followed by whitespace.
+        tuple((
+            opt(blank_line),
+            bind_context!(indentation_level, context),
+            not(multispace1),
+        )),
+        |(_, (depth, _stars), _not_whitespace)| *depth < indent_level,
+    )(input)
+    {
+        return Ok((input, input.take(0))); // Empty match; `input` is returned unconsumed.
+    }
+
+    if let Ok((_remaining, _)) = tuple(( // Condition 2: optional blank line, then a line that is not a plain list item at exactly `indent_level`.
+        opt(blank_line),
+        verify(
+            bind_context!(detect_not_plain_list_item_indent, context),
+            |(depth, _)| *depth == indent_level,
+        ),
+    ))(input)
+    {
+        return Ok((input, input.take(0))); // Empty match; `input` is returned unconsumed.
+    }
+
+    Err(nom::Err::Error(CustomError::Static( // Neither condition held.
+        "No whitespace cut-off.",
+    )))
+}
+
 #[cfg_attr(
    feature = "tracing",
    tracing::instrument(ret, level = "debug", skip(_context))
@@ -412,7 +491,7 @@ fn _plain_list_item_end<'b, 'g, 'r, 's>(
    start_of_line(input)?;
    recognize(tuple((
        opt(blank_line),
-        parser_with_context!(line_indented_lte_matcher)(context),
+        bind_context!(line_indented_lte_matcher, context),
    )))(input)
 }

@@ -431,7 +510,7 @@ fn _line_indented_lte<'b, 'g, 'r, 's>(
 ) -> Res<OrgSource<'s>, OrgSource<'s>> {
    let matched = recognize(verify(
        tuple((
-            parser_with_context!(indentation_level)(context),
+            bind_context!(indentation_level, context),
            non_whitespace_character,
        )),
        // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
@@ -457,8 +536,8 @@ fn item_tag<'b, 'g, 'r, 's>(
    let (remaining, (children, _exit_contents)) = verify(
        many_till(
            // TODO: Should this be using a different set like the minimal set?
-            parser_with_context!(standard_set_object)(&parser_context),
-            parser_with_context!(exit_matcher_parser)(&parser_context),
+            bind_context!(standard_set_object, &parser_context),
+            bind_context!(exit_matcher_parser, &parser_context),
        ),
        |(children, _exit_contents)| !children.is_empty(),
    )(input)?;
@@ -508,7 +587,7 @@ fn item_tag_post_gap<'b, 'g, 'r, 's>(
                alt((
                    peek(recognize(not(blank_line))),
                    peek(recognize(tuple((many0(blank_line), eof)))),
-                    parser_with_context!(exit_matcher_parser)(context),
+                    bind_context!(exit_matcher_parser, context),
                )),
            ),
        ))),
@@ -538,7 +617,7 @@ fn detect_contentless_item_contents<'b, 'g, 'r, 's>(
 ) -> Res<OrgSource<'s>, ()> {
    let (remaining, _) = recognize(many_till(
        blank_line,
-        parser_with_context!(exit_matcher_parser)(context),
+        bind_context!(exit_matcher_parser, context),
    ))(input)?;
    Ok((remaining, ()))
 }
--- a/src/parser/util.rs
+++ b/src/parser/util.rs
@@ -243,6 +243,10 @@ pub(crate) fn org_line_ending(input: OrgSource<'_>) -> Res<OrgSource<'_>, OrgSou
 }

 /// Match the whitespace at the beginning of a line and give it an indentation level.
+#[cfg_attr(
+    feature = "tracing",
+    tracing::instrument(ret, level = "debug", skip(context))
+)]
 pub(crate) fn indentation_level<'s>(
    context: RefContext<'_, '_, '_, 's>,
    input: OrgSource<'s>,
Author	SHA1	Message	Date
Tom Alexander	8db9038c53	Merge branch 'list_perf_improvement' Some checks failed rust-build Build rust-build has succeeded Details rust-foreign-document-test Build rust-foreign-document-test has failed Details rust-test Build rust-test has failed Details rustfmt Build rustfmt has succeeded Details clippy Build clippy has succeeded Details	2023-10-18 08:40:19 -04:00
Tom Alexander	a276ba70e0	Fix empty content items with final item whitespace cut-off before headlines. Some checks failed clippy Build clippy has succeeded Details rust-foreign-document-test Build rust-foreign-document-test has failed Details rust-build Build rust-build has succeeded Details rust-test Build rust-test has succeeded Details	2023-10-17 15:56:02 -04:00
Tom Alexander	b7442c1e92	Do not match headlines as plain list items.	2023-10-17 15:35:43 -04:00
Tom Alexander	364ba79517	It actually worked on trailing whitespace ownership test case 2.	2023-10-17 15:22:31 -04:00
Tom Alexander	47408763e5	A first stab at a final item whitespace cut-off exit matcher.	2023-10-17 15:08:36 -04:00
Tom Alexander	bd187ebfe7	Remove re-parsing of the final list child.	2023-10-17 14:17:47 -04:00
Tom Alexander	59cb3c2bbf	Remove unnecessary closures in plain lists.	2023-10-17 13:59:33 -04:00