Merge branch 'fix_plain_list'

2023-08-16 21:06:54 -04:00 · 2023-08-16 21:06:54 -04:00 · 4776898894
commit 4776898894
parent e5e5120a10 8e95ce6368
4 changed files with 220 additions and 147 deletions
--- a/build.rs
+++ b/build.rs
@ -75,7 +75,6 @@ fn is_expect_fail(name: &str) -> Option<&str> {
        "element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."),
        "paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."),
        "export_snippet_paragraph_break_precedence" => Some("The latest code for org-mode is matching the export snippet without the closing @@."), // https://list.orgmode.org/orgmode/fb61ea28-f004-4c25-adf7-69fc55683ed4@app.fastmail.com/T/#u
-        "plain_lists_trailing_whitespace_ownership" => Some("My plain list implementation is currently broken."),
        _ => None,
    }
 }
--- a/notes/plain_list_ownership_notes.org
+++ b/notes/plain_list_ownership_notes.org
@ -0,0 +1,128 @@
+* Test 1
+** Source
+#+begin_src org
+  1. foo
+
+     1. bar
+
+     2. baz
+
+  2. lorem
+
+  ipsum
+#+end_src
+** Ownership
+This table is just showing ownership for the plain list items, not the containing plain list nor the elements inside each item.
+
+| Plain List *Item*      | Owns trailing blank lines |
+|------------------------+---------------------------|
+| foo (includes bar baz) | Yes                       |
+| bar                    | Yes                       |
+| baz                    | Yes                       |
+| lorem                  | No                        |
+** Analysis
+This seems to imply that plain list items own their trailing blank lines except for the final plain list item in the top-most plain list which does not own its trailing blank lines.
+* Test 2
+** Source
+#+begin_src org
+  1. foo
+
+     bar
+
+     1. baz
+
+        lorem
+
+     ipsum
+
+
+  dolar
+#+end_src
+** Ownership
+This table is just showing ownership for the plain list items, not the containing plain list nor the elements inside each item.
+
+| Plain List *Item*        | Owns trailing blank lines |
+|--------------------------+---------------------------|
+| foo -> ipsum (inclusive) | No                        |
+| baz lorem                | No                        |
+** Analysis
+This shows that the final plain list item in a nested plain list (baz lorem) does not own its trailing blank lines which conflicts with "baz" from Test 1.
+* Test 3
+** Source
+#+begin_src org
+  1. foo
+
+     1. bar
+
+        baz
+
+  2. lorem
+
+  ipsum
+#+end_src
+** Ownership
+| Plain List *Item*      | Owns trailing blank lines |
+|------------------------+---------------------------|
+| foo (includes bar baz) | Yes                       |
+| bar baz                | Yes                       |
+| lorem                  | No                        |
+** Analysis
+This was to test if having an extra paragraph in the final list item in the nested list changes the behavior. The behavior is consistent with Test 1, so the extra paragraph is not the cause of the discrepancy.
+* Test 4
+** Source
+#+begin_src org
+  1. foo
+
+     1. bar
+
+     2. baz
+
+     candy
+
+  2. lorem
+
+  ipsum
+#+end_src
+** Ownership
+| Plain List *Item*                | Owns trailing blank lines |
+|----------------------------------+---------------------------|
+| foo (includes bar baz and candy) | Yes                       |
+| bar                              | Yes                       |
+| baz                              | No                        |
+| lorem                            | No                        |
+** Analysis
+This was to test if putting a non-plain-list element at the end of foo changes the ownership of blank lines. baz changed to no longer owning its trailing whitespace.
+
+This seems to imply that list items own their trailing whitespace except for the final item unless that list item is at the end of a list item.
+* Test 5
+** Source
+#+begin_src org
+  1. foo
+
+     1. bar
+
+     2. baz
+
+     candy
+
+  2. lorem
+
+     1. cat
+
+     2. dog
+
+  ipsum
+#+end_src
+** Ownership
+| Plain List *Item*                | Owns trailing blank lines |
+|----------------------------------+---------------------------|
+| foo (includes bar baz and candy) | Yes                       |
+| bar                              | Yes                       |
+| baz                              | No                        |
+| lorem (includes cat and dog)     | No                        |
+| cat                              | Yes                       |
+| dog                              | No                        |
+** Analysis
+This breaks the theory that the final list item nested at the end of a list item gets to own its trailing blank lines since dog does not own its blank lines despite Test 1's baz owning its blank lines.
+
+New Theory: final list items only own their blank lines if they are nested at the end of a non-final list item.
--- a/src/main.rs
+++ b/src/main.rs
@ -39,16 +39,15 @@ fn main_body() -> Result<(), Box<dyn std::error::Error>> {
    run_compare(
        std::env::args()
            .nth(1)
-            .expect("Pass a single file into this script."),
+            .ok_or("Pass a single file into this script.")?,
    )
 }

 #[cfg(feature = "compare")]
 fn run_compare<P: AsRef<Path>>(todo_org_path: P) -> Result<(), Box<dyn std::error::Error>> {
-    let org_contents = std::fs::read_to_string(todo_org_path.as_ref()).expect("Read org file.");
+    let org_contents = std::fs::read_to_string(todo_org_path.as_ref())?;
    let (remaining, rust_parsed) = document(org_contents.as_str()).expect("Org Parse failure");
-    let org_sexp =
-        emacs_parse_org_document(todo_org_path.as_ref()).expect("Use emacs to parse org file.");
+    let org_sexp = emacs_parse_org_document(todo_org_path.as_ref())?;
    let (_remaining, parsed_sexp) =
        sexp_with_padding(org_sexp.as_str()).expect("Sexp Parse failure");

@ -57,11 +56,8 @@ fn run_compare<P: AsRef<Path>>(todo_org_path: P) -> Result<(), Box<dyn std::erro
    println!("{:#?}", rust_parsed);

    // We do the diffing after printing out both parsed forms in case the diffing panics
-    let diff_result =
-        compare_document(&parsed_sexp, &rust_parsed).expect("Compare parsed documents.");
-    diff_result
-        .print()
-        .expect("Print document parse tree diff.");
+    let diff_result = compare_document(&parsed_sexp, &rust_parsed)?;
+    diff_result.print()?;

    if diff_result.is_bad() {
        Err("Diff results do not match.")?;
--- a/src/parser/plain_list.rs
+++ b/src/parser/plain_list.rs
@ -6,6 +6,7 @@ use nom::character::complete::one_of;
 use nom::character::complete::space0;
 use nom::character::complete::space1;
 use nom::combinator::eof;
+use nom::combinator::opt;
 use nom::combinator::peek;
 use nom::combinator::recognize;
 use nom::combinator::verify;
@ -14,8 +15,6 @@ use nom::multi::many_till;
 use nom::sequence::preceded;
 use nom::sequence::terminated;
 use nom::sequence::tuple;
-#[cfg(feature = "tracing")]
-use tracing::span;

 use super::greater_element::PlainList;
 use super::greater_element::PlainListItem;
@ -32,6 +31,7 @@ use crate::parser::parser_context::ExitMatcherNode;
 use crate::parser::util::blank_line;
 use crate::parser::util::exit_matcher_parser;
 use crate::parser::util::get_consumed;
+use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
 use crate::parser::util::start_of_line;

 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -42,107 +42,64 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s
            class: ExitClass::Beta,
            exit_matcher: &plain_list_end,
        }));
-    let without_consume_context =
-        parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
-    let with_consume_context =
-        parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true));
-    let without_consume_matcher = parser_with_context!(plain_list_item)(&without_consume_context);
-    let with_consume_matcher = parser_with_context!(plain_list_item)(&with_consume_context);
-    let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
+    // children stores tuple of (input string, parsed object) so we can re-parse the final item
    let mut children = Vec::new();
    let mut first_item_indentation: Option<usize> = None;
    let mut remaining = input;

+    // The final list item does not consume trailing blank lines (which instead get consumed by the list). We have three options here:
+    //
+    // 1. Parse all items while consuming trailing whitespace, then edit the final item to remove trailing whitespace.
+    // 2. Parse all items without consuming trailing whitespace, then edit all but the final one to add in the trailing whitespace.
+    // 3. Re-parse the final item with consume trailing whitespace disabled.
+    //
+    // While #3 is the most slow, it also seems to cleanest and involves the least manual mutation of already-parsed objects so I am going with #3 for now, but we should revisit #1 or #2 when the parser is more developed.
+
    loop {
-        /*
-        Trailing whitespace belongs to the plain list, not the plain list item
+        let list_item = parser_with_context!(plain_list_item)(&parser_context)(remaining);
+        match list_item {
+            Ok((remain, item))
+                if item.indentation == *first_item_indentation.get_or_insert(item.indentation) =>
+            {
+                children.push((remaining, item));
+                remaining = remain;
+            }
+            Ok(_) | Err(_) => {
+                break;
+            }
+        };

-        Possible outcomes:
-        Don't consume, yes exit matcher
-        Don't consume, no additional item
-        Consume, additional item
-         */
-        {
-            // Don't consume, yes exit matcher
-            #[cfg(feature = "tracing")]
-            let span = span!(tracing::Level::DEBUG, "first");
-            #[cfg(feature = "tracing")]
-            let _enter = span.enter();
-
-            let last_item_then_exit = tuple((without_consume_matcher, exit_matcher))(remaining);
-            match last_item_then_exit {
-                Ok((remain, (item, _exit)))
-                    if item.indentation
-                        == *first_item_indentation.get_or_insert(item.indentation) =>
-                {
-                    remaining = remain;
-                    children.push(item);
-                    break;
-                }
-                Ok(_) | Err(_) => {}
-            };
-        }
-
-        {
-            // Consume, additional item
-            #[cfg(feature = "tracing")]
-            let span = span!(tracing::Level::DEBUG, "second");
-            #[cfg(feature = "tracing")]
-            let _enter = span.enter();
-
-            let not_last_item =
-                tuple((with_consume_matcher, peek(without_consume_matcher)))(remaining);
-            match not_last_item {
-                Ok((remain, (item, future_item)))
-                    if item.indentation
-                        == *first_item_indentation.get_or_insert(item.indentation)
-                        && future_item.indentation
-                            == *first_item_indentation.get_or_insert(item.indentation) =>
-                {
-                    remaining = remain;
-                    children.push(item);
-                    continue;
-                }
-                Ok(_) | Err(_) => {}
-            };
-        }
-
-        {
-            // Don't consume, no additional item
-            #[cfg(feature = "tracing")]
-            let span = span!(tracing::Level::DEBUG, "third");
-            #[cfg(feature = "tracing")]
-            let _enter = span.enter();
-
-            let last_item_then_exit = without_consume_matcher(remaining);
-            match last_item_then_exit {
-                Ok((remain, item))
-                    if item.indentation
-                        == *first_item_indentation.get_or_insert(item.indentation) =>
-                {
-                    remaining = remain;
-                    children.push(item);
-                    break;
-                }
-                Ok(_) | Err(_) => {
-                    // TODO: Maybe this is reachable when there are no items at all.
-                    return Err(nom::Err::Error(CustomError::MyError(MyError(
-                        "Should be unreachable.",
-                    ))));
-                    // unreachable!();
-                }
-            };
+        let maybe_exit = parser_with_context!(exit_matcher_parser)(&parser_context)(remaining);
+        if maybe_exit.is_ok() {
+            break;
        }
    }

-    if children.is_empty() {
-        return Err(nom::Err::Error(CustomError::MyError(MyError(
-            "Plain lists require at least one element.",
-        ))));
-    }
+    let (final_child_start, _final_item_first_parse) = match children.pop() {
+        Some(final_child) => final_child,
+        None => {
+            return Err(nom::Err::Error(CustomError::MyError(MyError(
+                "Plain lists require at least one element.",
+            ))));
+        }
+    };
+    let final_item_context =
+        parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
+    let (remaining, reparsed_final_item) =
+        parser_with_context!(plain_list_item)(&final_item_context)(final_child_start)?;
+    children.push((final_child_start, reparsed_final_item));
+
+    let (remaining, _trailing_ws) =
+        maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;

    let source = get_consumed(input, remaining);
-    Ok((remaining, PlainList { source, children }))
+    Ok((
+        remaining,
+        PlainList {
+            source,
+            children: children.into_iter().map(|(_start, item)| item).collect(),
+        },
+    ))
 }

 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -154,29 +111,12 @@ pub fn plain_list_item<'r, 's>(
    let (remaining, leading_whitespace) = space0(input)?;
    // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
    let indent_level = leading_whitespace.len();
-    let with_consume_context = context
-        .with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
-        .with_additional_node(ContextElement::ListItem(indent_level))
-        .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
-            class: ExitClass::Beta,
-            exit_matcher: &plain_list_item_end,
-        }));
-    let without_consume_context = context
-        .with_additional_node(ContextElement::ListItem(indent_level))
-        .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
-            class: ExitClass::Beta,
-            exit_matcher: &plain_list_item_end,
-        }));
-
-    let with_consume_matcher = parser_with_context!(element(true))(&with_consume_context);
-    let without_consume_matcher = parser_with_context!(element(true))(&without_consume_context);
-    let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
    let (remaining, bull) =
        verify(bullet, |bull: &str| bull != "*" || indent_level > 0)(remaining)?;
+
    let maybe_contentless_item: Res<&str, &str> = alt((eof, line_ending))(remaining);
    match maybe_contentless_item {
        Ok((rem, _ws)) => {
-            // TODO: do we need to consume if this isn't the last item?
            let source = get_consumed(input, rem);
            return Ok((
                rem,
@ -188,31 +128,39 @@ pub fn plain_list_item<'r, 's>(
                },
            ));
        }
-        Err(_) => {
-            let (remaining, _ws) = space1(remaining)?;
-            let (remaining, (mut contents, final_element)) = many_till(
-                &with_consume_matcher,
-                alt((
-                    terminated(&without_consume_matcher, exit_matcher),
-                    preceded(
-                        peek(tuple((&with_consume_matcher, exit_matcher))),
-                        &without_consume_matcher,
-                    ),
-                )),
-            )(remaining)?;
-            contents.push(final_element);
-            let source = get_consumed(input, remaining);
-            return Ok((
-                remaining,
-                PlainListItem {
-                    source,
-                    indentation: indent_level,
-                    bullet: bull,
-                    children: contents,
-                },
-            ));
-        }
+        Err(_) => {}
    };
+
+    let (remaining, _ws) = space1(remaining)?;
+    let parser_context = context
+        .with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
+        .with_additional_node(ContextElement::ListItem(indent_level))
+        .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
+            class: ExitClass::Beta,
+            exit_matcher: &plain_list_item_end,
+        }));
+
+    let (remaining, (children, _exit_contents)) = verify(
+        many_till(
+            parser_with_context!(element(true))(&parser_context),
+            parser_with_context!(exit_matcher_parser)(&parser_context),
+        ),
+        |(children, _exit_contents)| !children.is_empty(),
+    )(remaining)?;
+
+    let (remaining, _trailing_ws) =
+        maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
+
+    let source = get_consumed(input, remaining);
+    return Ok((
+        remaining,
+        PlainListItem {
+            source,
+            indentation: indent_level,
+            bullet: bull,
+            children,
+        },
+    ));
 }

 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -241,7 +189,11 @@ fn plain_list_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s

 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
 fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
-    recognize(parser_with_context!(line_indented_lte)(context))(input)
+    start_of_line(context, input)?;
+    recognize(tuple((
+        opt(blank_line),
+        parser_with_context!(line_indented_lte)(context),
+    )))(input)
 }

 #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -251,8 +203,6 @@ fn line_indented_lte<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'
            "Not inside a plain list item",
        ))))?;

-    start_of_line(context, input)?;
-
    let matched = recognize(verify(
        tuple((space0::<&str, _>, non_whitespace_character)),
        // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)