Merge branch 'fix_plain_list'
rustfmt Build rustfmt has succeeded Details
rust-test Build rust-test has failed Details
rust-build Build rust-build has succeeded Details

This commit is contained in:
Tom Alexander 2023-08-16 21:06:54 -04:00
commit 4776898894
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
4 changed files with 220 additions and 147 deletions

View File

@ -75,7 +75,6 @@ fn is_expect_fail(name: &str) -> Option<&str> {
"element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."),
"paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."),
"export_snippet_paragraph_break_precedence" => Some("The latest code for org-mode is matching the export snippet without the closing @@."), // https://list.orgmode.org/orgmode/fb61ea28-f004-4c25-adf7-69fc55683ed4@app.fastmail.com/T/#u
"plain_lists_trailing_whitespace_ownership" => Some("My plain list implementation is currently broken."),
_ => None,
}
}

View File

@ -0,0 +1,128 @@
* Test 1
** Source
#+begin_src org
1. foo
1. bar
2. baz
2. lorem
ipsum
#+end_src
** Ownership
This table is just showing ownership for the plain list items, not the containing plain list nor the elements inside each item.
| Plain List *Item* | Owns trailing blank lines |
|------------------------+---------------------------|
| foo (includes bar baz) | Yes |
| bar | Yes |
| baz | Yes |
| lorem | No |
** Analysis
This seems to imply that plain list items own their trailing blank lines except for the final plain list item in the top-most plain list which does not own its trailing blank lines.
* Test 2
** Source
#+begin_src org
1. foo
bar
1. baz
lorem
ipsum
dolar
#+end_src
** Ownership
This table is just showing ownership for the plain list items, not the containing plain list nor the elements inside each item.
| Plain List *Item* | Owns trailing blank lines |
|--------------------------+---------------------------|
| foo -> ipsum (inclusive) | No |
| baz lorem | No |
** Analysis
This shows that the final plain list item in a nested plain list (baz lorem) does not own its trailing blank lines which conflicts with "baz" from Test 1.
* Test 3
** Source
#+begin_src org
1. foo
1. bar
baz
2. lorem
ipsum
#+end_src
** Ownership
| Plain List *Item* | Owns trailing blank lines |
|------------------------+---------------------------|
| foo (includes bar baz) | Yes |
| bar baz | Yes |
| lorem | No |
** Analysis
This was to test if having an extra paragraph in the final list item in the nested list changes the behavior. The behavior is consistent with Test 1, so the extra paragraph is not the cause of the discrepancy.
* Test 4
** Source
#+begin_src org
1. foo
1. bar
2. baz
candy
2. lorem
ipsum
#+end_src
** Ownership
| Plain List *Item* | Owns trailing blank lines |
|----------------------------------+---------------------------|
| foo (includes bar baz and candy) | Yes |
| bar | Yes |
| baz | No |
| lorem | No |
** Analysis
This was to test if putting a non-plain-list element at the end of foo changes the ownership of blank lines. baz changed to no longer owning its trailing whitespace.
This seems to imply that list items own their trailing whitespace except for the final item unless that list item is at the end of a list item.
* Test 5
** Source
#+begin_src org
1. foo
1. bar
2. baz
candy
2. lorem
1. cat
2. dog
ipsum
#+end_src
** Ownership
| Plain List *Item* | Owns trailing blank lines |
|----------------------------------+---------------------------|
| foo (includes bar baz and candy) | Yes |
| bar | Yes |
| baz | No |
| lorem (includes cat and dog) | No |
| cat | Yes |
| dog | No |
** Analysis
This breaks the theory that the final list item nested at the end of a list item gets to own its trailing blank lines since dog does not own its blank lines despite Test 1's baz owning its blank lines.
New Theory: final list items only own their blank lines if they are nested at the end of a non-final list item.

View File

@ -39,16 +39,15 @@ fn main_body() -> Result<(), Box<dyn std::error::Error>> {
run_compare(
std::env::args()
.nth(1)
.expect("Pass a single file into this script."),
.ok_or("Pass a single file into this script.")?,
)
}
#[cfg(feature = "compare")]
fn run_compare<P: AsRef<Path>>(todo_org_path: P) -> Result<(), Box<dyn std::error::Error>> {
let org_contents = std::fs::read_to_string(todo_org_path.as_ref()).expect("Read org file.");
let org_contents = std::fs::read_to_string(todo_org_path.as_ref())?;
let (remaining, rust_parsed) = document(org_contents.as_str()).expect("Org Parse failure");
let org_sexp =
emacs_parse_org_document(todo_org_path.as_ref()).expect("Use emacs to parse org file.");
let org_sexp = emacs_parse_org_document(todo_org_path.as_ref())?;
let (_remaining, parsed_sexp) =
sexp_with_padding(org_sexp.as_str()).expect("Sexp Parse failure");
@ -57,11 +56,8 @@ fn run_compare<P: AsRef<Path>>(todo_org_path: P) -> Result<(), Box<dyn std::erro
println!("{:#?}", rust_parsed);
// We do the diffing after printing out both parsed forms in case the diffing panics
let diff_result =
compare_document(&parsed_sexp, &rust_parsed).expect("Compare parsed documents.");
diff_result
.print()
.expect("Print document parse tree diff.");
let diff_result = compare_document(&parsed_sexp, &rust_parsed)?;
diff_result.print()?;
if diff_result.is_bad() {
Err("Diff results do not match.")?;

View File

@ -6,6 +6,7 @@ use nom::character::complete::one_of;
use nom::character::complete::space0;
use nom::character::complete::space1;
use nom::combinator::eof;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::combinator::verify;
@ -14,8 +15,6 @@ use nom::multi::many_till;
use nom::sequence::preceded;
use nom::sequence::terminated;
use nom::sequence::tuple;
#[cfg(feature = "tracing")]
use tracing::span;
use super::greater_element::PlainList;
use super::greater_element::PlainListItem;
@ -32,6 +31,7 @@ use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::util::blank_line;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
use crate::parser::util::start_of_line;
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -42,107 +42,64 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s
class: ExitClass::Beta,
exit_matcher: &plain_list_end,
}));
let without_consume_context =
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
let with_consume_context =
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true));
let without_consume_matcher = parser_with_context!(plain_list_item)(&without_consume_context);
let with_consume_matcher = parser_with_context!(plain_list_item)(&with_consume_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
// children stores tuple of (input string, parsed object) so we can re-parse the final item
let mut children = Vec::new();
let mut first_item_indentation: Option<usize> = None;
let mut remaining = input;
// The final list item does not consume trailing blank lines (which instead get consumed by the list). We have three options here:
//
// 1. Parse all items while consuming trailing whitespace, then edit the final item to remove trailing whitespace.
// 2. Parse all items without consuming trailing whitespace, then edit all but the final one to add in the trailing whitespace.
// 3. Re-parse the final item with consume trailing whitespace disabled.
//
// While #3 is the most slow, it also seems to cleanest and involves the least manual mutation of already-parsed objects so I am going with #3 for now, but we should revisit #1 or #2 when the parser is more developed.
loop {
/*
Trailing whitespace belongs to the plain list, not the plain list item
let list_item = parser_with_context!(plain_list_item)(&parser_context)(remaining);
match list_item {
Ok((remain, item))
if item.indentation == *first_item_indentation.get_or_insert(item.indentation) =>
{
children.push((remaining, item));
remaining = remain;
}
Ok(_) | Err(_) => {
break;
}
};
Possible outcomes:
Don't consume, yes exit matcher
Don't consume, no additional item
Consume, additional item
*/
{
// Don't consume, yes exit matcher
#[cfg(feature = "tracing")]
let span = span!(tracing::Level::DEBUG, "first");
#[cfg(feature = "tracing")]
let _enter = span.enter();
let last_item_then_exit = tuple((without_consume_matcher, exit_matcher))(remaining);
match last_item_then_exit {
Ok((remain, (item, _exit)))
if item.indentation
== *first_item_indentation.get_or_insert(item.indentation) =>
{
remaining = remain;
children.push(item);
break;
}
Ok(_) | Err(_) => {}
};
}
{
// Consume, additional item
#[cfg(feature = "tracing")]
let span = span!(tracing::Level::DEBUG, "second");
#[cfg(feature = "tracing")]
let _enter = span.enter();
let not_last_item =
tuple((with_consume_matcher, peek(without_consume_matcher)))(remaining);
match not_last_item {
Ok((remain, (item, future_item)))
if item.indentation
== *first_item_indentation.get_or_insert(item.indentation)
&& future_item.indentation
== *first_item_indentation.get_or_insert(item.indentation) =>
{
remaining = remain;
children.push(item);
continue;
}
Ok(_) | Err(_) => {}
};
}
{
// Don't consume, no additional item
#[cfg(feature = "tracing")]
let span = span!(tracing::Level::DEBUG, "third");
#[cfg(feature = "tracing")]
let _enter = span.enter();
let last_item_then_exit = without_consume_matcher(remaining);
match last_item_then_exit {
Ok((remain, item))
if item.indentation
== *first_item_indentation.get_or_insert(item.indentation) =>
{
remaining = remain;
children.push(item);
break;
}
Ok(_) | Err(_) => {
// TODO: Maybe this is reachable when there are no items at all.
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Should be unreachable.",
))));
// unreachable!();
}
};
let maybe_exit = parser_with_context!(exit_matcher_parser)(&parser_context)(remaining);
if maybe_exit.is_ok() {
break;
}
}
if children.is_empty() {
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Plain lists require at least one element.",
))));
}
let (final_child_start, _final_item_first_parse) = match children.pop() {
Some(final_child) => final_child,
None => {
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Plain lists require at least one element.",
))));
}
};
let final_item_context =
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
let (remaining, reparsed_final_item) =
parser_with_context!(plain_list_item)(&final_item_context)(final_child_start)?;
children.push((final_child_start, reparsed_final_item));
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, PlainList { source, children }))
Ok((
remaining,
PlainList {
source,
children: children.into_iter().map(|(_start, item)| item).collect(),
},
))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -154,29 +111,12 @@ pub fn plain_list_item<'r, 's>(
let (remaining, leading_whitespace) = space0(input)?;
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
let indent_level = leading_whitespace.len();
let with_consume_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::ListItem(indent_level))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
exit_matcher: &plain_list_item_end,
}));
let without_consume_context = context
.with_additional_node(ContextElement::ListItem(indent_level))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
exit_matcher: &plain_list_item_end,
}));
let with_consume_matcher = parser_with_context!(element(true))(&with_consume_context);
let without_consume_matcher = parser_with_context!(element(true))(&without_consume_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
let (remaining, bull) =
verify(bullet, |bull: &str| bull != "*" || indent_level > 0)(remaining)?;
let maybe_contentless_item: Res<&str, &str> = alt((eof, line_ending))(remaining);
match maybe_contentless_item {
Ok((rem, _ws)) => {
// TODO: do we need to consume if this isn't the last item?
let source = get_consumed(input, rem);
return Ok((
rem,
@ -188,31 +128,39 @@ pub fn plain_list_item<'r, 's>(
},
));
}
Err(_) => {
let (remaining, _ws) = space1(remaining)?;
let (remaining, (mut contents, final_element)) = many_till(
&with_consume_matcher,
alt((
terminated(&without_consume_matcher, exit_matcher),
preceded(
peek(tuple((&with_consume_matcher, exit_matcher))),
&without_consume_matcher,
),
)),
)(remaining)?;
contents.push(final_element);
let source = get_consumed(input, remaining);
return Ok((
remaining,
PlainListItem {
source,
indentation: indent_level,
bullet: bull,
children: contents,
},
));
}
Err(_) => {}
};
let (remaining, _ws) = space1(remaining)?;
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::ListItem(indent_level))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
exit_matcher: &plain_list_item_end,
}));
let (remaining, (children, _exit_contents)) = verify(
many_till(
parser_with_context!(element(true))(&parser_context),
parser_with_context!(exit_matcher_parser)(&parser_context),
),
|(children, _exit_contents)| !children.is_empty(),
)(remaining)?;
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
let source = get_consumed(input, remaining);
return Ok((
remaining,
PlainListItem {
source,
indentation: indent_level,
bullet: bull,
children,
},
));
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -241,7 +189,11 @@ fn plain_list_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
recognize(parser_with_context!(line_indented_lte)(context))(input)
start_of_line(context, input)?;
recognize(tuple((
opt(blank_line),
parser_with_context!(line_indented_lte)(context),
)))(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -251,8 +203,6 @@ fn line_indented_lte<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'
"Not inside a plain list item",
))))?;
start_of_line(context, input)?;
let matched = recognize(verify(
tuple((space0::<&str, _>, non_whitespace_character)),
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)