Compare commits

...

8 Commits

Author SHA1 Message Date
Tom Alexander
4776898894
Merge branch 'fix_plain_list'
Some checks failed
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has failed
rust-build Build rust-build has succeeded
2023-08-16 21:06:54 -04:00
Tom Alexander
8e95ce6368
Add notes about plain list trailing blank line ownership investigation. 2023-08-16 20:24:00 -04:00
Tom Alexander
6c9c304f37
Re-enable disabled test. 2023-08-16 17:39:10 -04:00
Tom Alexander
7fafbfb6bb
Do not consume whitespace in the final plain list item. 2023-08-16 17:37:19 -04:00
Tom Alexander
56281633f3
Support blank link in plain_list_item_end, move exit matcher to end of loop in plain_list, and maybe consume trailing whitespace in plain_list_item. 2023-08-16 17:09:06 -04:00
Tom Alexander
823c33ef8e
Reduce use of expect in main.rs 2023-08-16 16:37:14 -04:00
Tom Alexander
7df393f31d
Make a new naive implementation of plain_list_item.
Still need to update plain_list_item_end and handle the whitespace ownership issues, but starting from a simplified state will help.
2023-08-16 16:05:24 -04:00
Tom Alexander
72d5f8f35c
Make a new naive implementation of plain_list. 2023-08-16 16:05:24 -04:00
4 changed files with 220 additions and 147 deletions

View File

@ -75,7 +75,6 @@ fn is_expect_fail(name: &str) -> Option<&str> {
"element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."),
"paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."),
"export_snippet_paragraph_break_precedence" => Some("The latest code for org-mode is matching the export snippet without the closing @@."), // https://list.orgmode.org/orgmode/fb61ea28-f004-4c25-adf7-69fc55683ed4@app.fastmail.com/T/#u
"plain_lists_trailing_whitespace_ownership" => Some("My plain list implementation is currently broken."),
_ => None,
}
}

View File

@ -0,0 +1,128 @@
* Test 1
** Source
#+begin_src org
1. foo
1. bar
2. baz
2. lorem
ipsum
#+end_src
** Ownership
This table is just showing ownership for the plain list items, not the containing plain list nor the elements inside each item.
| Plain List *Item* | Owns trailing blank lines |
|------------------------+---------------------------|
| foo (includes bar baz) | Yes |
| bar | Yes |
| baz | Yes |
| lorem | No |
** Analysis
This seems to imply that plain list items own their trailing blank lines except for the final plain list item in the top-most plain list which does not own its trailing blank lines.
* Test 2
** Source
#+begin_src org
1. foo
bar
1. baz
lorem
ipsum
dolar
#+end_src
** Ownership
This table is just showing ownership for the plain list items, not the containing plain list nor the elements inside each item.
| Plain List *Item* | Owns trailing blank lines |
|--------------------------+---------------------------|
| foo -> ipsum (inclusive) | No |
| baz lorem | No |
** Analysis
This shows that the final plain list item in a nested plain list (baz lorem) does not own its trailing blank lines which conflicts with "baz" from Test 1.
* Test 3
** Source
#+begin_src org
1. foo
1. bar
baz
2. lorem
ipsum
#+end_src
** Ownership
| Plain List *Item* | Owns trailing blank lines |
|------------------------+---------------------------|
| foo (includes bar baz) | Yes |
| bar baz | Yes |
| lorem | No |
** Analysis
This was to test if having an extra paragraph in the final list item in the nested list changes the behavior. The behavior is consistent with Test 1, so the extra paragraph is not the cause of the discrepancy.
* Test 4
** Source
#+begin_src org
1. foo
1. bar
2. baz
candy
2. lorem
ipsum
#+end_src
** Ownership
| Plain List *Item* | Owns trailing blank lines |
|----------------------------------+---------------------------|
| foo (includes bar baz and candy) | Yes |
| bar | Yes |
| baz | No |
| lorem | No |
** Analysis
This was to test if putting a non-plain-list element at the end of foo changes the ownership of blank lines. baz changed to no longer owning its trailing whitespace.
This seems to imply that list items own their trailing whitespace except for the final item unless that list item is at the end of a list item.
* Test 5
** Source
#+begin_src org
1. foo
1. bar
2. baz
candy
2. lorem
1. cat
2. dog
ipsum
#+end_src
** Ownership
| Plain List *Item* | Owns trailing blank lines |
|----------------------------------+---------------------------|
| foo (includes bar baz and candy) | Yes |
| bar | Yes |
| baz | No |
| lorem (includes cat and dog) | No |
| cat | Yes |
| dog | No |
** Analysis
This breaks the theory that the final list item nested at the end of a list item gets to own its trailing blank lines since dog does not own its blank lines despite Test 1's baz owning its blank lines.
New Theory: final list items only own their blank lines if they are nested at the end of a non-final list item.

View File

@ -39,16 +39,15 @@ fn main_body() -> Result<(), Box<dyn std::error::Error>> {
run_compare(
std::env::args()
.nth(1)
.expect("Pass a single file into this script."),
.ok_or("Pass a single file into this script.")?,
)
}
#[cfg(feature = "compare")]
fn run_compare<P: AsRef<Path>>(todo_org_path: P) -> Result<(), Box<dyn std::error::Error>> {
let org_contents = std::fs::read_to_string(todo_org_path.as_ref()).expect("Read org file.");
let org_contents = std::fs::read_to_string(todo_org_path.as_ref())?;
let (remaining, rust_parsed) = document(org_contents.as_str()).expect("Org Parse failure");
let org_sexp =
emacs_parse_org_document(todo_org_path.as_ref()).expect("Use emacs to parse org file.");
let org_sexp = emacs_parse_org_document(todo_org_path.as_ref())?;
let (_remaining, parsed_sexp) =
sexp_with_padding(org_sexp.as_str()).expect("Sexp Parse failure");
@ -57,11 +56,8 @@ fn run_compare<P: AsRef<Path>>(todo_org_path: P) -> Result<(), Box<dyn std::erro
println!("{:#?}", rust_parsed);
// We do the diffing after printing out both parsed forms in case the diffing panics
let diff_result =
compare_document(&parsed_sexp, &rust_parsed).expect("Compare parsed documents.");
diff_result
.print()
.expect("Print document parse tree diff.");
let diff_result = compare_document(&parsed_sexp, &rust_parsed)?;
diff_result.print()?;
if diff_result.is_bad() {
Err("Diff results do not match.")?;

View File

@ -6,6 +6,7 @@ use nom::character::complete::one_of;
use nom::character::complete::space0;
use nom::character::complete::space1;
use nom::combinator::eof;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::combinator::verify;
@ -14,8 +15,6 @@ use nom::multi::many_till;
use nom::sequence::preceded;
use nom::sequence::terminated;
use nom::sequence::tuple;
#[cfg(feature = "tracing")]
use tracing::span;
use super::greater_element::PlainList;
use super::greater_element::PlainListItem;
@ -32,6 +31,7 @@ use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::util::blank_line;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
use crate::parser::util::start_of_line;
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -42,107 +42,64 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s
class: ExitClass::Beta,
exit_matcher: &plain_list_end,
}));
let without_consume_context =
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
let with_consume_context =
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true));
let without_consume_matcher = parser_with_context!(plain_list_item)(&without_consume_context);
let with_consume_matcher = parser_with_context!(plain_list_item)(&with_consume_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
// children stores tuple of (input string, parsed object) so we can re-parse the final item
let mut children = Vec::new();
let mut first_item_indentation: Option<usize> = None;
let mut remaining = input;
// The final list item does not consume trailing blank lines (which instead get consumed by the list). We have three options here:
//
// 1. Parse all items while consuming trailing whitespace, then edit the final item to remove trailing whitespace.
// 2. Parse all items without consuming trailing whitespace, then edit all but the final one to add in the trailing whitespace.
// 3. Re-parse the final item with consume trailing whitespace disabled.
//
// While #3 is the most slow, it also seems to cleanest and involves the least manual mutation of already-parsed objects so I am going with #3 for now, but we should revisit #1 or #2 when the parser is more developed.
loop {
/*
Trailing whitespace belongs to the plain list, not the plain list item
Possible outcomes:
Don't consume, yes exit matcher
Don't consume, no additional item
Consume, additional item
*/
{
// Don't consume, yes exit matcher
#[cfg(feature = "tracing")]
let span = span!(tracing::Level::DEBUG, "first");
#[cfg(feature = "tracing")]
let _enter = span.enter();
let last_item_then_exit = tuple((without_consume_matcher, exit_matcher))(remaining);
match last_item_then_exit {
Ok((remain, (item, _exit)))
if item.indentation
== *first_item_indentation.get_or_insert(item.indentation) =>
{
remaining = remain;
children.push(item);
break;
}
Ok(_) | Err(_) => {}
};
}
{
// Consume, additional item
#[cfg(feature = "tracing")]
let span = span!(tracing::Level::DEBUG, "second");
#[cfg(feature = "tracing")]
let _enter = span.enter();
let not_last_item =
tuple((with_consume_matcher, peek(without_consume_matcher)))(remaining);
match not_last_item {
Ok((remain, (item, future_item)))
if item.indentation
== *first_item_indentation.get_or_insert(item.indentation)
&& future_item.indentation
== *first_item_indentation.get_or_insert(item.indentation) =>
{
remaining = remain;
children.push(item);
continue;
}
Ok(_) | Err(_) => {}
};
}
{
// Don't consume, no additional item
#[cfg(feature = "tracing")]
let span = span!(tracing::Level::DEBUG, "third");
#[cfg(feature = "tracing")]
let _enter = span.enter();
let last_item_then_exit = without_consume_matcher(remaining);
match last_item_then_exit {
let list_item = parser_with_context!(plain_list_item)(&parser_context)(remaining);
match list_item {
Ok((remain, item))
if item.indentation
== *first_item_indentation.get_or_insert(item.indentation) =>
if item.indentation == *first_item_indentation.get_or_insert(item.indentation) =>
{
children.push((remaining, item));
remaining = remain;
children.push(item);
break;
}
Ok(_) | Err(_) => {
// TODO: Maybe this is reachable when there are no items at all.
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Should be unreachable.",
))));
// unreachable!();
break;
}
};
let maybe_exit = parser_with_context!(exit_matcher_parser)(&parser_context)(remaining);
if maybe_exit.is_ok() {
break;
}
}
if children.is_empty() {
let (final_child_start, _final_item_first_parse) = match children.pop() {
Some(final_child) => final_child,
None => {
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Plain lists require at least one element.",
))));
}
};
let final_item_context =
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
let (remaining, reparsed_final_item) =
parser_with_context!(plain_list_item)(&final_item_context)(final_child_start)?;
children.push((final_child_start, reparsed_final_item));
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, PlainList { source, children }))
Ok((
remaining,
PlainList {
source,
children: children.into_iter().map(|(_start, item)| item).collect(),
},
))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -154,29 +111,12 @@ pub fn plain_list_item<'r, 's>(
let (remaining, leading_whitespace) = space0(input)?;
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
let indent_level = leading_whitespace.len();
let with_consume_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::ListItem(indent_level))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
exit_matcher: &plain_list_item_end,
}));
let without_consume_context = context
.with_additional_node(ContextElement::ListItem(indent_level))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
exit_matcher: &plain_list_item_end,
}));
let with_consume_matcher = parser_with_context!(element(true))(&with_consume_context);
let without_consume_matcher = parser_with_context!(element(true))(&without_consume_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
let (remaining, bull) =
verify(bullet, |bull: &str| bull != "*" || indent_level > 0)(remaining)?;
let maybe_contentless_item: Res<&str, &str> = alt((eof, line_ending))(remaining);
match maybe_contentless_item {
Ok((rem, _ws)) => {
// TODO: do we need to consume if this isn't the last item?
let source = get_consumed(input, rem);
return Ok((
rem,
@ -188,19 +128,29 @@ pub fn plain_list_item<'r, 's>(
},
));
}
Err(_) => {
Err(_) => {}
};
let (remaining, _ws) = space1(remaining)?;
let (remaining, (mut contents, final_element)) = many_till(
&with_consume_matcher,
alt((
terminated(&without_consume_matcher, exit_matcher),
preceded(
peek(tuple((&with_consume_matcher, exit_matcher))),
&without_consume_matcher,
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::ListItem(indent_level))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
exit_matcher: &plain_list_item_end,
}));
let (remaining, (children, _exit_contents)) = verify(
many_till(
parser_with_context!(element(true))(&parser_context),
parser_with_context!(exit_matcher_parser)(&parser_context),
),
)),
|(children, _exit_contents)| !children.is_empty(),
)(remaining)?;
contents.push(final_element);
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
let source = get_consumed(input, remaining);
return Ok((
remaining,
@ -208,11 +158,9 @@ pub fn plain_list_item<'r, 's>(
source,
indentation: indent_level,
bullet: bull,
children: contents,
children,
},
));
}
};
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -241,7 +189,11 @@ fn plain_list_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
recognize(parser_with_context!(line_indented_lte)(context))(input)
start_of_line(context, input)?;
recognize(tuple((
opt(blank_line),
parser_with_context!(line_indented_lte)(context),
)))(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
@ -251,8 +203,6 @@ fn line_indented_lte<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'
"Not inside a plain list item",
))))?;
start_of_line(context, input)?;
let matched = recognize(verify(
tuple((space0::<&str, _>, non_whitespace_character)),
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)