451 lines
16 KiB
Rust
451 lines
16 KiB
Rust
use super::greater_element::PlainList;
|
|
use super::greater_element::PlainListItem;
|
|
use super::parser_with_context::parser_with_context;
|
|
|
|
use super::util::non_whitespace_character;
|
|
use super::Context;
|
|
use crate::error::CustomError;
|
|
use crate::error::MyError;
|
|
use crate::error::Res;
|
|
use crate::parser::element_parser::element;
|
|
use crate::parser::exiting::ExitClass;
|
|
use crate::parser::parser_context::ContextElement;
|
|
use crate::parser::parser_context::ExitMatcherNode;
|
|
use crate::parser::util::blank_line;
|
|
use crate::parser::util::exit_matcher_parser;
|
|
use crate::parser::util::get_consumed;
|
|
use crate::parser::util::start_of_line;
|
|
use nom::branch::alt;
|
|
use nom::bytes::complete::tag;
|
|
use nom::character::complete::digit1;
|
|
use nom::character::complete::line_ending;
|
|
use nom::character::complete::one_of;
|
|
use nom::character::complete::space0;
|
|
use nom::character::complete::space1;
|
|
use nom::combinator::eof;
|
|
use nom::combinator::peek;
|
|
use nom::combinator::recognize;
|
|
use nom::combinator::verify;
|
|
use nom::multi::many1;
|
|
use nom::multi::many_till;
|
|
use nom::sequence::preceded;
|
|
use nom::sequence::terminated;
|
|
use nom::sequence::tuple;
|
|
use tracing::span;
|
|
|
|
#[tracing::instrument(ret, level = "debug")]
|
|
pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainList<'s>> {
|
|
let parser_context = context
|
|
.with_additional_node(ContextElement::Context("plain list"))
|
|
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
|
class: ExitClass::Beta,
|
|
exit_matcher: &plain_list_end,
|
|
}));
|
|
let without_consume_context =
|
|
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
|
|
let with_consume_context =
|
|
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true));
|
|
let without_consume_matcher = parser_with_context!(plain_list_item)(&without_consume_context);
|
|
let with_consume_matcher = parser_with_context!(plain_list_item)(&with_consume_context);
|
|
let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
|
|
let mut children = Vec::new();
|
|
let mut first_item_indentation: Option<usize> = None;
|
|
let mut remaining = input;
|
|
|
|
loop {
|
|
/*
|
|
Trailing whitespace belongs to the plain list, not the plain list item
|
|
|
|
Possible outcomes:
|
|
Don't consume, yes exit matcher
|
|
Don't consume, no additional item
|
|
Consume, additional item
|
|
*/
|
|
{
|
|
// Don't consume, yes exit matcher
|
|
let span = span!(tracing::Level::DEBUG, "first");
|
|
let _enter = span.enter();
|
|
|
|
let last_item_then_exit = tuple((without_consume_matcher, exit_matcher))(remaining);
|
|
match last_item_then_exit {
|
|
Ok((remain, (item, _exit)))
|
|
if item.indentation
|
|
== *first_item_indentation.get_or_insert(item.indentation) =>
|
|
{
|
|
remaining = remain;
|
|
children.push(item);
|
|
break;
|
|
}
|
|
Ok(_) | Err(_) => {}
|
|
};
|
|
}
|
|
|
|
{
|
|
// Consume, additional item
|
|
let span = span!(tracing::Level::DEBUG, "second");
|
|
let _enter = span.enter();
|
|
|
|
let not_last_item =
|
|
tuple((with_consume_matcher, peek(without_consume_matcher)))(remaining);
|
|
match not_last_item {
|
|
Ok((remain, (item, future_item)))
|
|
if item.indentation
|
|
== *first_item_indentation.get_or_insert(item.indentation)
|
|
&& future_item.indentation
|
|
== *first_item_indentation.get_or_insert(item.indentation) =>
|
|
{
|
|
remaining = remain;
|
|
children.push(item);
|
|
continue;
|
|
}
|
|
Ok(_) | Err(_) => {}
|
|
};
|
|
}
|
|
|
|
{
|
|
// Don't consume, no additional item
|
|
let span = span!(tracing::Level::DEBUG, "third");
|
|
let _enter = span.enter();
|
|
|
|
let last_item_then_exit = without_consume_matcher(remaining);
|
|
match last_item_then_exit {
|
|
Ok((remain, item))
|
|
if item.indentation
|
|
== *first_item_indentation.get_or_insert(item.indentation) =>
|
|
{
|
|
remaining = remain;
|
|
children.push(item);
|
|
break;
|
|
}
|
|
Ok(_) | Err(_) => {
|
|
// TODO: Maybe this is reachable when there are no items at all.
|
|
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
|
"Should be unreachable.",
|
|
))));
|
|
// unreachable!();
|
|
}
|
|
};
|
|
}
|
|
}
|
|
|
|
if children.is_empty() {
|
|
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
|
"Plain lists require at least one element.",
|
|
))));
|
|
}
|
|
|
|
let source = get_consumed(input, remaining);
|
|
Ok((remaining, PlainList { source, children }))
|
|
}
|
|
|
|
#[tracing::instrument(ret, level = "debug")]
|
|
pub fn plain_list_item<'r, 's>(
|
|
context: Context<'r, 's>,
|
|
input: &'s str,
|
|
) -> Res<&'s str, PlainListItem<'s>> {
|
|
start_of_line(context, input)?;
|
|
let (remaining, leading_whitespace) = space0(input)?;
|
|
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|
|
let indent_level = leading_whitespace.len();
|
|
let with_consume_context = context
|
|
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
|
|
.with_additional_node(ContextElement::ListItem(indent_level))
|
|
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
|
class: ExitClass::Beta,
|
|
exit_matcher: &plain_list_item_end,
|
|
}));
|
|
let without_consume_context = context
|
|
.with_additional_node(ContextElement::ListItem(indent_level))
|
|
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
|
class: ExitClass::Beta,
|
|
exit_matcher: &plain_list_item_end,
|
|
}));
|
|
|
|
let with_consume_matcher = parser_with_context!(element)(&with_consume_context);
|
|
let without_consume_matcher = parser_with_context!(element)(&without_consume_context);
|
|
let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
|
|
let (remaining, bull) =
|
|
verify(bullet, |bull: &str| bull != "*" || indent_level > 0)(remaining)?;
|
|
let maybe_contentless_item: Res<&str, &str> = alt((eof, line_ending))(remaining);
|
|
match maybe_contentless_item {
|
|
Ok((rem, _ws)) => {
|
|
// TODO: do we need to consume if this isn't the last item?
|
|
let source = get_consumed(input, rem);
|
|
return Ok((
|
|
rem,
|
|
PlainListItem {
|
|
source,
|
|
indentation: indent_level,
|
|
bullet: bull,
|
|
children: Vec::new(),
|
|
},
|
|
));
|
|
}
|
|
Err(_) => {
|
|
let (remaining, _ws) = space1(remaining)?;
|
|
let (remaining, (mut contents, final_element)) = many_till(
|
|
with_consume_matcher,
|
|
alt((
|
|
terminated(without_consume_matcher, exit_matcher),
|
|
preceded(
|
|
peek(tuple((with_consume_matcher, exit_matcher))),
|
|
without_consume_matcher,
|
|
),
|
|
)),
|
|
)(remaining)?;
|
|
contents.push(final_element);
|
|
let source = get_consumed(input, remaining);
|
|
return Ok((
|
|
remaining,
|
|
PlainListItem {
|
|
source,
|
|
indentation: indent_level,
|
|
bullet: bull,
|
|
children: contents,
|
|
},
|
|
));
|
|
}
|
|
};
|
|
}
|
|
|
|
#[tracing::instrument(ret, level = "debug")]
|
|
fn bullet<'s>(i: &'s str) -> Res<&'s str, &'s str> {
|
|
alt((
|
|
tag("*"),
|
|
tag("-"),
|
|
tag("+"),
|
|
recognize(tuple((counter, alt((tag("."), tag(")")))))),
|
|
))(i)
|
|
}
|
|
|
|
#[tracing::instrument(ret, level = "debug")]
|
|
fn counter<'s>(i: &'s str) -> Res<&'s str, &'s str> {
|
|
alt((recognize(one_of("abcdefghijklmnopqrstuvwxyz")), digit1))(i)
|
|
}
|
|
|
|
#[tracing::instrument(ret, level = "debug")]
|
|
fn plain_list_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
|
let start_of_line_matcher = parser_with_context!(start_of_line)(context);
|
|
recognize(tuple((
|
|
start_of_line_matcher,
|
|
verify(many1(blank_line), |lines: &Vec<&str>| lines.len() >= 2),
|
|
)))(input)
|
|
}
|
|
|
|
#[tracing::instrument(ret, level = "debug")]
|
|
fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
|
let current_item_indent_level: &usize =
|
|
get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError(
|
|
"Not inside a plain list item",
|
|
))))?;
|
|
let plain_list_item_matcher = parser_with_context!(plain_list_item)(context);
|
|
let line_indented_lte_matcher = parser_with_context!(line_indented_lte)(context);
|
|
alt((
|
|
recognize(verify(plain_list_item_matcher, |pli| {
|
|
pli.indentation <= *current_item_indent_level
|
|
})),
|
|
recognize(line_indented_lte_matcher),
|
|
))(input)
|
|
}
|
|
|
|
#[tracing::instrument(ret, level = "debug")]
|
|
fn line_indented_lte<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
|
let current_item_indent_level: &usize =
|
|
get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError(
|
|
"Not inside a plain list item",
|
|
))))?;
|
|
|
|
start_of_line(context, input)?;
|
|
|
|
let matched = recognize(verify(
|
|
tuple((space0::<&str, _>, non_whitespace_character)),
|
|
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|
|
|(_space0, _anychar)| _space0.len() <= *current_item_indent_level,
|
|
))(input)?;
|
|
|
|
Ok(matched)
|
|
}
|
|
|
|
fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize> {
|
|
for thing in context.iter() {
|
|
match thing.get_data() {
|
|
ContextElement::ListItem(depth) => return Some(depth),
|
|
_ => {}
|
|
};
|
|
}
|
|
None
|
|
}
|
|
|
|
// Unit tests for the plain list parsers defined in this file. Each test builds a
// fresh context tree rooted at a DocumentRoot node for its input and runs one parser
// against that input.
#[cfg(test)]
|
|
mod tests {
|
|
use crate::parser::parser_context::ContextElement;
|
|
use crate::parser::parser_context::ContextTree;
|
|
use crate::parser::parser_with_context::parser_with_context;
|
|
use crate::parser::Source;
|
|
|
|
use super::*;
|
|
|
|
// A bullet with no contents is a complete item: nothing remains and the item's
// source is the entire input.
#[test]
|
|
fn plain_list_item_empty() {
|
|
let input = "1.";
|
|
let initial_context: ContextTree<'_, '_> = ContextTree::new();
|
|
let document_context =
|
|
initial_context.with_additional_node(ContextElement::DocumentRoot(input));
|
|
let plain_list_item_matcher = parser_with_context!(plain_list_item)(&document_context);
|
|
let (remaining, result) = plain_list_item_matcher(input).unwrap();
|
|
assert_eq!(remaining, "");
|
|
assert_eq!(result.source, "1.");
|
|
}
|
|
|
|
// A bullet followed by a space and text is a single item covering the whole input.
#[test]
|
|
fn plain_list_item_simple() {
|
|
let input = "1. foo";
|
|
let initial_context: ContextTree<'_, '_> = ContextTree::new();
|
|
let document_context =
|
|
initial_context.with_additional_node(ContextElement::DocumentRoot(input));
|
|
let plain_list_item_matcher = parser_with_context!(plain_list_item)(&document_context);
|
|
let (remaining, result) = plain_list_item_matcher(input).unwrap();
|
|
assert_eq!(remaining, "");
|
|
assert_eq!(result.source, "1. foo");
|
|
}
|
|
|
|
// Same input as plain_list_item_empty, but parsed through plain_list: a list made of
// one contentless item consumes the whole input.
#[test]
|
|
fn plain_list_empty() {
|
|
let input = "1.";
|
|
let initial_context: ContextTree<'_, '_> = ContextTree::new();
|
|
let document_context =
|
|
initial_context.with_additional_node(ContextElement::DocumentRoot(input));
|
|
let plain_list_matcher = parser_with_context!(plain_list)(&document_context);
|
|
let (remaining, result) = plain_list_matcher(input).unwrap();
|
|
assert_eq!(remaining, "");
|
|
assert_eq!(result.source, "1.");
|
|
}
|
|
|
|
// Same input as plain_list_item_simple, but parsed through plain_list.
#[test]
|
|
fn plain_list_simple() {
|
|
let input = "1. foo";
|
|
let initial_context: ContextTree<'_, '_> = ContextTree::new();
|
|
let document_context =
|
|
initial_context.with_additional_node(ContextElement::DocumentRoot(input));
|
|
let plain_list_matcher = parser_with_context!(plain_list)(&document_context);
|
|
let (remaining, result) = plain_list_matcher(input).unwrap();
|
|
assert_eq!(remaining, "");
|
|
assert_eq!(result.source, "1. foo");
|
|
}
|
|
|
|
// An asterisk bullet with no indentation must be rejected by plain_list.
#[test]
|
|
fn plain_list_cant_start_line_with_asterisk() {
|
|
// Plain lists with an asterisk bullet must be indented or else they would be a headline
|
|
let input = "* foo";
|
|
let initial_context: ContextTree<'_, '_> = ContextTree::new();
|
|
let document_context =
|
|
initial_context.with_additional_node(ContextElement::DocumentRoot(input));
|
|
let plain_list_matcher = parser_with_context!(plain_list)(&document_context);
|
|
let result = plain_list_matcher(input);
|
|
assert!(result.is_err());
|
|
}
|
|
|
|
// Counterpart of the test above: once the asterisk bullet is preceded by whitespace,
// plain_list accepts it.
#[test]
|
|
fn indented_can_start_line_with_asterisk() {
|
|
// An indented asterisk bullet is accepted as a plain list item (unindented it would be a headline)
|
|
let input = " * foo";
|
|
let initial_context: ContextTree<'_, '_> = ContextTree::new();
|
|
let document_context =
|
|
initial_context.with_additional_node(ContextElement::DocumentRoot(input));
|
|
let plain_list_matcher = parser_with_context!(plain_list)(&document_context);
|
|
let result = plain_list_matcher(input);
|
|
assert!(result.is_ok());
|
|
}
|
|
|
|
// Parses through `element` and checks that the parsed element's source ends after the
// two blank lines that follow the third item, leaving the `ipsum` line unparsed.
// NOTE(review): the expected remainder begins with a space, while the `ipsum` line of
// the input as shown here has no leading whitespace — confirm the indentation inside
// the raw strings is intact.
#[test]
|
|
fn two_blank_lines_ends_list() {
|
|
let input = r#"1. foo
|
|
2. bar
|
|
baz
|
|
3. lorem
|
|
|
|
|
|
ipsum
|
|
"#;
|
|
let initial_context: ContextTree<'_, '_> = ContextTree::new();
|
|
let document_context =
|
|
initial_context.with_additional_node(ContextElement::DocumentRoot(input));
|
|
let plain_list_matcher = parser_with_context!(element)(&document_context);
|
|
let (remaining, result) =
|
|
plain_list_matcher(input).expect("Should parse the plain list successfully.");
|
|
assert_eq!(remaining, " ipsum\n");
|
|
assert_eq!(
|
|
result.get_source(),
|
|
r#"1. foo
|
|
2. bar
|
|
baz
|
|
3. lorem
|
|
|
|
|
|
"#
|
|
);
|
|
}
|
|
|
|
// Parses through `element` and checks that the parsed element's source includes both
// items and the two blank lines, leaving only `baz` unparsed.
// NOTE(review): the test name implies the second item is nested (indented) under the
// first — confirm the indentation inside the raw strings is intact.
#[test]
|
|
fn two_blank_lines_ends_nested_list() {
|
|
let input = r#"1. foo
|
|
1. bar
|
|
|
|
|
|
baz"#;
|
|
let initial_context: ContextTree<'_, '_> = ContextTree::new();
|
|
let document_context =
|
|
initial_context.with_additional_node(ContextElement::DocumentRoot(input));
|
|
let plain_list_matcher = parser_with_context!(element)(&document_context);
|
|
let (remaining, result) =
|
|
plain_list_matcher(input).expect("Should parse the plain list successfully.");
|
|
assert_eq!(remaining, "baz");
|
|
assert_eq!(
|
|
result.get_source(),
|
|
r#"1. foo
|
|
1. bar
|
|
|
|
|
|
"#
|
|
);
|
|
}
|
|
|
|
// Parses through `element` and checks that single blank lines between the lines of the
// input do not end the parsed element: its source runs up to and including the two
// blank lines before `dolar`, which is the only text left unparsed.
// NOTE(review): the intended nesting of `bar`, `1. baz`, `lorem` and `ipsum` depends on
// indentation inside the raw strings — confirm it is intact.
#[test]
|
|
fn interior_trailing_whitespace() {
|
|
let input = r#"1. foo
|
|
|
|
bar
|
|
|
|
1. baz
|
|
|
|
lorem
|
|
|
|
ipsum
|
|
|
|
|
|
dolar"#;
|
|
let initial_context: ContextTree<'_, '_> = ContextTree::new();
|
|
let document_context =
|
|
initial_context.with_additional_node(ContextElement::DocumentRoot(input));
|
|
let plain_list_matcher = parser_with_context!(element)(&document_context);
|
|
let (remaining, result) =
|
|
plain_list_matcher(input).expect("Should parse the plain list successfully.");
|
|
assert_eq!(remaining, "dolar");
|
|
assert_eq!(
|
|
result.get_source(),
|
|
r#"1. foo
|
|
|
|
bar
|
|
|
|
1. baz
|
|
|
|
lorem
|
|
|
|
ipsum
|
|
|
|
|
|
"#
|
|
);
|
|
}
|
|
}
|