organic/src/parser/plain_list.rs

474 lines
16 KiB
Rust
Raw Normal View History

2023-03-25 14:10:22 -04:00
use nom::branch::alt;
2023-03-25 14:23:52 -04:00
use nom::bytes::complete::tag;
use nom::character::complete::anychar;
2023-03-25 14:23:52 -04:00
use nom::character::complete::digit1;
use nom::character::complete::line_ending;
use nom::character::complete::multispace1;
2023-03-25 14:23:52 -04:00
use nom::character::complete::one_of;
2023-03-25 14:10:22 -04:00
use nom::character::complete::space0;
use nom::character::complete::space1;
2023-03-25 14:10:22 -04:00
use nom::combinator::eof;
use nom::combinator::opt;
use nom::combinator::peek;
2023-03-25 14:10:22 -04:00
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many1;
use nom::multi::many_till;
2023-03-25 14:10:22 -04:00
use nom::sequence::tuple;
use super::greater_element::PlainList;
use super::greater_element::PlainListItem;
use super::org_source::OrgSource;
use super::parser_with_context::parser_with_context;
use super::util::non_whitespace_character;
use super::Context;
use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
use crate::parser::element_parser::element;
use crate::parser::exiting::ExitClass;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::util::blank_line;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
use crate::parser::util::start_of_line;
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn detect_plain_list<'r, 's>(
_context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, ()> {
if verify(
tuple((
start_of_line,
space0,
bullet,
alt((space1, line_ending, eof)),
)),
|(_start, indent, bull, _after_whitespace)| {
Into::<&str>::into(bull) != "*" || indent.len() > 0
},
)(input)
.is_ok()
{
return Ok((input, ()));
}
return Err(nom::Err::Error(CustomError::MyError(MyError(
"No element detected.".into(),
))));
}
2023-08-10 20:04:59 -04:00
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn plain_list<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, PlainList<'s>> {
let parser_context = context
.with_additional_node(ContextElement::Context("plain list"))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
exit_matcher: &plain_list_end,
}));
// children stores tuple of (input string, parsed object) so we can re-parse the final item
let mut children = Vec::new();
let mut first_item_indentation: Option<usize> = None;
let mut remaining = input;
// The final list item does not consume trailing blank lines (which instead get consumed by the list). We have three options here:
//
// 1. Parse all items while consuming trailing whitespace, then edit the final item to remove trailing whitespace.
// 2. Parse all items without consuming trailing whitespace, then edit all but the final one to add in the trailing whitespace.
// 3. Re-parse the final item with consume trailing whitespace disabled.
//
// While #3 is the most slow, it also seems to cleanest and involves the least manual mutation of already-parsed objects so I am going with #3 for now, but we should revisit #1 or #2 when the parser is more developed.
loop {
let list_item = parser_with_context!(plain_list_item)(&parser_context)(remaining);
match list_item {
Ok((remain, item))
if item.indentation == *first_item_indentation.get_or_insert(item.indentation) =>
{
children.push((remaining, item));
remaining = remain;
}
Ok(_) | Err(_) => {
break;
}
};
let maybe_exit = parser_with_context!(exit_matcher_parser)(&parser_context)(remaining);
if maybe_exit.is_ok() {
break;
}
}
let (final_child_start, _final_item_first_parse) = match children.pop() {
Some(final_child) => final_child,
None => {
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Plain lists require at least one element.".into(),
))));
}
};
let final_item_context =
parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
let (remaining, reparsed_final_item) =
parser_with_context!(plain_list_item)(&final_item_context)(final_child_start)?;
children.push((final_child_start, reparsed_final_item));
let source = get_consumed(input, remaining);
Ok((
remaining,
PlainList {
source: source.into(),
children: children.into_iter().map(|(_start, item)| item).collect(),
},
))
2023-03-25 14:28:48 -04:00
}
2023-08-10 20:04:59 -04:00
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn plain_list_item<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, PlainListItem<'s>> {
start_of_line(input)?;
2023-03-25 14:10:22 -04:00
let (remaining, leading_whitespace) = space0(input)?;
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
let indent_level = leading_whitespace.len();
let (remaining, bull) = verify(bullet, |bull: &OrgSource<'_>| {
Into::<&str>::into(bull) != "*" || indent_level > 0
})(remaining)?;
let maybe_contentless_item: Res<OrgSource<'_>, OrgSource<'_>> = eof(remaining);
match maybe_contentless_item {
Ok((rem, _ws)) => {
let source = get_consumed(input, rem);
return Ok((
rem,
PlainListItem {
source: source.into(),
indentation: indent_level,
bullet: bull.into(),
children: Vec::new(),
},
));
}
Err(_) => {}
};
let (remaining, _maybe_tag) = opt(tuple((space1, item_tag, tag(" ::"))))(remaining)?;
let (remaining, _ws) = alt((space1, line_ending))(remaining)?;
let exit_matcher = plain_list_item_end(indent_level);
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
exit_matcher: &exit_matcher,
}));
let (remaining, (children, _exit_contents)) = many_till(
parser_with_context!(element(true))(&parser_context),
parser_with_context!(exit_matcher_parser)(&parser_context),
)(remaining)?;
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
let source = get_consumed(input, remaining);
return Ok((
remaining,
PlainListItem {
source: source.into(),
indentation: indent_level,
bullet: bull.into(),
children,
},
));
2023-03-25 14:23:52 -04:00
}
2023-08-10 20:04:59 -04:00
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn bullet<'s>(i: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
2023-03-25 14:23:52 -04:00
alt((
tag("*"),
tag("-"),
tag("+"),
recognize(tuple((counter, alt((tag("."), tag(")")))))),
))(i)
}
2023-08-10 20:04:59 -04:00
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn counter<'s>(i: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
2023-03-25 14:23:52 -04:00
alt((recognize(one_of("abcdefghijklmnopqrstuvwxyz")), digit1))(i)
}
2023-03-25 14:10:22 -04:00
2023-08-10 20:04:59 -04:00
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn plain_list_end<'r, 's>(
2023-08-24 23:43:41 +00:00
_context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
recognize(tuple((
start_of_line,
verify(many1(blank_line), |lines: &Vec<OrgSource<'_>>| {
lines.len() >= 2
}),
)))(input)
}
/// Build the exit matcher for a list item indented by `indent_level`.
///
/// The returned closure forwards to `_plain_list_item_end`, supplying a matcher
/// for lines indented no deeper than `indent_level`.
const fn plain_list_item_end(
    indent_level: usize,
) -> impl for<'r, 's> Fn(Context<'r, 's>, OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
    let shallow_line_matcher = line_indented_lte(indent_level);
    move |context: Context, input: OrgSource<'_>| {
        _plain_list_item_end(context, input, &shallow_line_matcher)
    }
}
#[cfg_attr(
feature = "tracing",
tracing::instrument(ret, level = "debug", skip(line_indented_lte_matcher))
)]
fn _plain_list_item_end<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
line_indented_lte_matcher: impl for<'rr, 'ss> Fn(
Context<'rr, 'ss>,
OrgSource<'ss>,
) -> Res<OrgSource<'ss>, OrgSource<'ss>>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
start_of_line(input)?;
recognize(tuple((
opt(blank_line),
parser_with_context!(line_indented_lte_matcher)(context),
)))(input)
2023-03-25 14:10:22 -04:00
}
/// Build a matcher for a line whose indentation is at most `indent_level`.
///
/// The returned closure forwards to `_line_indented_lte` with the captured
/// `indent_level`.
const fn line_indented_lte(
    indent_level: usize,
) -> impl for<'r, 's> Fn(Context<'r, 's>, OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
    move |context: Context, input: OrgSource<'_>| {
        _line_indented_lte(context, input, indent_level)
    }
}
2023-08-10 20:04:59 -04:00
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn _line_indented_lte<'r, 's>(
_context: Context<'r, 's>,
input: OrgSource<'s>,
indent_level: usize,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
2023-03-25 14:10:22 -04:00
let matched = recognize(verify(
tuple((space0::<OrgSource<'_>, _>, non_whitespace_character)),
2023-03-25 14:10:22 -04:00
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|(_space0, _anychar)| _space0.len() <= indent_level,
2023-03-25 14:10:22 -04:00
))(input)?;
Ok(matched)
}
/// Match the text of a list item's tag.
///
/// Consumes characters up to, but not including, the first of: a line ending,
/// the separator ` :: `, or ` ::` directly followed by a line ending or the end
/// of input. The terminator itself is only peeked at and stays in the
/// remaining input.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn item_tag<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
    let tag_terminator = peek(alt((
        line_ending,
        tag(" :: "),
        recognize(tuple((tag(" ::"), alt((line_ending, eof))))),
    )));
    recognize(many_till(anychar, tag_terminator))(input)
}
2023-03-25 14:28:48 -04:00
// Unit tests for the plain list parsers and for `detect_plain_list`.
//
// NOTE(review): several multi-line raw-string fixtures below do not match their own
// assertions as shown (e.g. an expected remainder of " ipsum\n" for a fixture line that has no
// leading whitespace, and tests about "two blank lines" whose fixtures contain no blank lines).
// The fixtures' indentation and blank lines appear to have been lost, and date/time lines are
// interleaved with the code — verify against the repository copy before relying on these tests.
#[cfg(test)]
mod tests {
use super::*;
2023-03-25 14:28:48 -04:00
use crate::parser::parser_context::ContextTree;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::Source;
2023-03-25 14:28:48 -04:00
// A bare bullet at end of input parses as an item and consumes everything.
#[test]
fn plain_list_item_empty() {
2023-08-24 17:15:24 -04:00
let input = OrgSource::new("1.");
2023-03-25 14:28:48 -04:00
let initial_context: ContextTree<'_, '_> = ContextTree::new();
2023-08-24 17:15:24 -04:00
let plain_list_item_matcher = parser_with_context!(plain_list_item)(&initial_context);
2023-03-25 14:28:48 -04:00
let (remaining, result) = plain_list_item_matcher(input).unwrap();
2023-08-24 17:15:24 -04:00
assert_eq!(Into::<&str>::into(remaining), "");
2023-03-25 14:28:48 -04:00
assert_eq!(result.source, "1.");
}
// A bullet followed by text on the same line parses as one item covering the whole input.
#[test]
fn plain_list_item_simple() {
2023-08-24 17:15:24 -04:00
let input = OrgSource::new("1. foo");
let initial_context: ContextTree<'_, '_> = ContextTree::new();
2023-08-24 17:15:24 -04:00
let plain_list_item_matcher = parser_with_context!(plain_list_item)(&initial_context);
let (remaining, result) = plain_list_item_matcher(input).unwrap();
2023-08-24 17:15:24 -04:00
assert_eq!(Into::<&str>::into(remaining), "");
assert_eq!(result.source, "1. foo");
}
// A list made of a single contentless item is still a valid list.
#[test]
fn plain_list_empty() {
2023-08-24 17:15:24 -04:00
let input = OrgSource::new("1.");
let initial_context: ContextTree<'_, '_> = ContextTree::new();
2023-08-24 17:15:24 -04:00
let plain_list_matcher = parser_with_context!(plain_list)(&initial_context);
let (remaining, result) = plain_list_matcher(input).unwrap();
2023-08-24 17:15:24 -04:00
assert_eq!(Into::<&str>::into(remaining), "");
assert_eq!(result.source, "1.");
}
// A list made of a single item with text covers the whole input.
#[test]
fn plain_list_simple() {
2023-08-24 17:15:24 -04:00
let input = OrgSource::new("1. foo");
let initial_context: ContextTree<'_, '_> = ContextTree::new();
2023-08-24 17:15:24 -04:00
let plain_list_matcher = parser_with_context!(plain_list)(&initial_context);
let (remaining, result) = plain_list_matcher(input).unwrap();
2023-08-24 17:15:24 -04:00
assert_eq!(Into::<&str>::into(remaining), "");
assert_eq!(result.source, "1. foo");
}
// An unindented "*" bullet must not be parsed as a plain list.
#[test]
fn plain_list_cant_start_line_with_asterisk() {
// Plain lists with an asterisk bullet must be indented or else they would be a headline
2023-08-24 17:15:24 -04:00
let input = OrgSource::new("* foo");
let initial_context: ContextTree<'_, '_> = ContextTree::new();
2023-08-24 17:15:24 -04:00
let plain_list_matcher = parser_with_context!(plain_list)(&initial_context);
let result = plain_list_matcher(input);
assert!(result.is_err());
}
// An indented "*" bullet is accepted as a plain list.
#[test]
fn indented_can_start_line_with_asterisk() {
// Plain lists with an asterisk bullet must be indented or else they would be a headline
2023-08-24 17:15:24 -04:00
let input = OrgSource::new(" * foo");
let initial_context: ContextTree<'_, '_> = ContextTree::new();
2023-08-24 17:15:24 -04:00
let plain_list_matcher = parser_with_context!(plain_list)(&initial_context);
let result = plain_list_matcher(input);
assert!(result.is_ok());
}
// Parses through the generic `element` parser and checks where the list stops.
// NOTE(review): the expected remainder starts with a space but the fixture's "ipsum" line
// does not — the fixture whitespace/blank lines look altered; confirm against the repository.
#[test]
fn two_blank_lines_ends_list() {
2023-08-24 17:15:24 -04:00
let input = OrgSource::new(
r#"1. foo
2. bar
baz
3. lorem
ipsum
2023-08-24 17:15:24 -04:00
"#,
);
let initial_context: ContextTree<'_, '_> = ContextTree::new();
2023-08-24 17:15:24 -04:00
let plain_list_matcher = parser_with_context!(element(true))(&initial_context);
let (remaining, result) =
plain_list_matcher(input).expect("Should parse the plain list successfully.");
2023-08-24 17:15:24 -04:00
assert_eq!(Into::<&str>::into(remaining), " ipsum\n");
assert_eq!(
result.get_source(),
r#"1. foo
2. bar
baz
3. lorem
"#
);
}
// Same idea as above for a nested list: "baz" must be left unparsed after the list.
// NOTE(review): the fixture shows no blank lines or nesting indentation — presumably lost;
// confirm against the repository.
#[test]
fn two_blank_lines_ends_nested_list() {
2023-08-24 17:15:24 -04:00
let input = OrgSource::new(
r#"1. foo
1. bar
2023-08-24 17:15:24 -04:00
baz"#,
);
let initial_context: ContextTree<'_, '_> = ContextTree::new();
2023-08-24 17:15:24 -04:00
let plain_list_matcher = parser_with_context!(element(true))(&initial_context);
let (remaining, result) =
plain_list_matcher(input).expect("Should parse the plain list successfully.");
2023-08-24 17:15:24 -04:00
assert_eq!(Into::<&str>::into(remaining), "baz");
assert_eq!(
result.get_source(),
r#"1. foo
1. bar
"#
);
}
// Checks that the list's source includes everything up to, but not including, "dolar".
// NOTE(review): fixture indentation/blank lines presumably lost here as well; confirm.
#[test]
fn interior_trailing_whitespace() {
2023-08-24 17:15:24 -04:00
let input = OrgSource::new(
r#"1. foo
bar
1. baz
lorem
ipsum
2023-08-24 17:15:24 -04:00
dolar"#,
);
let initial_context: ContextTree<'_, '_> = ContextTree::new();
2023-08-24 17:15:24 -04:00
let plain_list_matcher = parser_with_context!(element(true))(&initial_context);
let (remaining, result) =
plain_list_matcher(input).expect("Should parse the plain list successfully.");
2023-08-24 17:15:24 -04:00
assert_eq!(Into::<&str>::into(remaining), "dolar");
assert_eq!(
result.get_source(),
r#"1. foo
bar
1. baz
lorem
ipsum
"#
);
}
2023-08-25 05:25:41 -04:00
// A bullet directly followed by a line ending is detected as a list.
#[test]
fn detect_line_break() {
let input = OrgSource::new(
r#"+
"#,
);
let initial_context: ContextTree<'_, '_> = ContextTree::new();
let result = detect_plain_list(&initial_context, input);
assert!(result.is_ok());
}
// A bullet directly followed by the end of input is detected as a list.
#[test]
fn detect_eof() {
let input = OrgSource::new(r#"+"#);
let initial_context: ContextTree<'_, '_> = ContextTree::new();
let result = detect_plain_list(&initial_context, input);
assert!(result.is_ok());
}
// A bullet character glued to following text is not detected as a list.
#[test]
fn detect_no_gap() {
let input = OrgSource::new(r#"+foo"#);
let initial_context: ContextTree<'_, '_> = ContextTree::new();
let result = detect_plain_list(&initial_context, input);
// Since there is no whitespace after the '+' this is a paragraph, not a plain list.
assert!(result.is_err());
}
// A bullet followed by a space and text is detected as a list.
#[test]
fn detect_with_gap() {
let input = OrgSource::new(r#"+ foo"#);
let initial_context: ContextTree<'_, '_> = ContextTree::new();
let result = detect_plain_list(&initial_context, input);
assert!(result.is_ok());
}
2023-03-25 14:28:48 -04:00
}