organic/src/parser/plain_list.rs
2023-09-03 16:22:40 -04:00

569 lines
20 KiB
Rust

use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::digit1;
use nom::character::complete::line_ending;
use nom::character::complete::one_of;
use nom::character::complete::space0;
use nom::character::complete::space1;
use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many0;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::tuple;
use super::element_parser::element;
use super::object_parser::standard_set_object;
use super::org_source::OrgSource;
use super::util::non_whitespace_character;
use crate::context::parser_with_context;
use crate::context::ContextElement;
use crate::context::ContextMatcher;
use crate::context::ExitClass;
use crate::context::ExitMatcherNode;
use crate::context::RefContext;
use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
use crate::parser::util::blank_line;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
use crate::parser::util::start_of_line;
use crate::types::Object;
use crate::types::PlainList;
use crate::types::PlainListItem;
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn detect_plain_list<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
if verify(
tuple((
start_of_line,
space0,
bullet,
alt((space1, line_ending, eof)),
)),
|(_start, indent, bull, _after_whitespace)| {
Into::<&str>::into(bull) != "*" || indent.len() > 0
},
)(input)
.is_ok()
{
return Ok((input, ()));
}
return Err(nom::Err::Error(CustomError::MyError(MyError(
"No element detected.".into(),
))));
}
/// Parse a plain list starting at `input`.
///
/// Items are parsed one after another for as long as they share the
/// indentation of the first item and the list's exit matcher has not fired.
/// The last accepted item is then parsed a second time with trailing
/// whitespace consumption disabled.
///
/// Fails with "Plain lists require at least one element." when no item could
/// be parsed, or with whatever error the re-parse of the final item yields.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn plain_list<'b, 'g, 'r, 's>(
    context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, PlainList<'s>> {
    let contexts = [
        ContextElement::Context("plain list"),
        ContextElement::ConsumeTrailingWhitespace(true),
        ContextElement::ExitMatcherNode(ExitMatcherNode {
            class: ExitClass::Beta,
            exit_matcher: &plain_list_end,
        }),
    ];
    let named_context = context.with_additional_node(&contexts[0]);
    let whitespace_context = named_context.with_additional_node(&contexts[1]);
    let parser_context = whitespace_context.with_additional_node(&contexts[2]);

    // Each entry is (where the item started, the parsed item); the start is
    // kept so the final item can be parsed again below.
    let mut items = Vec::new();
    let mut first_item_indentation: Option<usize> = None;
    let mut cursor = input;

    // The final list item must not consume trailing blank lines (the list
    // consumes them instead). There are three ways to get there:
    //
    // 1. Parse every item consuming trailing whitespace, then edit the final
    //    item to strip it.
    // 2. Parse every item without consuming trailing whitespace, then edit all
    //    but the final item to add it back.
    // 3. Re-parse the final item with trailing whitespace consumption disabled.
    //
    // Option 3 is the slowest, but it is also the cleanest and needs the least
    // manual mutation of already-parsed objects, so it is used for now.
    // Options 1 and 2 should be revisited once the parser is more developed.
    loop {
        let attempt = parser_with_context!(plain_list_item)(&parser_context)(cursor);
        let (after_item, item) = match attempt {
            Ok(parsed) => parsed,
            Err(_) => break,
        };
        // The first successfully parsed item fixes the indentation for the list.
        let expected_indentation = *first_item_indentation.get_or_insert(item.indentation);
        if item.indentation != expected_indentation {
            break;
        }
        items.push((cursor, item));
        cursor = after_item;

        let list_finished =
            parser_with_context!(exit_matcher_parser)(&parser_context)(cursor).is_ok();
        if list_finished {
            break;
        }
    }

    let (last_item_start, _first_parse_of_last_item) = match items.pop() {
        None => {
            return Err(nom::Err::Error(CustomError::MyError(MyError(
                "Plain lists require at least one element.".into(),
            ))));
        }
        Some(last) => last,
    };

    let no_trailing_whitespace = ContextElement::ConsumeTrailingWhitespace(false);
    let last_item_context = parser_context.with_additional_node(&no_trailing_whitespace);
    let (remaining, last_item) =
        parser_with_context!(plain_list_item)(&last_item_context)(last_item_start)?;
    items.push((last_item_start, last_item));

    let source = get_consumed(input, remaining);
    Ok((
        remaining,
        PlainList {
            source: source.into(),
            children: items.into_iter().map(|(_start, item)| item).collect(),
        },
    ))
}
/// Parse a single plain list item starting at `input`.
///
/// The item consists of optional indentation, a bullet (an unindented `*` is
/// rejected), an optional tag terminated by " ::", and then either nothing
/// but blank lines up to end of input (a contentless item) or a sequence of
/// child elements that runs until the item's exit matcher fires.
///
/// When the surrounding context does not want trailing whitespace consumed,
/// the last child element is parsed a second time with trailing whitespace
/// consumption disabled.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn plain_list_item<'b, 'g, 'r, 's>(
    context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, PlainListItem<'s>> {
    start_of_line(input)?;
    let (after_indent, indent) = space0(input)?;
    // Counting bytes instead of characters is fine here: nom's space0 only
    // matches space and tab (0x20 and 0x09), which are one byte each.
    let indent_level = indent.len();
    let (after_bullet, bull) = verify(bullet, |candidate: &OrgSource<'_>| {
        indent_level > 0 || Into::<&str>::into(candidate) != "*"
    })(after_indent)?;
    let (after_tag, maybe_tag) = opt(tuple((
        space1,
        parser_with_context!(item_tag)(context),
        tag(" ::"),
    )))(after_bullet)?;
    // Only the parsed objects of the tag are kept; no tag means an empty list.
    let tag_objects = maybe_tag
        .map(|(_ws, objects, _divider)| objects)
        .unwrap_or_default();

    // Contentless item: only blank lines remain before end of input.
    let only_blank_until_eof: Res<OrgSource<'_>, OrgSource<'_>> =
        peek(recognize(tuple((many0(blank_line), eof))))(after_tag);
    if only_blank_until_eof.is_ok() {
        let (rest, _trailing_ws) = opt(blank_line)(after_tag)?;
        let source = get_consumed(input, rest);
        return Ok((
            rest,
            PlainListItem {
                source: source.into(),
                indentation: indent_level,
                bullet: bull.into(),
                tag: tag_objects,
                children: Vec::new(),
            },
        ));
    }

    let (after_gap, _gap) = item_tag_post_gap(context, after_tag)?;

    let exit_matcher = plain_list_item_end(indent_level);
    let contexts = [
        ContextElement::ConsumeTrailingWhitespace(true),
        ContextElement::ExitMatcherNode(ExitMatcherNode {
            class: ExitClass::Beta,
            exit_matcher: &exit_matcher,
        }),
    ];
    let whitespace_context = context.with_additional_node(&contexts[0]);
    let parser_context = whitespace_context.with_additional_node(&contexts[1]);

    // Every child is stored together with the input position it started at so
    // that the last one can be parsed again if needed.
    let (mut rest, (mut children, _exit_contents)) = many_till(
        include_input(parser_with_context!(element(true))(&parser_context)),
        parser_with_context!(exit_matcher_parser)(&parser_context),
    )(after_gap)?;

    if !(children.is_empty() || context.should_consume_trailing_whitespace()) {
        let no_trailing_whitespace = ContextElement::ConsumeTrailingWhitespace(false);
        let last_child_context = parser_context.with_additional_node(&no_trailing_whitespace);
        let (last_child_start, _first_parse_of_last_child) = children
            .pop()
            .expect("if-statement already checked that children was non-empty.");
        let (after_reparse, reparsed_last_child) = include_input(parser_with_context!(
            element(true)
        )(&last_child_context))(last_child_start)?;
        rest = after_reparse;
        children.push(reparsed_last_child);
    }

    let (rest, _trailing_ws) = maybe_consume_trailing_whitespace_if_not_exiting(context, rest)?;
    let source = get_consumed(input, rest);
    Ok((
        rest,
        PlainListItem {
            source: source.into(),
            indentation: indent_level,
            bullet: bull.into(),
            tag: tag_objects,
            children: children.into_iter().map(|(_start, child)| child).collect(),
        },
    ))
}
/// Wrap `inner` so that its output is paired with the input it was given.
///
/// The returned parser yields `(input, output)` as its value, which lets a
/// caller remember where a successfully parsed item began.
fn include_input<'s, F, O>(
    mut inner: F,
) -> impl FnMut(OrgSource<'s>) -> Res<OrgSource<'s>, (OrgSource<'s>, O)>
where
    F: FnMut(OrgSource<'s>) -> Res<OrgSource<'s>, O>,
{
    move |start: OrgSource<'_>| {
        inner(start).map(|(remaining, output)| (remaining, (start, output)))
    }
}
/// Match a list bullet: `*`, `-`, `+`, or a counter followed by `.` or `)`.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn bullet<'s>(i: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
    let counter_terminator = alt((tag("."), tag(")")));
    let ordered_bullet = recognize(tuple((counter, counter_terminator)));
    alt((tag("*"), tag("-"), tag("+"), ordered_bullet))(i)
}
/// Match the counter of an ordered bullet: one lowercase ASCII letter, or one
/// or more digits.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn counter<'s>(i: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
    let single_letter = recognize(one_of("abcdefghijklmnopqrstuvwxyz"));
    alt((single_letter, digit1))(i)
}
/// Exit matcher for a plain list: succeeds at the start of a line that is
/// followed by at least two blank lines, and returns the matched text.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn plain_list_end<'b, 'g, 'r, 's>(
    _context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
    let two_or_more_blank_lines = verify(
        many1(blank_line),
        |blank_lines: &Vec<OrgSource<'_>>| blank_lines.len() > 1,
    );
    recognize(tuple((start_of_line, two_or_more_blank_lines)))(input)
}
/// Build the exit matcher for a list item indented by `indent_level`.
///
/// The returned matcher delegates to `_plain_list_item_end`, handing it a
/// `line_indented_lte` matcher created for the same indentation.
const fn plain_list_item_end(indent_level: usize) -> impl ContextMatcher {
    let dedent_matcher = line_indented_lte(indent_level);
    move |ctx, source: OrgSource<'_>| _plain_list_item_end(ctx, source, &dedent_matcher)
}
/// Decide whether a list item ends at `input`.
///
/// Requires `input` to be at the start of a line, then matches an optional
/// blank line followed by whatever `line_indented_lte_matcher` accepts, and
/// returns the matched text.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(ret, level = "debug", skip(line_indented_lte_matcher))
)]
fn _plain_list_item_end<'b, 'g, 'r, 's>(
    context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
    line_indented_lte_matcher: impl ContextMatcher,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
    start_of_line(input)?;
    let dedented_line = parser_with_context!(line_indented_lte_matcher)(context);
    recognize(tuple((opt(blank_line), dedented_line)))(input)
}
/// Build a matcher that accepts a line whose leading whitespace is at most
/// `indent_level` bytes long (see `_line_indented_lte`).
const fn line_indented_lte(indent_level: usize) -> impl ContextMatcher {
    move |ctx, source: OrgSource<'_>| _line_indented_lte(ctx, source, indent_level)
}
/// Match leading spaces/tabs plus the first non-whitespace character, but only
/// when the leading whitespace is no longer than `indent_level` bytes.
///
/// Returns the matched text (whitespace and that one character).
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn _line_indented_lte<'b, 'g, 'r, 's>(
    _context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
    indent_level: usize,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
    recognize(verify(
        tuple((space0::<OrgSource<'_>, _>, non_whitespace_character)),
        // Comparing byte lengths is fine: nom's space0 only matches space and
        // tab (0x20 and 0x09), so bytes and characters coincide.
        |(leading_ws, _first_char)| leading_ws.len() <= indent_level,
    ))(input)
}
/// Parse the tag of a list item as a non-empty sequence of objects.
///
/// Objects are collected until the exit matcher fires, with `item_tag_end`
/// registered as an additional exit matcher. A tag with no objects is
/// rejected.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn item_tag<'b, 'g, 'r, 's>(
    context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Vec<Object<'s>>> {
    let tag_exit_node = ContextElement::ExitMatcherNode(ExitMatcherNode {
        class: ExitClass::Gamma,
        exit_matcher: &item_tag_end,
    });
    let tag_context = context.with_additional_node(&tag_exit_node);
    let (remaining, (objects, _exit_contents)) = verify(
        many_till(
            // TODO: Should this be using a different set like the minimal set?
            parser_with_context!(standard_set_object)(&tag_context),
            parser_with_context!(exit_matcher_parser)(&tag_context),
        ),
        |(parsed_objects, _exit_contents)| !parsed_objects.is_empty(),
    )(input)?;
    Ok((remaining, objects))
}
/// Exit matcher for an item tag: a line ending, the divider " :: ", or the
/// divider " ::" immediately followed by a line ending or end of input.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn item_tag_end<'b, 'g, 'r, 's>(
    _context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
    let divider_then_text = tag(" :: ");
    let divider_at_line_end = recognize(tuple((tag(" ::"), alt((line_ending, eof)))));
    recognize(alt((
        line_ending,
        divider_then_text,
        divider_at_line_end,
    )))(input)
}
/// Consume the gap between an item's bullet/tag and its contents.
///
/// The gap is a blank line or a run of spaces/tabs, followed by blank lines
/// up to the point where the next line is not blank, only blank lines remain
/// before end of input, or the context's exit matcher fires. An empty gap is
/// rejected.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn item_tag_post_gap<'b, 'g, 'r, 's>(
    context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
    // Conditions that stop the run of blank lines.
    let end_of_gap = alt((
        peek(recognize(not(blank_line))),
        peek(recognize(tuple((many0(blank_line), eof)))),
        parser_with_context!(exit_matcher_parser)(context),
    ));
    let gap = recognize(tuple((
        alt((blank_line, space0)),
        many_till(blank_line, end_of_gap),
    )));
    verify(gap, |matched| matched.len() > 0)(input)
}
// Unit tests for the plain list parsers. Most tests build a fresh document
// context from default global settings and run a single parser against a
// small literal input.
#[cfg(test)]
mod tests {
use super::*;
use crate::context::Context;
use crate::context::GlobalSettings;
use crate::context::List;
use crate::types::Source;
// A bare bullet with nothing after it parses as an item and consumes all input.
#[test]
fn plain_list_item_empty() {
let input = OrgSource::new("1.");
let global_settings = GlobalSettings::default();
let initial_context = ContextElement::document_context();
let initial_context = Context::new(&global_settings, List::new(&initial_context));
let plain_list_item_matcher = parser_with_context!(plain_list_item)(&initial_context);
let (remaining, result) = plain_list_item_matcher(input).unwrap();
assert_eq!(Into::<&str>::into(remaining), "");
assert_eq!(result.source, "1.");
}
// A bullet followed by text parses as an item whose source is the whole input.
#[test]
fn plain_list_item_simple() {
let input = OrgSource::new("1. foo");
let global_settings = GlobalSettings::default();
let initial_context = ContextElement::document_context();
let initial_context = Context::new(&global_settings, List::new(&initial_context));
let plain_list_item_matcher = parser_with_context!(plain_list_item)(&initial_context);
let (remaining, result) = plain_list_item_matcher(input).unwrap();
assert_eq!(Into::<&str>::into(remaining), "");
assert_eq!(result.source, "1. foo");
}
// Same as plain_list_item_empty, but going through the list parser.
#[test]
fn plain_list_empty() {
let input = OrgSource::new("1.");
let global_settings = GlobalSettings::default();
let initial_context = ContextElement::document_context();
let initial_context = Context::new(&global_settings, List::new(&initial_context));
let plain_list_matcher = parser_with_context!(plain_list)(&initial_context);
let (remaining, result) = plain_list_matcher(input).unwrap();
assert_eq!(Into::<&str>::into(remaining), "");
assert_eq!(result.source, "1.");
}
// Same as plain_list_item_simple, but going through the list parser.
#[test]
fn plain_list_simple() {
let input = OrgSource::new("1. foo");
let global_settings = GlobalSettings::default();
let initial_context = ContextElement::document_context();
let initial_context = Context::new(&global_settings, List::new(&initial_context));
let plain_list_matcher = parser_with_context!(plain_list)(&initial_context);
let (remaining, result) = plain_list_matcher(input).unwrap();
assert_eq!(Into::<&str>::into(remaining), "");
assert_eq!(result.source, "1. foo");
}
#[test]
fn plain_list_cant_start_line_with_asterisk() {
// Plain lists with an asterisk bullet must be indented or else they would be a headline
let input = OrgSource::new("* foo");
let global_settings = GlobalSettings::default();
let initial_context = ContextElement::document_context();
let initial_context = Context::new(&global_settings, List::new(&initial_context));
let plain_list_matcher = parser_with_context!(plain_list)(&initial_context);
let result = plain_list_matcher(input);
assert!(result.is_err());
}
#[test]
fn indented_can_start_line_with_asterisk() {
// Plain lists with an asterisk bullet must be indented or else they would be a headline
let input = OrgSource::new(" * foo");
let global_settings = GlobalSettings::default();
let initial_context = ContextElement::document_context();
let initial_context = Context::new(&global_settings, List::new(&initial_context));
let plain_list_matcher = parser_with_context!(plain_list)(&initial_context);
let result = plain_list_matcher(input);
assert!(result.is_ok());
}
// Parses through `element(true)` and checks both the leftover input and the
// source consumed by the list. Named after the rule implemented by
// `plain_list_end`, which requires at least two blank lines.
#[test]
fn two_blank_lines_ends_list() {
let input = OrgSource::new(
r#"1. foo
2. bar
baz
3. lorem
ipsum
"#,
);
let global_settings = GlobalSettings::default();
let initial_context = ContextElement::document_context();
let initial_context = Context::new(&global_settings, List::new(&initial_context));
let plain_list_matcher = parser_with_context!(element(true))(&initial_context);
let (remaining, result) =
plain_list_matcher(input).expect("Should parse the plain list successfully.");
assert_eq!(Into::<&str>::into(remaining), " ipsum\n");
assert_eq!(
result.get_source(),
r#"1. foo
2. bar
baz
3. lorem
"#
);
}
// Like two_blank_lines_ends_list, but the second bullet is meant to be a
// nested list; checks the leftover input and the consumed source.
#[test]
fn two_blank_lines_ends_nested_list() {
let input = OrgSource::new(
r#"1. foo
1. bar
baz"#,
);
let global_settings = GlobalSettings::default();
let initial_context = ContextElement::document_context();
let initial_context = Context::new(&global_settings, List::new(&initial_context));
let plain_list_matcher = parser_with_context!(element(true))(&initial_context);
let (remaining, result) =
plain_list_matcher(input).expect("Should parse the plain list successfully.");
assert_eq!(Into::<&str>::into(remaining), "baz");
assert_eq!(
result.get_source(),
r#"1. foo
1. bar
"#
);
}
// Checks that everything before the final "dolar" line is consumed by the
// list and that "dolar" is left as remaining input.
#[test]
fn interior_trailing_whitespace() {
let input = OrgSource::new(
r#"1. foo
bar
1. baz
lorem
ipsum
dolar"#,
);
let global_settings = GlobalSettings::default();
let initial_context = ContextElement::document_context();
let initial_context = Context::new(&global_settings, List::new(&initial_context));
let plain_list_matcher = parser_with_context!(element(true))(&initial_context);
let (remaining, result) =
plain_list_matcher(input).expect("Should parse the plain list successfully.");
assert_eq!(Into::<&str>::into(remaining), "dolar");
assert_eq!(
result.get_source(),
r#"1. foo
bar
1. baz
lorem
ipsum
"#
);
}
// A bullet directly followed by a line ending is detected as a list.
#[test]
fn detect_line_break() {
let input = OrgSource::new(
r#"+
"#,
);
let result = detect_plain_list(input);
assert!(result.is_ok());
}
// A bullet directly followed by end of input is detected as a list.
#[test]
fn detect_eof() {
let input = OrgSource::new(r#"+"#);
let result = detect_plain_list(input);
assert!(result.is_ok());
}
#[test]
fn detect_no_gap() {
let input = OrgSource::new(r#"+foo"#);
let result = detect_plain_list(input);
// Since there is no whitespace after the '+' this is a paragraph, not a plain list.
assert!(result.is_err());
}
// A bullet followed by a space and text is detected as a list.
#[test]
fn detect_with_gap() {
let input = OrgSource::new(r#"+ foo"#);
let result = detect_plain_list(input);
assert!(result.is_ok());
}
}