569 lines
20 KiB
Rust
569 lines
20 KiB
Rust
use nom::branch::alt;
|
|
use nom::bytes::complete::tag;
|
|
use nom::character::complete::digit1;
|
|
use nom::character::complete::line_ending;
|
|
use nom::character::complete::one_of;
|
|
use nom::character::complete::space0;
|
|
use nom::character::complete::space1;
|
|
use nom::combinator::eof;
|
|
use nom::combinator::not;
|
|
use nom::combinator::opt;
|
|
use nom::combinator::peek;
|
|
use nom::combinator::recognize;
|
|
use nom::combinator::verify;
|
|
use nom::multi::many0;
|
|
use nom::multi::many1;
|
|
use nom::multi::many_till;
|
|
use nom::sequence::tuple;
|
|
|
|
use super::element_parser::element;
|
|
use super::object_parser::standard_set_object;
|
|
use super::org_source::OrgSource;
|
|
use super::util::non_whitespace_character;
|
|
use crate::context::parser_with_context;
|
|
use crate::context::ContextElement;
|
|
use crate::context::ContextMatcher;
|
|
use crate::context::ExitClass;
|
|
use crate::context::ExitMatcherNode;
|
|
use crate::context::RefContext;
|
|
use crate::error::CustomError;
|
|
use crate::error::MyError;
|
|
use crate::error::Res;
|
|
use crate::parser::util::blank_line;
|
|
use crate::parser::util::exit_matcher_parser;
|
|
use crate::parser::util::get_consumed;
|
|
use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
|
|
use crate::parser::util::start_of_line;
|
|
use crate::types::Object;
|
|
use crate::types::PlainList;
|
|
use crate::types::PlainListItem;
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
pub fn detect_plain_list<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
|
|
if verify(
|
|
tuple((
|
|
start_of_line,
|
|
space0,
|
|
bullet,
|
|
alt((space1, line_ending, eof)),
|
|
)),
|
|
|(_start, indent, bull, _after_whitespace)| {
|
|
Into::<&str>::into(bull) != "*" || indent.len() > 0
|
|
},
|
|
)(input)
|
|
.is_ok()
|
|
{
|
|
return Ok((input, ()));
|
|
}
|
|
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
|
"No element detected.".into(),
|
|
))));
|
|
}
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
pub fn plain_list<'b, 'g, 'r, 's>(
|
|
context: RefContext<'b, 'g, 'r, 's>,
|
|
input: OrgSource<'s>,
|
|
) -> Res<OrgSource<'s>, PlainList<'s>> {
|
|
let contexts = [
|
|
ContextElement::Context("plain list"),
|
|
ContextElement::ConsumeTrailingWhitespace(true),
|
|
ContextElement::ExitMatcherNode(ExitMatcherNode {
|
|
class: ExitClass::Beta,
|
|
exit_matcher: &plain_list_end,
|
|
}),
|
|
];
|
|
|
|
let parser_context = context.with_additional_node(&contexts[0]);
|
|
let parser_context = parser_context.with_additional_node(&contexts[1]);
|
|
let parser_context = parser_context.with_additional_node(&contexts[2]);
|
|
// children stores tuple of (input string, parsed object) so we can re-parse the final item
|
|
let mut children = Vec::new();
|
|
let mut first_item_indentation: Option<usize> = None;
|
|
let mut remaining = input;
|
|
|
|
// The final list item does not consume trailing blank lines (which instead get consumed by the list). We have three options here:
|
|
//
|
|
// 1. Parse all items while consuming trailing whitespace, then edit the final item to remove trailing whitespace.
|
|
// 2. Parse all items without consuming trailing whitespace, then edit all but the final one to add in the trailing whitespace.
|
|
// 3. Re-parse the final item with consume trailing whitespace disabled.
|
|
//
|
|
// While #3 is the most slow, it also seems to cleanest and involves the least manual mutation of already-parsed objects so I am going with #3 for now, but we should revisit #1 or #2 when the parser is more developed.
|
|
|
|
loop {
|
|
let list_item = parser_with_context!(plain_list_item)(&parser_context)(remaining);
|
|
match list_item {
|
|
Ok((remain, item))
|
|
if item.indentation == *first_item_indentation.get_or_insert(item.indentation) =>
|
|
{
|
|
children.push((remaining, item));
|
|
remaining = remain;
|
|
}
|
|
Ok(_) | Err(_) => {
|
|
break;
|
|
}
|
|
};
|
|
|
|
let maybe_exit = parser_with_context!(exit_matcher_parser)(&parser_context)(remaining);
|
|
if maybe_exit.is_ok() {
|
|
break;
|
|
}
|
|
}
|
|
|
|
let (final_child_start, _final_item_first_parse) = match children.pop() {
|
|
Some(final_child) => final_child,
|
|
None => {
|
|
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
|
"Plain lists require at least one element.".into(),
|
|
))));
|
|
}
|
|
};
|
|
let final_item_context = ContextElement::ConsumeTrailingWhitespace(false);
|
|
let final_item_context = parser_context.with_additional_node(&final_item_context);
|
|
let (remaining, reparsed_final_item) =
|
|
parser_with_context!(plain_list_item)(&final_item_context)(final_child_start)?;
|
|
children.push((final_child_start, reparsed_final_item));
|
|
|
|
let source = get_consumed(input, remaining);
|
|
Ok((
|
|
remaining,
|
|
PlainList {
|
|
source: source.into(),
|
|
children: children.into_iter().map(|(_start, item)| item).collect(),
|
|
},
|
|
))
|
|
}
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
pub fn plain_list_item<'b, 'g, 'r, 's>(
|
|
context: RefContext<'b, 'g, 'r, 's>,
|
|
input: OrgSource<'s>,
|
|
) -> Res<OrgSource<'s>, PlainListItem<'s>> {
|
|
start_of_line(input)?;
|
|
let (remaining, leading_whitespace) = space0(input)?;
|
|
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|
|
let indent_level = leading_whitespace.len();
|
|
let (remaining, bull) = verify(bullet, |bull: &OrgSource<'_>| {
|
|
Into::<&str>::into(bull) != "*" || indent_level > 0
|
|
})(remaining)?;
|
|
|
|
let (remaining, maybe_tag) = opt(tuple((
|
|
space1,
|
|
parser_with_context!(item_tag)(context),
|
|
tag(" ::"),
|
|
)))(remaining)?;
|
|
let maybe_contentless_item: Res<OrgSource<'_>, OrgSource<'_>> =
|
|
peek(recognize(tuple((many0(blank_line), eof))))(remaining);
|
|
match maybe_contentless_item {
|
|
Ok((_rem, _ws)) => {
|
|
let (remaining, _trailing_ws) = opt(blank_line)(remaining)?;
|
|
let source = get_consumed(input, remaining);
|
|
return Ok((
|
|
remaining,
|
|
PlainListItem {
|
|
source: source.into(),
|
|
indentation: indent_level,
|
|
bullet: bull.into(),
|
|
tag: maybe_tag
|
|
.map(|(_ws, item_tag, _divider)| item_tag)
|
|
.unwrap_or(Vec::new()),
|
|
children: Vec::new(),
|
|
},
|
|
));
|
|
}
|
|
Err(_) => {}
|
|
};
|
|
let (remaining, _ws) = item_tag_post_gap(context, remaining)?;
|
|
let exit_matcher = plain_list_item_end(indent_level);
|
|
let contexts = [
|
|
ContextElement::ConsumeTrailingWhitespace(true),
|
|
ContextElement::ExitMatcherNode(ExitMatcherNode {
|
|
class: ExitClass::Beta,
|
|
exit_matcher: &exit_matcher,
|
|
}),
|
|
];
|
|
let parser_context = context.with_additional_node(&contexts[0]);
|
|
let parser_context = parser_context.with_additional_node(&contexts[1]);
|
|
|
|
let (mut remaining, (mut children, _exit_contents)) = many_till(
|
|
include_input(parser_with_context!(element(true))(&parser_context)),
|
|
parser_with_context!(exit_matcher_parser)(&parser_context),
|
|
)(remaining)?;
|
|
|
|
if !children.is_empty() && !context.should_consume_trailing_whitespace() {
|
|
let final_item_context = ContextElement::ConsumeTrailingWhitespace(false);
|
|
let final_item_context = parser_context.with_additional_node(&final_item_context);
|
|
let (final_child_start, _original_final_child) = children
|
|
.pop()
|
|
.expect("if-statement already checked that children was non-empty.");
|
|
let (remain, reparsed_final_element) = include_input(parser_with_context!(element(true))(
|
|
&final_item_context,
|
|
))(final_child_start)?;
|
|
remaining = remain;
|
|
children.push(reparsed_final_element);
|
|
}
|
|
|
|
let (remaining, _trailing_ws) =
|
|
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
|
|
|
|
let source = get_consumed(input, remaining);
|
|
return Ok((
|
|
remaining,
|
|
PlainListItem {
|
|
source: source.into(),
|
|
indentation: indent_level,
|
|
bullet: bull.into(),
|
|
tag: maybe_tag
|
|
.map(|(_ws, item_tag, _divider)| item_tag)
|
|
.unwrap_or(Vec::new()),
|
|
children: children.into_iter().map(|(_start, item)| item).collect(),
|
|
},
|
|
));
|
|
}
|
|
|
|
fn include_input<'s, F, O>(
|
|
mut inner: F,
|
|
) -> impl FnMut(OrgSource<'s>) -> Res<OrgSource<'s>, (OrgSource<'s>, O)>
|
|
where
|
|
F: FnMut(OrgSource<'s>) -> Res<OrgSource<'s>, O>,
|
|
{
|
|
move |input: OrgSource<'_>| {
|
|
let (remaining, output) = inner(input)?;
|
|
Ok((remaining, (input, output)))
|
|
}
|
|
}
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
fn bullet<'s>(i: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
|
alt((
|
|
tag("*"),
|
|
tag("-"),
|
|
tag("+"),
|
|
recognize(tuple((counter, alt((tag("."), tag(")")))))),
|
|
))(i)
|
|
}
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
fn counter<'s>(i: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
|
alt((recognize(one_of("abcdefghijklmnopqrstuvwxyz")), digit1))(i)
|
|
}
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
fn plain_list_end<'b, 'g, 'r, 's>(
|
|
_context: RefContext<'b, 'g, 'r, 's>,
|
|
input: OrgSource<'s>,
|
|
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
|
recognize(tuple((
|
|
start_of_line,
|
|
verify(many1(blank_line), |lines: &Vec<OrgSource<'_>>| {
|
|
lines.len() >= 2
|
|
}),
|
|
)))(input)
|
|
}
|
|
|
|
const fn plain_list_item_end(indent_level: usize) -> impl ContextMatcher {
|
|
let line_indented_lte_matcher = line_indented_lte(indent_level);
|
|
move |context, input: OrgSource<'_>| {
|
|
_plain_list_item_end(context, input, &line_indented_lte_matcher)
|
|
}
|
|
}
|
|
|
|
#[cfg_attr(
|
|
feature = "tracing",
|
|
tracing::instrument(ret, level = "debug", skip(line_indented_lte_matcher))
|
|
)]
|
|
fn _plain_list_item_end<'b, 'g, 'r, 's>(
|
|
context: RefContext<'b, 'g, 'r, 's>,
|
|
input: OrgSource<'s>,
|
|
line_indented_lte_matcher: impl ContextMatcher,
|
|
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
|
start_of_line(input)?;
|
|
recognize(tuple((
|
|
opt(blank_line),
|
|
parser_with_context!(line_indented_lte_matcher)(context),
|
|
)))(input)
|
|
}
|
|
|
|
const fn line_indented_lte(indent_level: usize) -> impl ContextMatcher {
|
|
move |context, input: OrgSource<'_>| _line_indented_lte(context, input, indent_level)
|
|
}
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
fn _line_indented_lte<'b, 'g, 'r, 's>(
|
|
_context: RefContext<'b, 'g, 'r, 's>,
|
|
input: OrgSource<'s>,
|
|
indent_level: usize,
|
|
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
|
let matched = recognize(verify(
|
|
tuple((space0::<OrgSource<'_>, _>, non_whitespace_character)),
|
|
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|
|
|(_space0, _anychar)| _space0.len() <= indent_level,
|
|
))(input)?;
|
|
|
|
Ok(matched)
|
|
}
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
fn item_tag<'b, 'g, 'r, 's>(
|
|
context: RefContext<'b, 'g, 'r, 's>,
|
|
input: OrgSource<'s>,
|
|
) -> Res<OrgSource<'s>, Vec<Object<'s>>> {
|
|
let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode {
|
|
class: ExitClass::Gamma,
|
|
exit_matcher: &item_tag_end,
|
|
});
|
|
let parser_context = context.with_additional_node(&parser_context);
|
|
let (remaining, (children, _exit_contents)) = verify(
|
|
many_till(
|
|
// TODO: Should this be using a different set like the minimal set?
|
|
parser_with_context!(standard_set_object)(&parser_context),
|
|
parser_with_context!(exit_matcher_parser)(&parser_context),
|
|
),
|
|
|(children, _exit_contents)| !children.is_empty(),
|
|
)(input)?;
|
|
Ok((remaining, children))
|
|
}
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
fn item_tag_end<'b, 'g, 'r, 's>(
|
|
_context: RefContext<'b, 'g, 'r, 's>,
|
|
input: OrgSource<'s>,
|
|
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
|
recognize(alt((
|
|
line_ending,
|
|
tag(" :: "),
|
|
recognize(tuple((tag(" ::"), alt((line_ending, eof))))),
|
|
)))(input)
|
|
}
|
|
|
|
/// Recognizes the gap between a list item's bullet (or tag divider) and the
/// item's contents.
///
/// The gap is one `blank_line` or a run of spaces/tabs, followed by zero or
/// more further blank lines. Blank lines stop being collected as soon as one
/// of the following holds at the current position:
///
/// 1. the next line is not a blank line,
/// 2. only blank lines remain before the end of the input,
/// 3. the exit matcher of `context` matches.
///
/// The parse fails if the recognized gap is empty, so at least one byte of
/// whitespace must follow the bullet or tag.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn item_tag_post_gap<'b, 'g, 'r, 's>(
    context: RefContext<'b, 'g, 'r, 's>,
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
    verify(
        recognize(tuple((
            // `blank_line` is tried first; `space0` is the fallback for
            // whitespace that is followed by content on the same line.
            alt((blank_line, space0)),
            many_till(
                blank_line,
                alt((
                    peek(recognize(not(blank_line))),
                    peek(recognize(tuple((many0(blank_line), eof)))),
                    // NOTE(review): unlike the two alternatives above, this one is not
                    // wrapped in `peek`. Whether `exit_matcher_parser` consumes input
                    // (and so extends the recognized gap) is not visible here; confirm.
                    parser_with_context!(exit_matcher_parser)(context),
                )),
            ),
        ))),
        // Reject an empty gap.
        |gap| gap.len() > 0,
    )(input)
}
|
|
|
|
// Unit tests for the plain list parsers and the plain list detector.
//
// NOTE(review): this copy of the file appears to have lost leading whitespace
// on every line, including lines inside raw string literals. The raw strings
// below are reproduced exactly as they appear here; restore their indentation
// from the upstream file before relying on these tests.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::context::Context;
    use crate::context::GlobalSettings;
    use crate::context::List;
    use crate::types::Source;

    /// A bare bullet with no contents parses as a list item spanning the whole input.
    #[test]
    fn plain_list_item_empty() {
        let input = OrgSource::new("1.");
        let global_settings = GlobalSettings::default();
        let initial_context = ContextElement::document_context();
        let initial_context = Context::new(&global_settings, List::new(&initial_context));
        let plain_list_item_matcher = parser_with_context!(plain_list_item)(&initial_context);
        let (remaining, result) = plain_list_item_matcher(input).unwrap();
        assert_eq!(Into::<&str>::into(remaining), "");
        assert_eq!(result.source, "1.");
    }

    /// A bullet followed by text parses as a list item spanning the whole input.
    #[test]
    fn plain_list_item_simple() {
        let input = OrgSource::new("1. foo");
        let global_settings = GlobalSettings::default();
        let initial_context = ContextElement::document_context();
        let initial_context = Context::new(&global_settings, List::new(&initial_context));
        let plain_list_item_matcher = parser_with_context!(plain_list_item)(&initial_context);
        let (remaining, result) = plain_list_item_matcher(input).unwrap();
        assert_eq!(Into::<&str>::into(remaining), "");
        assert_eq!(result.source, "1. foo");
    }

    /// A bare bullet with no contents parses as a list spanning the whole input.
    #[test]
    fn plain_list_empty() {
        let input = OrgSource::new("1.");
        let global_settings = GlobalSettings::default();
        let initial_context = ContextElement::document_context();
        let initial_context = Context::new(&global_settings, List::new(&initial_context));
        let plain_list_matcher = parser_with_context!(plain_list)(&initial_context);
        let (remaining, result) = plain_list_matcher(input).unwrap();
        assert_eq!(Into::<&str>::into(remaining), "");
        assert_eq!(result.source, "1.");
    }

    /// A single item with text parses as a list spanning the whole input.
    #[test]
    fn plain_list_simple() {
        let input = OrgSource::new("1. foo");
        let global_settings = GlobalSettings::default();
        let initial_context = ContextElement::document_context();
        let initial_context = Context::new(&global_settings, List::new(&initial_context));
        let plain_list_matcher = parser_with_context!(plain_list)(&initial_context);
        let (remaining, result) = plain_list_matcher(input).unwrap();
        assert_eq!(Into::<&str>::into(remaining), "");
        assert_eq!(result.source, "1. foo");
    }

    /// An unindented `*` bullet is rejected by the plain list parser.
    #[test]
    fn plain_list_cant_start_line_with_asterisk() {
        // Plain lists with an asterisk bullet must be indented or else they would be a headline
        let input = OrgSource::new("* foo");
        let global_settings = GlobalSettings::default();
        let initial_context = ContextElement::document_context();
        let initial_context = Context::new(&global_settings, List::new(&initial_context));
        let plain_list_matcher = parser_with_context!(plain_list)(&initial_context);
        let result = plain_list_matcher(input);
        assert!(result.is_err());
    }

    /// An indented `*` bullet is accepted by the plain list parser.
    #[test]
    fn indented_can_start_line_with_asterisk() {
        // Plain lists with an asterisk bullet must be indented or else they would be a headline
        let input = OrgSource::new(" * foo");
        let global_settings = GlobalSettings::default();
        let initial_context = ContextElement::document_context();
        let initial_context = Context::new(&global_settings, List::new(&initial_context));
        let plain_list_matcher = parser_with_context!(plain_list)(&initial_context);
        let result = plain_list_matcher(input);
        assert!(result.is_ok());
    }

    /// Two consecutive blank lines end the list; the blank lines belong to the
    /// list's source and the following text is left unparsed.
    #[test]
    fn two_blank_lines_ends_list() {
        // NOTE(review): "baz" and "ipsum" presumably carried leading indentation
        // in the upstream fixture (the expected remainder below starts with
        // whitespace); it is not recoverable from this copy. Confirm.
        let input = OrgSource::new(
            r#"1. foo
2. bar
baz
3. lorem


ipsum
"#,
        );
        let global_settings = GlobalSettings::default();
        let initial_context = ContextElement::document_context();
        let initial_context = Context::new(&global_settings, List::new(&initial_context));
        let plain_list_matcher = parser_with_context!(element(true))(&initial_context);
        let (remaining, result) =
            plain_list_matcher(input).expect("Should parse the plain list successfully.");
        assert_eq!(Into::<&str>::into(remaining), " ipsum\n");
        assert_eq!(
            result.get_source(),
            r#"1. foo
2. bar
baz
3. lorem


"#
        );
    }

    /// Two consecutive blank lines also end a list that contains a nested list.
    #[test]
    fn two_blank_lines_ends_nested_list() {
        // NOTE(review): the nested "1. bar" line presumably carried leading
        // indentation in the upstream fixture; it is not recoverable from this
        // copy. Confirm.
        let input = OrgSource::new(
            r#"1. foo
1. bar


baz"#,
        );
        let global_settings = GlobalSettings::default();
        let initial_context = ContextElement::document_context();
        let initial_context = Context::new(&global_settings, List::new(&initial_context));
        let plain_list_matcher = parser_with_context!(element(true))(&initial_context);
        let (remaining, result) =
            plain_list_matcher(input).expect("Should parse the plain list successfully.");
        assert_eq!(Into::<&str>::into(remaining), "baz");
        assert_eq!(
            result.get_source(),
            r#"1. foo
1. bar


"#
        );
    }

    /// Single blank lines inside a list do not end it; only the final pair of
    /// blank lines does.
    #[test]
    fn interior_trailing_whitespace() {
        // NOTE(review): the continuation lines ("bar", "1. baz", "lorem",
        // "ipsum") presumably carried leading indentation in the upstream
        // fixture; it is not recoverable from this copy. Confirm.
        let input = OrgSource::new(
            r#"1. foo

bar

1. baz

lorem

ipsum


dolar"#,
        );
        let global_settings = GlobalSettings::default();
        let initial_context = ContextElement::document_context();
        let initial_context = Context::new(&global_settings, List::new(&initial_context));
        let plain_list_matcher = parser_with_context!(element(true))(&initial_context);
        let (remaining, result) =
            plain_list_matcher(input).expect("Should parse the plain list successfully.");
        assert_eq!(Into::<&str>::into(remaining), "dolar");
        assert_eq!(
            result.get_source(),
            r#"1. foo

bar

1. baz

lorem

ipsum


"#
        );
    }

    /// A bullet immediately followed by a line ending is detected as a plain list.
    #[test]
    fn detect_line_break() {
        let input = OrgSource::new(
            r#"+
"#,
        );
        let result = detect_plain_list(input);
        assert!(result.is_ok());
    }

    /// A bullet immediately followed by the end of the input is detected as a plain list.
    #[test]
    fn detect_eof() {
        let input = OrgSource::new(r#"+"#);
        let result = detect_plain_list(input);
        assert!(result.is_ok());
    }

    /// A bullet character directly followed by text is not detected as a plain list.
    #[test]
    fn detect_no_gap() {
        let input = OrgSource::new(r#"+foo"#);
        let result = detect_plain_list(input);
        // Since there is no whitespace after the '+' this is a paragraph, not a plain list.
        assert!(result.is_err());
    }

    /// A bullet followed by a space and text is detected as a plain list.
    #[test]
    fn detect_with_gap() {
        let input = OrgSource::new(r#"+ foo"#);
        let result = detect_plain_list(input);
        assert!(result.is_ok());
    }
}
|