organic/src/parser/plain_list.rs

253 lines
9.7 KiB
Rust
Raw Normal View History

use super::error::CustomError;
use super::error::MyError;
use super::error::Res;
use super::greater_element::PlainList;
use super::greater_element::PlainListItem;
use super::parser_with_context::parser_with_context;
use super::util::maybe_consume_trailing_whitespace_if_not_exiting;
use super::util::non_whitespace_character;
use super::Context;
use crate::parser::element::element;
use crate::parser::parser_context::ChainBehavior;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::start_of_line;
2023-03-25 18:10:22 +00:00
use nom::branch::alt;
2023-03-25 18:23:52 +00:00
use nom::bytes::complete::tag;
use nom::character::complete::digit1;
use nom::character::complete::line_ending;
2023-03-25 18:23:52 +00:00
use nom::character::complete::one_of;
2023-03-25 18:10:22 +00:00
use nom::character::complete::space0;
use nom::character::complete::space1;
2023-03-25 18:10:22 +00:00
use nom::combinator::eof;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many_till;
2023-03-25 18:10:22 +00:00
use nom::sequence::tuple;
2023-03-25 18:28:48 +00:00
pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainList<'s>> {
// TODO: Are we handling 2 blank lines causing the end of all plain lists?
let (mut remaining, first_item) = plain_list_item(context, input)?;
let first_item_indentation = first_item.indentation;
let plain_list_item_matcher = parser_with_context!(plain_list_item)(context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(context);
let mut children = Vec::new();
children.push(first_item);
loop {
let exit_contents = exit_matcher(remaining);
if exit_contents.is_ok() {
break;
}
let next_list_item = plain_list_item_matcher(remaining);
match next_list_item {
Ok((remain, next_child)) if next_child.indentation == first_item_indentation => {
children.push(next_child);
remaining = remain;
}
Ok(_) | Err(_) => break,
};
}
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, PlainList { source, children }))
2023-03-25 18:28:48 +00:00
}
#[tracing::instrument(ret, level = "debug")]
pub fn plain_list_item<'r, 's>(
context: Context<'r, 's>,
input: &'s str,
2023-03-25 18:23:52 +00:00
) -> Res<&'s str, PlainListItem<'s>> {
2023-03-25 18:10:22 +00:00
start_of_line(context, input)?;
let (remaining, leading_whitespace) = space0(input)?;
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
let indent_level = leading_whitespace.len();
2023-03-25 18:23:52 +00:00
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false))
.with_additional_node(ContextElement::ListItem(indent_level))
2023-03-25 18:10:22 +00:00
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)),
}));
2023-03-25 18:23:52 +00:00
let element_matcher = parser_with_context!(element)(&parser_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
let (remaining, bull) =
verify(bullet, |bull: &str| bull != "*" || indent_level > 0)(remaining)?;
let maybe_contentless_item: Res<&str, &str> = alt((eof, line_ending))(remaining);
match maybe_contentless_item {
Ok((rem, _ws)) => {
let source = get_consumed(input, rem);
return Ok((
rem,
PlainListItem {
source,
indentation: indent_level,
bullet: bull,
children: Vec::new(),
},
));
}
Err(_) => {
let (remaining, _ws) = space1(remaining)?;
let (remaining, (contents, _exit_contents)) =
many_till(element_matcher, exit_matcher)(remaining)?;
let source = get_consumed(input, remaining);
return Ok((
remaining,
PlainListItem {
source,
indentation: indent_level,
bullet: bull,
children: contents,
},
));
}
};
2023-03-25 18:23:52 +00:00
}
#[tracing::instrument(ret, level = "debug")]
2023-03-25 18:23:52 +00:00
fn bullet<'s>(i: &'s str) -> Res<&'s str, &'s str> {
alt((
tag("*"),
tag("-"),
tag("+"),
recognize(tuple((counter, alt((tag("."), tag(")")))))),
))(i)
}
#[tracing::instrument(ret, level = "debug")]
2023-03-25 18:23:52 +00:00
fn counter<'s>(i: &'s str) -> Res<&'s str, &'s str> {
alt((recognize(one_of("abcdefghijklmnopqrstuvwxyz")), digit1))(i)
}
2023-03-25 18:10:22 +00:00
#[tracing::instrument(ret, level = "debug")]
2023-03-25 18:10:22 +00:00
fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
2023-04-03 19:06:12 +00:00
let current_item_indent_level: &usize =
get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError(
"Not inside a plain list item",
))))?;
2023-03-25 18:10:22 +00:00
let plain_list_item_matcher = parser_with_context!(plain_list_item)(context);
let line_indented_lte_matcher = parser_with_context!(line_indented_lte)(context);
alt((
2023-04-03 19:06:12 +00:00
recognize(verify(plain_list_item_matcher, |pli| {
pli.indentation <= *current_item_indent_level
})),
recognize(line_indented_lte_matcher),
2023-03-25 18:10:22 +00:00
eof,
))(input)
}
#[tracing::instrument(ret, level = "debug")]
2023-03-25 18:10:22 +00:00
fn line_indented_lte<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
let current_item_indent_level: &usize =
get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError(
"Not inside a plain list item",
))))?;
start_of_line(context, input)?;
let matched = recognize(verify(
tuple((space0::<&str, _>, non_whitespace_character)),
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
|(_space0, _anychar)| _space0.len() <= *current_item_indent_level,
))(input)?;
Ok(matched)
}
/// Return the indentation stored in the first `ContextElement::ListItem` that
/// iterating over `context` yields, or `None` when there is no such element.
///
/// NOTE(review): presumably the context iterates from the innermost node
/// outwards, making this the nearest enclosing list item — confirm against
/// the context tree implementation.
fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize> {
    context.iter().find_map(|node| match node.get_data() {
        ContextElement::ListItem(depth) => Some(depth),
        _ => None,
    })
}
2023-03-25 18:28:48 +00:00
#[cfg(test)]
mod tests {
    use crate::parser::parser_context::ContextElement;
    use crate::parser::parser_context::ContextTree;
    use crate::parser::parser_with_context::parser_with_context;

    use super::*;

    /// A lone counter bullet with nothing after it parses as a complete item.
    #[test]
    fn plain_list_item_empty() {
        let input = "1.";
        let root: ContextTree<'_, '_> = ContextTree::new();
        let document_context = root.with_additional_node(ContextElement::DocumentRoot(input));
        let parse_item = parser_with_context!(plain_list_item)(&document_context);
        let (remaining, item) = parse_item(input).unwrap();
        assert_eq!(remaining, "");
        assert_eq!(item.source, "1.");
    }

    /// A counter bullet followed by a space and text parses as one item covering the whole input.
    #[test]
    fn plain_list_item_simple() {
        let input = "1. foo";
        let root: ContextTree<'_, '_> = ContextTree::new();
        let document_context = root.with_additional_node(ContextElement::DocumentRoot(input));
        let parse_item = parser_with_context!(plain_list_item)(&document_context);
        let (remaining, item) = parse_item(input).unwrap();
        assert_eq!(remaining, "");
        assert_eq!(item.source, "1. foo");
    }

    /// A list made of a single contentless item consumes the whole input.
    #[test]
    fn plain_list_empty() {
        let input = "1.";
        let root: ContextTree<'_, '_> = ContextTree::new();
        let document_context = root.with_additional_node(ContextElement::DocumentRoot(input));
        let parse_list = parser_with_context!(plain_list)(&document_context);
        let (remaining, list) = parse_list(input).unwrap();
        assert_eq!(remaining, "");
        assert_eq!(list.source, "1.");
    }

    /// A list made of a single item with text consumes the whole input.
    #[test]
    fn plain_list_simple() {
        let input = "1. foo";
        let root: ContextTree<'_, '_> = ContextTree::new();
        let document_context = root.with_additional_node(ContextElement::DocumentRoot(input));
        let parse_list = parser_with_context!(plain_list)(&document_context);
        let (remaining, list) = parse_list(input).unwrap();
        assert_eq!(remaining, "");
        assert_eq!(list.source, "1. foo");
    }

    #[test]
    fn plain_list_cant_start_line_with_asterisk() {
        // Plain lists with an asterisk bullet must be indented or else they would be a headline
        let input = "* foo";
        let root: ContextTree<'_, '_> = ContextTree::new();
        let document_context = root.with_additional_node(ContextElement::DocumentRoot(input));
        let parse_list = parser_with_context!(plain_list)(&document_context);
        let outcome = parse_list(input);
        assert!(outcome.is_err());
    }

    #[test]
    fn indented_can_start_line_with_asterisk() {
        // An asterisk bullet that is indented cannot be a headline, so it is a valid list bullet
        let input = " * foo";
        let root: ContextTree<'_, '_> = ContextTree::new();
        let document_context = root.with_additional_node(ContextElement::DocumentRoot(input));
        let parse_list = parser_with_context!(plain_list)(&document_context);
        let outcome = parse_list(input);
        assert!(outcome.is_ok());
    }
}