// organic/src/parser/plain_list.rs

use super::greater_element::PlainList;
use super::greater_element::PlainListItem;
use super::parser_with_context::parser_with_context;

use super::util::non_whitespace_character;
use super::Context;
use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
use crate::parser::element_parser::element;
use crate::parser::exiting::ExitClass;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::util::blank_line;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::start_of_line;
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::digit1;
use nom::character::complete::line_ending;
use nom::character::complete::one_of;
use nom::character::complete::space0;
use nom::character::complete::space1;
use nom::combinator::eof;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::preceded;
use nom::sequence::terminated;
use nom::sequence::tuple;
use tracing::span;

#[tracing::instrument(ret, level = "debug")]
pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainList<'s>> {
    let parser_context = context
        .with_additional_node(ContextElement::Context("plain list"))
        .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
            class: ExitClass::Beta,
            exit_matcher: &plain_list_end,
        }));
    let without_consume_context =
        parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
    let with_consume_context =
        parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true));
    let without_consume_matcher = parser_with_context!(plain_list_item)(&without_consume_context);
    let with_consume_matcher = parser_with_context!(plain_list_item)(&with_consume_context);
    let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
    let mut children = Vec::new();
    let mut first_item_indentation: Option<usize> = None;
    let mut remaining = input;

    loop {
        /*
        Trailing whitespace belongs to the plain list, not the plain list item

        Possible outcomes:
        Don't consume, yes exit matcher
        Don't consume, no additional item
        Consume, additional item
         */
        {
            // Don't consume, yes exit matcher
            let span = span!(tracing::Level::DEBUG, "first");
            let _enter = span.enter();

            let last_item_then_exit = tuple((without_consume_matcher, exit_matcher))(remaining);
            match last_item_then_exit {
                Ok((remain, (item, _exit)))
                    if item.indentation
                        == *first_item_indentation.get_or_insert(item.indentation) =>
                {
                    remaining = remain;
                    children.push(item);
                    break;
                }
                Ok(_) | Err(_) => {}
            };
        }

        {
            // Consume, additional item
            let span = span!(tracing::Level::DEBUG, "second");
            let _enter = span.enter();

            let not_last_item =
                tuple((with_consume_matcher, peek(without_consume_matcher)))(remaining);
            match not_last_item {
                Ok((remain, (item, future_item)))
                    if item.indentation
                        == *first_item_indentation.get_or_insert(item.indentation)
                        && future_item.indentation
                            == *first_item_indentation.get_or_insert(item.indentation) =>
                {
                    remaining = remain;
                    children.push(item);
                    continue;
                }
                Ok(_) | Err(_) => {}
            };
        }

        {
            // Don't consume, no additional item
            let span = span!(tracing::Level::DEBUG, "third");
            let _enter = span.enter();

            let last_item_then_exit = without_consume_matcher(remaining);
            match last_item_then_exit {
                Ok((remain, item))
                    if item.indentation
                        == *first_item_indentation.get_or_insert(item.indentation) =>
                {
                    remaining = remain;
                    children.push(item);
                    break;
                }
                Ok(_) | Err(_) => {
                    // TODO: Maybe this is reachable when there are no items at all.
                    return Err(nom::Err::Error(CustomError::MyError(MyError(
                        "Should be unreachable.",
                    ))));
                    // unreachable!();
                }
            };
        }
    }

    if children.is_empty() {
        return Err(nom::Err::Error(CustomError::MyError(MyError(
            "Plain lists require at least one element.",
        ))));
    }

    let source = get_consumed(input, remaining);
    Ok((remaining, PlainList { source, children }))
}

#[tracing::instrument(ret, level = "debug")]
pub fn plain_list_item<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, PlainListItem<'s>> {
    start_of_line(context, input)?;
    let (remaining, leading_whitespace) = space0(input)?;
    // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
    let indent_level = leading_whitespace.len();
    let with_consume_context = context
        .with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
        .with_additional_node(ContextElement::ListItem(indent_level))
        .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
            class: ExitClass::Beta,
            exit_matcher: &plain_list_item_end,
        }));
    let without_consume_context = context
        .with_additional_node(ContextElement::ListItem(indent_level))
        .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
            class: ExitClass::Beta,
            exit_matcher: &plain_list_item_end,
        }));

    let with_consume_matcher = parser_with_context!(element)(&with_consume_context);
    let without_consume_matcher = parser_with_context!(element)(&without_consume_context);
    let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context);
    let (remaining, bull) =
        verify(bullet, |bull: &str| bull != "*" || indent_level > 0)(remaining)?;
    let maybe_contentless_item: Res<&str, &str> = alt((eof, line_ending))(remaining);
    match maybe_contentless_item {
        Ok((rem, _ws)) => {
            // TODO: do we need to consume if this isn't the last item?
            let source = get_consumed(input, rem);
            return Ok((
                rem,
                PlainListItem {
                    source,
                    indentation: indent_level,
                    bullet: bull,
                    children: Vec::new(),
                },
            ));
        }
        Err(_) => {
            let (remaining, _ws) = space1(remaining)?;
            let (remaining, (mut contents, final_element)) = many_till(
                with_consume_matcher,
                alt((
                    terminated(without_consume_matcher, exit_matcher),
                    preceded(
                        peek(tuple((with_consume_matcher, exit_matcher))),
                        without_consume_matcher,
                    ),
                )),
            )(remaining)?;
            contents.push(final_element);
            let source = get_consumed(input, remaining);
            return Ok((
                remaining,
                PlainListItem {
                    source,
                    indentation: indent_level,
                    bullet: bull,
                    children: contents,
                },
            ));
        }
    };
}

/// Recognize a list bullet: `*`, `-`, `+`, or a `counter` followed by `.` or `)`.
///
/// Returns the matched bullet text.
#[tracing::instrument(ret, level = "debug")]
fn bullet<'s>(i: &'s str) -> Res<&'s str, &'s str> {
    let counter_terminator = alt((tag("."), tag(")")));
    let ordered_bullet = recognize(tuple((counter, counter_terminator)));
    alt((tag("*"), tag("-"), tag("+"), ordered_bullet))(i)
}

/// Recognize the counter of an ordered bullet: one lowercase ASCII letter, or one or more digits.
#[tracing::instrument(ret, level = "debug")]
fn counter<'s>(i: &'s str) -> Res<&'s str, &'s str> {
    let single_letter = recognize(one_of("abcdefghijklmnopqrstuvwxyz"));
    alt((single_letter, digit1))(i)
}

/// Exit matcher for a plain list: succeeds at the start of a line that is followed by at least
/// two lines accepted by `blank_line`, and returns the text it matched.
#[tracing::instrument(ret, level = "debug")]
fn plain_list_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    let at_line_start = parser_with_context!(start_of_line)(context);
    // A single blank line does not end the list; two or more do.
    let two_or_more_blank_lines =
        verify(many1(blank_line), |lines: &Vec<&str>| lines.len() > 1);
    recognize(tuple((at_line_start, two_or_more_blank_lines)))(input)
}

#[tracing::instrument(ret, level = "debug")]
fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    let current_item_indent_level: &usize =
        get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError(
            "Not inside a plain list item",
        ))))?;
    let plain_list_item_matcher = parser_with_context!(plain_list_item)(context);
    let line_indented_lte_matcher = parser_with_context!(line_indented_lte)(context);
    alt((
        recognize(verify(plain_list_item_matcher, |pli| {
            pli.indentation <= *current_item_indent_level
        })),
        recognize(line_indented_lte_matcher),
    ))(input)
}

#[tracing::instrument(ret, level = "debug")]
fn line_indented_lte<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    let current_item_indent_level: &usize =
        get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError(
            "Not inside a plain list item",
        ))))?;

    start_of_line(context, input)?;

    let matched = recognize(verify(
        tuple((space0::<&str, _>, non_whitespace_character)),
        // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
        |(_space0, _anychar)| _space0.len() <= *current_item_indent_level,
    ))(input)?;

    Ok(matched)
}

/// Return the indentation stored in the first `ContextElement::ListItem` node encountered while
/// iterating `context`, or `None` when the context contains no such node.
fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize> {
    context.iter().find_map(|node| match node.get_data() {
        ContextElement::ListItem(depth) => Some(depth),
        _ => None,
    })
}

// Unit tests for the plain list parsers. Each test builds a fresh context tree rooted at a
// `DocumentRoot` node for its input and runs one parser against that input.
#[cfg(test)]
mod tests {
    use crate::parser::parser_context::ContextElement;
    use crate::parser::parser_context::ContextTree;
    use crate::parser::parser_with_context::parser_with_context;
    use crate::parser::Source;

    use super::*;

    // A bullet with nothing after it is a complete item.
    #[test]
    fn plain_list_item_empty() {
        let input = "1.";
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let document_context =
            initial_context.with_additional_node(ContextElement::DocumentRoot(input));
        let plain_list_item_matcher = parser_with_context!(plain_list_item)(&document_context);
        let (remaining, result) = plain_list_item_matcher(input).unwrap();
        assert_eq!(remaining, "");
        assert_eq!(result.source, "1.");
    }

    // A bullet followed by a space and text is consumed in full.
    #[test]
    fn plain_list_item_simple() {
        let input = "1. foo";
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let document_context =
            initial_context.with_additional_node(ContextElement::DocumentRoot(input));
        let plain_list_item_matcher = parser_with_context!(plain_list_item)(&document_context);
        let (remaining, result) = plain_list_item_matcher(input).unwrap();
        assert_eq!(remaining, "");
        assert_eq!(result.source, "1. foo");
    }

    // A list made of a single contentless item.
    #[test]
    fn plain_list_empty() {
        let input = "1.";
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let document_context =
            initial_context.with_additional_node(ContextElement::DocumentRoot(input));
        let plain_list_matcher = parser_with_context!(plain_list)(&document_context);
        let (remaining, result) = plain_list_matcher(input).unwrap();
        assert_eq!(remaining, "");
        assert_eq!(result.source, "1.");
    }

    // A list made of a single item with content.
    #[test]
    fn plain_list_simple() {
        let input = "1. foo";
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let document_context =
            initial_context.with_additional_node(ContextElement::DocumentRoot(input));
        let plain_list_matcher = parser_with_context!(plain_list)(&document_context);
        let (remaining, result) = plain_list_matcher(input).unwrap();
        assert_eq!(remaining, "");
        assert_eq!(result.source, "1. foo");
    }

    #[test]
    fn plain_list_cant_start_line_with_asterisk() {
        // Plain lists with an asterisk bullet must be indented or else they would be a headline
        let input = "* foo";
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let document_context =
            initial_context.with_additional_node(ContextElement::DocumentRoot(input));
        let plain_list_matcher = parser_with_context!(plain_list)(&document_context);
        let result = plain_list_matcher(input);
        assert!(result.is_err());
    }

    #[test]
    fn indented_can_start_line_with_asterisk() {
        // Counterpart of the test above: once the asterisk is indented it is accepted as a
        // list bullet.
        let input = " * foo";
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let document_context =
            initial_context.with_additional_node(ContextElement::DocumentRoot(input));
        let plain_list_matcher = parser_with_context!(plain_list)(&document_context);
        let result = plain_list_matcher(input);
        assert!(result.is_ok());
    }

    // Two consecutive blank lines end the list: the blank lines are part of the parsed source
    // and the indented "ipsum" line is left over.
    #[test]
    fn two_blank_lines_ends_list() {
        let input = r#"1. foo
2. bar
   baz
3. lorem


   ipsum
"#;
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let document_context =
            initial_context.with_additional_node(ContextElement::DocumentRoot(input));
        // Parsed through the generic `element` parser rather than `plain_list` directly.
        let plain_list_matcher = parser_with_context!(element)(&document_context);
        let (remaining, result) =
            plain_list_matcher(input).expect("Should parse the plain list successfully.");
        assert_eq!(remaining, "   ipsum\n");
        assert_eq!(
            result.get_source(),
            r#"1. foo
2. bar
   baz
3. lorem


"#
        );
    }

    // Two consecutive blank lines after a nested item end the outer list as well.
    #[test]
    fn two_blank_lines_ends_nested_list() {
        let input = r#"1. foo
   1. bar


baz"#;
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let document_context =
            initial_context.with_additional_node(ContextElement::DocumentRoot(input));
        // Parsed through the generic `element` parser rather than `plain_list` directly.
        let plain_list_matcher = parser_with_context!(element)(&document_context);
        let (remaining, result) =
            plain_list_matcher(input).expect("Should parse the plain list successfully.");
        assert_eq!(remaining, "baz");
        assert_eq!(
            result.get_source(),
            r#"1. foo
   1. bar


"#
        );
    }

    // Single blank lines between the elements of an item (and of a nested item) do not end the
    // list; only the final pair of blank lines does, leaving "dolar" unparsed.
    #[test]
    fn interior_trailing_whitespace() {
        let input = r#"1. foo

   bar

   1. baz

      lorem

   ipsum


dolar"#;
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let document_context =
            initial_context.with_additional_node(ContextElement::DocumentRoot(input));
        // Parsed through the generic `element` parser rather than `plain_list` directly.
        let plain_list_matcher = parser_with_context!(element)(&document_context);
        let (remaining, result) =
            plain_list_matcher(input).expect("Should parse the plain list successfully.");
        assert_eq!(remaining, "dolar");
        assert_eq!(
            result.get_source(),
            r#"1. foo

   bar

   1. baz

      lorem

   ipsum


"#
        );
    }
}