organic/src/parser/plain_list.rs

use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::anychar;
use nom::character::complete::digit1;
use nom::character::complete::line_ending;
use nom::character::complete::multispace1;
use nom::character::complete::one_of;
use nom::character::complete::space0;
use nom::character::complete::space1;
use nom::combinator::eof;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::tuple;

use super::greater_element::PlainList;
use super::greater_element::PlainListItem;
use super::org_source::OrgSource;
use super::parser_with_context::parser_with_context;
use super::util::non_whitespace_character;
use super::Context;
use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
use crate::parser::element_parser::element;
use crate::parser::exiting::ExitClass;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::util::blank_line;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
use crate::parser::util::start_of_line;

#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn detect_plain_list<'r, 's>(
    _context: Context<'r, 's>,
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, ()> {
    if verify(
        tuple((
            start_of_line,
            space0,
            bullet,
            alt((space1, line_ending, eof)),
        )),
        |(_start, indent, bull, _after_whitespace)| {
            Into::<&str>::into(bull) != "*" || indent.len() > 0
        },
    )(input)
    .is_ok()
    {
        return Ok((input, ()));
    }
    return Err(nom::Err::Error(CustomError::MyError(MyError(
        "No element detected.".into(),
    ))));
}

/// Parses a plain list: one or more consecutive list items that share the indentation of the
/// first item.
///
/// Items are collected until one fails to parse, one has a different indentation than the first
/// item, or the exit matcher succeeds after an item. The last collected item is then parsed a
/// second time with `ConsumeTrailingWhitespace(false)` and that second result is the one kept.
///
/// # Errors
///
/// Returns `MyError("Plain lists require at least one element.")` when no item could be parsed,
/// and propagates any error from re-parsing the final item.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn plain_list<'r, 's>(
    context: Context<'r, 's>,
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, PlainList<'s>> {
    let parser_context = context
        .with_additional_node(ContextElement::Context("plain list"))
        .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
            class: ExitClass::Beta,
            exit_matcher: &plain_list_end,
        }));

    // The final list item does not consume trailing blank lines (which instead get consumed by
    // the list). We have three options here:
    //
    // 1. Parse all items while consuming trailing whitespace, then edit the final item to remove
    //    trailing whitespace.
    // 2. Parse all items without consuming trailing whitespace, then edit all but the final one
    //    to add in the trailing whitespace.
    // 3. Re-parse the final item with consume trailing whitespace disabled.
    //
    // While #3 is the slowest, it also seems to be the cleanest and involves the least manual
    // mutation of already-parsed objects, so I am going with #3 for now, but we should revisit
    // #1 or #2 when the parser is more developed.

    // Each entry pairs an item with the input position it was parsed from, so the final item
    // can be re-parsed from the same position.
    let mut parsed_items = Vec::new();
    let mut list_indentation: Option<usize> = None;
    let mut cursor = input;

    loop {
        let item_start = cursor;
        let (after_item, item) =
            match parser_with_context!(plain_list_item)(&parser_context)(item_start) {
                Ok(parsed) => parsed,
                Err(_) => break,
            };
        // The first item fixes the indentation of the list; any item that deviates from it is
        // not part of this list.
        if item.indentation != *list_indentation.get_or_insert(item.indentation) {
            break;
        }
        parsed_items.push((item_start, item));
        cursor = after_item;

        if parser_with_context!(exit_matcher_parser)(&parser_context)(cursor).is_ok() {
            break;
        }
    }

    // Discard the first parse of the final item; only its starting position is reused.
    let final_item_start = match parsed_items.pop() {
        Some((start, _first_parse)) => start,
        None => {
            return Err(nom::Err::Error(CustomError::MyError(MyError(
                "Plain lists require at least one element.".into(),
            ))));
        }
    };
    let final_item_context =
        parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false));
    let (remaining, final_item) =
        parser_with_context!(plain_list_item)(&final_item_context)(final_item_start)?;
    parsed_items.push((final_item_start, final_item));

    let children = parsed_items
        .into_iter()
        .map(|(_start, item)| item)
        .collect();
    Ok((
        remaining,
        PlainList {
            source: get_consumed(input, remaining).into(),
            children,
        },
    ))
}

#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn plain_list_item<'r, 's>(
    context: Context<'r, 's>,
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, PlainListItem<'s>> {
    start_of_line(input)?;
    let (remaining, leading_whitespace) = space0(input)?;
    // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
    let indent_level = leading_whitespace.len();
    let (remaining, bull) = verify(bullet, |bull: &OrgSource<'_>| {
        Into::<&str>::into(bull) != "*" || indent_level > 0
    })(remaining)?;

    let maybe_contentless_item: Res<OrgSource<'_>, OrgSource<'_>> = eof(remaining);
    match maybe_contentless_item {
        Ok((rem, _ws)) => {
            let source = get_consumed(input, rem);
            return Ok((
                rem,
                PlainListItem {
                    source: source.into(),
                    indentation: indent_level,
                    bullet: bull.into(),
                    children: Vec::new(),
                },
            ));
        }
        Err(_) => {}
    };

    let (remaining, _maybe_tag) = opt(tuple((space1, item_tag, tag(" ::"))))(remaining)?;
    let (remaining, _ws) = alt((space1, line_ending))(remaining)?;
    let exit_matcher = plain_list_item_end(indent_level);
    let parser_context = context
        .with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
        .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
            class: ExitClass::Beta,
            exit_matcher: &exit_matcher,
        }));

    let (remaining, (children, _exit_contents)) = many_till(
        parser_with_context!(element(true))(&parser_context),
        parser_with_context!(exit_matcher_parser)(&parser_context),
    )(remaining)?;

    let (remaining, _trailing_ws) =
        maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;

    let source = get_consumed(input, remaining);
    return Ok((
        remaining,
        PlainListItem {
            source: source.into(),
            indentation: indent_level,
            bullet: bull.into(),
            children,
        },
    ));
}

/// Matches a list bullet and returns the matched text.
///
/// A bullet is one of `*`, `-`, `+`, or a counter (see `counter`) immediately followed by
/// `.` or `)`.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn bullet<'s>(i: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
    // Ordered bullets such as `1.`, `12)` or `a.`.
    let counter_bullet = recognize(tuple((counter, alt((tag("."), tag(")"))))));
    alt((tag("*"), tag("-"), tag("+"), counter_bullet))(i)
}

/// Matches the counter part of an ordered bullet: either a single lowercase ASCII letter
/// (`a`-`z`) or one or more digits. Returns the matched text.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn counter<'s>(i: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
    let single_letter = recognize(one_of("abcdefghijklmnopqrstuvwxyz"));
    alt((single_letter, digit1))(i)
}

/// Exit matcher for a plain list.
///
/// Succeeds when `start_of_line` matches and is followed by at least two consecutive
/// `blank_line` matches; returns the recognized span. The context is not used.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn plain_list_end<'r, 's>(
    _context: Context<'r, 's>,
    input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
    // One blank line is not enough to end the list; require two or more.
    let two_or_more_blank_lines = verify(
        many1(blank_line),
        |blank_lines: &Vec<OrgSource<'_>>| blank_lines.len() >= 2,
    );
    recognize(tuple((start_of_line, two_or_more_blank_lines)))(input)
}

/// Builds the exit matcher for a list item whose bullet is indented by `indent_level` bytes.
///
/// The returned closure has the usual `(context, input)` parser shape and delegates to
/// `_plain_list_item_end`, supplying a `line_indented_lte` matcher created once for this
/// indentation.
const fn plain_list_item_end(
    indent_level: usize,
) -> impl for<'r, 's> Fn(Context<'r, 's>, OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
    let shallow_line_matcher = line_indented_lte(indent_level);
    move |ctx: Context, source: OrgSource<'_>| {
        _plain_list_item_end(ctx, source, &shallow_line_matcher)
    }
}

/// Implementation behind `plain_list_item_end`.
///
/// Requires `start_of_line` to succeed at `input`, then recognizes an optional blank line
/// followed by whatever `line_indented_lte_matcher` matches, returning the recognized span.
/// The matcher is skipped in tracing output.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(ret, level = "debug", skip(line_indented_lte_matcher))
)]
fn _plain_list_item_end<'r, 's>(
    context: Context<'r, 's>,
    input: OrgSource<'s>,
    line_indented_lte_matcher: impl for<'rr, 'ss> Fn(
        Context<'rr, 'ss>,
        OrgSource<'ss>,
    ) -> Res<OrgSource<'ss>, OrgSource<'ss>>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
    start_of_line(input)?;
    // Bind the matcher to the current context so it can be used as a plain nom parser.
    let shallow_line = parser_with_context!(line_indented_lte_matcher)(context);
    recognize(tuple((opt(blank_line), shallow_line)))(input)
}

/// Builds a `(context, input)` parser that matches the start of a line indented by at most
/// `indent_level` bytes; see `_line_indented_lte` for the matching rule.
const fn line_indented_lte(
    indent_level: usize,
) -> impl for<'r, 's> Fn(Context<'r, 's>, OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
    move |ctx: Context, source: OrgSource<'_>| {
        _line_indented_lte(ctx, source, indent_level)
    }
}

/// Matches optional leading spaces/tabs followed by one `non_whitespace_character`, but only
/// when the leading whitespace is at most `indent_level` bytes long.
///
/// Returns the recognized span (the whitespace plus that one character). The context is not
/// used.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn _line_indented_lte<'r, 's>(
    _context: Context<'r, 's>,
    input: OrgSource<'s>,
    indent_level: usize,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
    recognize(verify(
        tuple((space0::<OrgSource<'_>, _>, non_whitespace_character)),
        // Comparing byte lengths instead of character counts is fine because nom's space0 only
        // matches space and tab (0x20 and 0x09), which are one byte each.
        |(leading_ws, _first_visible)| leading_ws.len() <= indent_level,
    ))(input)
}

/// Recognizes the tag text of a list item (the part before ` ::`).
///
/// Consumes characters one at a time until the upcoming input is a line ending, the text
/// ` :: `, or ` ::` directly followed by a line ending or end of input. The terminator is only
/// peeked at, never consumed, so the returned span may be empty. Fails if the input runs out
/// before any terminator is seen.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn item_tag<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
    let tag_terminator = alt((
        line_ending,
        tag(" :: "),
        recognize(tuple((tag(" ::"), alt((line_ending, eof))))),
    ));
    recognize(many_till(anychar, peek(tag_terminator)))(input)
}

// Unit tests for the plain list parsers. Each test builds an `OrgSource` from a literal, runs a
// parser against a fresh `ContextTree`, and checks the remaining input and/or the reported
// source text.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parser_context::ContextTree;
    use crate::parser::parser_with_context::parser_with_context;
    use crate::parser::Source;

    // A bare bullet at end of input parses as an item whose source is the whole input.
    #[test]
    fn plain_list_item_empty() {
        let input = OrgSource::new("1.");
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let plain_list_item_matcher = parser_with_context!(plain_list_item)(&initial_context);
        let (remaining, result) = plain_list_item_matcher(input).unwrap();
        assert_eq!(Into::<&str>::into(remaining), "");
        assert_eq!(result.source, "1.");
    }

    // A bullet followed by text on the same line parses as one item consuming all input.
    #[test]
    fn plain_list_item_simple() {
        let input = OrgSource::new("1. foo");
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let plain_list_item_matcher = parser_with_context!(plain_list_item)(&initial_context);
        let (remaining, result) = plain_list_item_matcher(input).unwrap();
        assert_eq!(Into::<&str>::into(remaining), "");
        assert_eq!(result.source, "1. foo");
    }

    // Same input as `plain_list_item_empty`, but parsed through the list parser.
    #[test]
    fn plain_list_empty() {
        let input = OrgSource::new("1.");
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let plain_list_matcher = parser_with_context!(plain_list)(&initial_context);
        let (remaining, result) = plain_list_matcher(input).unwrap();
        assert_eq!(Into::<&str>::into(remaining), "");
        assert_eq!(result.source, "1.");
    }

    // Same input as `plain_list_item_simple`, but parsed through the list parser.
    #[test]
    fn plain_list_simple() {
        let input = OrgSource::new("1. foo");
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let plain_list_matcher = parser_with_context!(plain_list)(&initial_context);
        let (remaining, result) = plain_list_matcher(input).unwrap();
        assert_eq!(Into::<&str>::into(remaining), "");
        assert_eq!(result.source, "1. foo");
    }

    #[test]
    fn plain_list_cant_start_line_with_asterisk() {
        // Plain lists with an asterisk bullet must be indented or else they would be a headline
        let input = OrgSource::new("* foo");
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let plain_list_matcher = parser_with_context!(plain_list)(&initial_context);
        let result = plain_list_matcher(input);
        assert!(result.is_err());
    }

    #[test]
    fn indented_can_start_line_with_asterisk() {
        // An asterisk bullet is accepted as a plain list once it is indented, because an
        // indented asterisk is no longer a headline.
        let input = OrgSource::new(" * foo");
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let plain_list_matcher = parser_with_context!(plain_list)(&initial_context);
        let result = plain_list_matcher(input);
        assert!(result.is_ok());
    }

    // Two consecutive blank lines end the list: the element's source includes both blank lines,
    // and the indented text after them is left unparsed.
    #[test]
    fn two_blank_lines_ends_list() {
        let input = OrgSource::new(
            r#"1. foo
2. bar
   baz
3. lorem


   ipsum
"#,
        );
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        // Parsed through the generic element parser rather than `plain_list` directly.
        let plain_list_matcher = parser_with_context!(element(true))(&initial_context);
        let (remaining, result) =
            plain_list_matcher(input).expect("Should parse the plain list successfully.");
        assert_eq!(Into::<&str>::into(remaining), "   ipsum\n");
        assert_eq!(
            result.get_source(),
            r#"1. foo
2. bar
   baz
3. lorem


"#
        );
    }

    // Two consecutive blank lines after a nested item end the whole element: its source
    // includes both blank lines and the unindented text after them is left unparsed.
    #[test]
    fn two_blank_lines_ends_nested_list() {
        let input = OrgSource::new(
            r#"1. foo
   1. bar


baz"#,
        );
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        // Parsed through the generic element parser rather than `plain_list` directly.
        let plain_list_matcher = parser_with_context!(element(true))(&initial_context);
        let (remaining, result) =
            plain_list_matcher(input).expect("Should parse the plain list successfully.");
        assert_eq!(Into::<&str>::into(remaining), "baz");
        assert_eq!(
            result.get_source(),
            r#"1. foo
   1. bar


"#
        );
    }

    // Single blank lines between an item's indented contents (including a nested list) do not
    // end the element; only the final pair of blank lines does.
    #[test]
    fn interior_trailing_whitespace() {
        let input = OrgSource::new(
            r#"1. foo

   bar

   1. baz

      lorem

   ipsum


dolar"#,
        );
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        // Parsed through the generic element parser rather than `plain_list` directly.
        let plain_list_matcher = parser_with_context!(element(true))(&initial_context);
        let (remaining, result) =
            plain_list_matcher(input).expect("Should parse the plain list successfully.");
        assert_eq!(Into::<&str>::into(remaining), "dolar");
        assert_eq!(
            result.get_source(),
            r#"1. foo

   bar

   1. baz

      lorem

   ipsum


"#
        );
    }

    // A bullet directly followed by a line ending is detected as a plain list.
    #[test]
    fn detect_line_break() {
        let input = OrgSource::new(
            r#"+
"#,
        );
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let result = detect_plain_list(&initial_context, input);
        assert!(result.is_ok());
    }

    // A bullet directly followed by end of input is detected as a plain list.
    #[test]
    fn detect_eof() {
        let input = OrgSource::new(r#"+"#);
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let result = detect_plain_list(&initial_context, input);
        assert!(result.is_ok());
    }

    #[test]
    fn detect_no_gap() {
        let input = OrgSource::new(r#"+foo"#);
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let result = detect_plain_list(&initial_context, input);
        // Since there is no whitespace after the '+' this is a paragraph, not a plain list.
        assert!(result.is_err());
    }

    // A bullet followed by a space and text is detected as a plain list.
    #[test]
    fn detect_with_gap() {
        let input = OrgSource::new(r#"+ foo"#);
        let initial_context: ContextTree<'_, '_> = ContextTree::new();
        let result = detect_plain_list(&initial_context, input);
        assert!(result.is_ok());
    }
}