use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::anychar; use nom::character::complete::digit1; use nom::character::complete::line_ending; use nom::character::complete::one_of; use nom::character::complete::space0; use nom::character::complete::space1; use nom::combinator::eof; use nom::combinator::map; use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many0; use nom::multi::many1; use nom::multi::many_till; use nom::sequence::tuple; use super::element_parser::element; use super::object_parser::standard_set_object; use super::org_source::OrgSource; use super::util::include_input; use super::util::indentation_level; use super::util::non_whitespace_character; use crate::context::parser_with_context; use crate::context::ContextElement; use crate::context::ContextMatcher; use crate::context::ExitClass; use crate::context::ExitMatcherNode; use crate::context::RefContext; use crate::error::CustomError; use crate::error::MyError; use crate::error::Res; use crate::parser::util::blank_line; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting; use crate::parser::util::org_space; use crate::parser::util::start_of_line; use crate::types::CheckboxType; use crate::types::IndentationLevel; use crate::types::Object; use crate::types::PlainList; use crate::types::PlainListItem; #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub(crate) fn detect_plain_list<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, ()> { if verify( tuple(( start_of_line, space0, parser_with_context!(bullet)(context), alt((space1, line_ending, eof)), )), |(_start, indent, (_bullet_type, bull), _after_whitespace)| { Into::<&str>::into(bull) != "*" || indent.len() > 0 }, )(input) .is_ok() { return Ok((input, ())); } return Err(nom::Err::Error(CustomError::MyError(MyError( "No element detected.".into(), )))); } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub(crate) fn plain_list<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, PlainList<'s>> { let contexts = [ ContextElement::Context("plain list"), ContextElement::ConsumeTrailingWhitespace(true), ContextElement::ExitMatcherNode(ExitMatcherNode { class: ExitClass::Beta, exit_matcher: &plain_list_end, }), ]; let parser_context = context.with_additional_node(&contexts[0]); let parser_context = parser_context.with_additional_node(&contexts[1]); let parser_context = parser_context.with_additional_node(&contexts[2]); // children stores tuple of (input string, parsed object) so we can re-parse the final item let mut children = Vec::new(); let mut first_item_indentation: Option = None; let mut remaining = input; // The final list item does not consume trailing blank lines (which instead get consumed by the list). We have three options here: // // 1. Parse all items while consuming trailing whitespace, then edit the final item to remove trailing whitespace. // 2. Parse all items without consuming trailing whitespace, then edit all but the final one to add in the trailing whitespace. // 3. Re-parse the final item with consume trailing whitespace disabled. // // While #3 is the most slow, it also seems to cleanest and involves the least manual mutation of already-parsed objects so I am going with #3 for now, but we should revisit #1 or #2 when the parser is more developed. loop { let list_item = parser_with_context!(plain_list_item)(&parser_context)(remaining); match list_item { Ok((remain, item)) if item.indentation == *first_item_indentation.get_or_insert(item.indentation) => { children.push((remaining, item)); remaining = remain; } Ok(_) | Err(_) => { break; } }; let maybe_exit = parser_with_context!(exit_matcher_parser)(&parser_context)(remaining); if maybe_exit.is_ok() { break; } } let (final_child_start, _final_item_first_parse) = match children.pop() { Some(final_child) => final_child, None => { return Err(nom::Err::Error(CustomError::MyError(MyError( "Plain lists require at least one element.".into(), )))); } }; let final_item_context = ContextElement::ConsumeTrailingWhitespace(false); let final_item_context = parser_context.with_additional_node(&final_item_context); let (remaining, reparsed_final_item) = parser_with_context!(plain_list_item)(&final_item_context)(final_child_start)?; children.push((final_child_start, reparsed_final_item)); let source = get_consumed(input, remaining); Ok(( remaining, PlainList { source: source.into(), children: children.into_iter().map(|(_start, item)| item).collect(), }, )) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn plain_list_item<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, PlainListItem<'s>> { start_of_line(input)?; let (remaining, (indent_level, _leading_whitespace)) = indentation_level(context, input)?; let (remaining, (bullet_type, bull)) = verify( parser_with_context!(bullet)(context), |(_bullet_type, bull)| Into::<&str>::into(bull) != "*" || indent_level > 0, )(remaining)?; let (remaining, _maybe_counter_set) = opt(tuple(( space1, tag("[@"), parser_with_context!(counter)(context), tag("]"), )))(remaining)?; let (remaining, maybe_checkbox) = opt(tuple((space1, item_checkbox)))(remaining)?; let (remaining, maybe_tag) = if let BulletType::Unordered = bullet_type { opt(tuple((space1, parser_with_context!(item_tag)(context))))(remaining)? } else { (remaining, None) }; let exit_matcher = plain_list_item_end(indent_level); let contexts = [ ContextElement::ConsumeTrailingWhitespace(true), ContextElement::ExitMatcherNode(ExitMatcherNode { class: ExitClass::Beta, exit_matcher: &exit_matcher, }), ]; let parser_context = context.with_additional_node(&contexts[0]); let parser_context = parser_context.with_additional_node(&contexts[1]); let maybe_contentless_item: Res, ()> = peek(parser_with_context!( detect_contentless_item_contents )(&parser_context))(remaining); match maybe_contentless_item { Ok((_rem, _ws)) => { let (remaining, _trailing_ws) = if context.should_consume_trailing_whitespace() { recognize(alt((recognize(many1(blank_line)), eof)))(remaining)? } else { recognize(alt((blank_line, eof)))(remaining)? }; let source = get_consumed(input, remaining); return Ok(( remaining, PlainListItem { source: source.into(), indentation: indent_level, bullet: bull.into(), checkbox: None, tag: maybe_tag .map(|(_ws, item_tag)| item_tag) .unwrap_or(Vec::new()), children: Vec::new(), }, )); } Err(_) => {} }; let (remaining, _ws) = item_tag_post_gap(&parser_context, remaining)?; let (mut remaining, (mut children, _exit_contents)) = many_till( include_input(parser_with_context!(element(true))(&parser_context)), parser_with_context!(exit_matcher_parser)(&parser_context), )(remaining)?; if !children.is_empty() && !context.should_consume_trailing_whitespace() { let final_item_context = ContextElement::ConsumeTrailingWhitespace(false); let final_item_context = parser_context.with_additional_node(&final_item_context); let (final_child_start, _original_final_child) = children .pop() .expect("if-statement already checked that children was non-empty."); let (remain, reparsed_final_element) = include_input(parser_with_context!(element(true))( &final_item_context, ))(final_child_start)?; remaining = remain; children.push(reparsed_final_element); } let (remaining, _trailing_ws) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); return Ok(( remaining, PlainListItem { source: source.into(), indentation: indent_level, bullet: bull.into(), checkbox: maybe_checkbox .map(|(_, (checkbox_type, source))| (checkbox_type, Into::<&str>::into(source))), tag: maybe_tag .map(|(_ws, item_tag)| item_tag) .unwrap_or(Vec::new()), children: children.into_iter().map(|(_start, item)| item).collect(), }, )); } #[derive(Debug)] enum BulletType { Ordered, Unordered, } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn bullet<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, (BulletType, OrgSource<'s>)> { alt(( map(tag("*"), |bull| (BulletType::Unordered, bull)), map(tag("-"), |bull| (BulletType::Unordered, bull)), map(tag("+"), |bull| (BulletType::Unordered, bull)), map( recognize(tuple(( parser_with_context!(counter)(context), alt((tag("."), tag(")"))), ))), |bull| (BulletType::Ordered, bull), ), ))(input) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn counter<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { if context.get_global_settings().org_list_allow_alphabetical { alt(( recognize(one_of( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", )), digit1, ))(input) } else { digit1(input) } } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn plain_list_end<'b, 'g, 'r, 's>( _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { recognize(tuple(( start_of_line, verify(many1(blank_line), |lines: &Vec>| { lines.len() >= 2 }), )))(input) } const fn plain_list_item_end(indent_level: IndentationLevel) -> impl ContextMatcher { let line_indented_lte_matcher = line_indented_lte(indent_level); move |context, input: OrgSource<'_>| { _plain_list_item_end(context, input, &line_indented_lte_matcher) } } #[cfg_attr( feature = "tracing", tracing::instrument(ret, level = "debug", skip(line_indented_lte_matcher)) )] fn _plain_list_item_end<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, line_indented_lte_matcher: impl ContextMatcher, ) -> Res, OrgSource<'s>> { start_of_line(input)?; recognize(tuple(( opt(blank_line), parser_with_context!(line_indented_lte_matcher)(context), )))(input) } const fn line_indented_lte(indent_level: IndentationLevel) -> impl ContextMatcher { move |context, input: OrgSource<'_>| _line_indented_lte(context, input, indent_level) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn _line_indented_lte<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, indent_level: IndentationLevel, ) -> Res, OrgSource<'s>> { let matched = recognize(verify( tuple(( parser_with_context!(indentation_level)(context), non_whitespace_character, )), // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) |((indentation_level, _leading_whitespace), _anychar)| *indentation_level <= indent_level, ))(input)?; Ok(matched) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn item_tag<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Vec>> { let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { class: ExitClass::Gamma, exit_matcher: &item_tag_end, }); let parser_context = context.with_additional_node(&parser_context); let (remaining, (children, _exit_contents)) = verify( many_till( // TODO: Should this be using a different set like the minimal set? parser_with_context!(standard_set_object)(&parser_context), parser_with_context!(exit_matcher_parser)(&parser_context), ), |(children, _exit_contents)| !children.is_empty(), )(input)?; let (remaining, _) = item_tag_divider(remaining)?; Ok((remaining, children)) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn item_tag_end<'b, 'g, 'r, 's>( _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { alt((item_tag_divider, line_ending))(input) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn item_tag_divider<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { recognize(tuple(( one_of(" \t"), tag("::"), peek(tuple(( opt(tuple(( peek(one_of(" \t")), many_till(anychar, peek(alt((item_tag_divider, line_ending, eof)))), ))), alt((line_ending, eof)), ))), )))(input) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn item_tag_post_gap<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { verify( recognize(tuple(( alt((blank_line, space0)), many_till( blank_line, alt(( peek(recognize(not(blank_line))), peek(recognize(tuple((many0(blank_line), eof)))), parser_with_context!(exit_matcher_parser)(context), )), ), ))), |gap| gap.len() > 0, )(input) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn item_checkbox<'s>(input: OrgSource<'s>) -> Res, (CheckboxType, OrgSource<'s>)> { alt(( map( recognize(tuple((tag("["), org_space, tag("]")))), |capture| (CheckboxType::Off, capture), ), map(tag("[-]"), |capture| (CheckboxType::Trans, capture)), map(tag("[X]"), |capture| (CheckboxType::On, capture)), ))(input) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn detect_contentless_item_contents<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, ()> { let (remaining, _) = recognize(many_till( blank_line, parser_with_context!(exit_matcher_parser)(context), ))(input)?; Ok((remaining, ())) } #[cfg(test)] mod tests { use super::*; use crate::context::Context; use crate::context::GlobalSettings; use crate::context::List; use crate::types::Source; #[test] fn plain_list_item_empty() { let input = OrgSource::new("1."); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let plain_list_item_matcher = parser_with_context!(plain_list_item)(&initial_context); let (remaining, result) = plain_list_item_matcher(input).unwrap(); assert_eq!(Into::<&str>::into(remaining), ""); assert_eq!(result.source, "1."); } #[test] fn plain_list_item_simple() { let input = OrgSource::new("1. foo"); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let plain_list_item_matcher = parser_with_context!(plain_list_item)(&initial_context); let (remaining, result) = plain_list_item_matcher(input).unwrap(); assert_eq!(Into::<&str>::into(remaining), ""); assert_eq!(result.source, "1. foo"); } #[test] fn plain_list_empty() { let input = OrgSource::new("1."); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let plain_list_matcher = parser_with_context!(plain_list)(&initial_context); let (remaining, result) = plain_list_matcher(input).unwrap(); assert_eq!(Into::<&str>::into(remaining), ""); assert_eq!(result.source, "1."); } #[test] fn plain_list_simple() { let input = OrgSource::new("1. foo"); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let plain_list_matcher = parser_with_context!(plain_list)(&initial_context); let (remaining, result) = plain_list_matcher(input).unwrap(); assert_eq!(Into::<&str>::into(remaining), ""); assert_eq!(result.source, "1. foo"); } #[test] fn plain_list_cant_start_line_with_asterisk() { // Plain lists with an asterisk bullet must be indented or else they would be a headline let input = OrgSource::new("* foo"); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let plain_list_matcher = parser_with_context!(plain_list)(&initial_context); let result = plain_list_matcher(input); assert!(result.is_err()); } #[test] fn indented_can_start_line_with_asterisk() { // Plain lists with an asterisk bullet must be indented or else they would be a headline let input = OrgSource::new(" * foo"); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let plain_list_matcher = parser_with_context!(plain_list)(&initial_context); let result = plain_list_matcher(input); assert!(result.is_ok()); } #[test] fn two_blank_lines_ends_list() { let input = OrgSource::new( r#"1. foo 2. bar baz 3. lorem ipsum "#, ); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let plain_list_matcher = parser_with_context!(element(true))(&initial_context); let (remaining, result) = plain_list_matcher(input).expect("Should parse the plain list successfully."); assert_eq!(Into::<&str>::into(remaining), " ipsum\n"); assert_eq!( result.get_source(), r#"1. foo 2. bar baz 3. lorem "# ); } #[test] fn two_blank_lines_ends_nested_list() { let input = OrgSource::new( r#"1. foo 1. bar baz"#, ); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let plain_list_matcher = parser_with_context!(element(true))(&initial_context); let (remaining, result) = plain_list_matcher(input).expect("Should parse the plain list successfully."); assert_eq!(Into::<&str>::into(remaining), "baz"); assert_eq!( result.get_source(), r#"1. foo 1. bar "# ); } #[test] fn interior_trailing_whitespace() { let input = OrgSource::new( r#"1. foo bar 1. baz lorem ipsum dolar"#, ); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let plain_list_matcher = parser_with_context!(element(true))(&initial_context); let (remaining, result) = plain_list_matcher(input).expect("Should parse the plain list successfully."); assert_eq!(Into::<&str>::into(remaining), "dolar"); assert_eq!( result.get_source(), r#"1. foo bar 1. baz lorem ipsum "# ); } #[test] fn detect_line_break() { let input = OrgSource::new( r#"+ "#, ); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let result = detect_plain_list(&initial_context, input); assert!(result.is_ok()); } #[test] fn detect_eof() { let input = OrgSource::new(r#"+"#); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let result = detect_plain_list(&initial_context, input); assert!(result.is_ok()); } #[test] fn detect_no_gap() { let input = OrgSource::new(r#"+foo"#); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let result = detect_plain_list(&initial_context, input); // Since there is no whitespace after the '+' this is a paragraph, not a plain list. assert!(result.is_err()); } #[test] fn detect_with_gap() { let input = OrgSource::new(r#"+ foo"#); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let result = detect_plain_list(&initial_context, input); assert!(result.is_ok()); } }