use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::anychar; use nom::character::complete::digit1; use nom::character::complete::line_ending; use nom::character::complete::one_of; use nom::character::complete::space0; use nom::character::complete::space1; use nom::combinator::eof; use nom::combinator::map; use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many0; use nom::multi::many1; use nom::multi::many_till; use nom::sequence::tuple; use super::element_parser::element; use super::object_parser::standard_set_object; use super::org_source::OrgSource; use super::util::include_input; use super::util::indentation_level; use super::util::non_whitespace_character; use crate::context::parser_with_context; use crate::context::ContextElement; use crate::context::ContextMatcher; use crate::context::ExitClass; use crate::context::ExitMatcherNode; use crate::context::RefContext; use crate::error::CustomError; use crate::error::MyError; use crate::error::Res; use crate::parser::util::blank_line; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting; use crate::parser::util::org_space; use crate::parser::util::start_of_line; use crate::types::CheckboxType; use crate::types::IndentationLevel; use crate::types::Object; use crate::types::PlainList; use crate::types::PlainListItem; use crate::types::PlainListType; #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub(crate) fn detect_plain_list<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, ()> { if verify( tuple(( start_of_line, space0, parser_with_context!(bullet)(context), alt((space1, line_ending, eof)), )), |(_start, indent, (_bullet_type, bull), _after_whitespace)| { Into::<&str>::into(bull) != "*" || indent.len() > 0 }, )(input) .is_ok() { return Ok((input, ())); } return Err(nom::Err::Error(CustomError::MyError(MyError( "No element detected.".into(), )))); } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub(crate) fn plain_list<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, PlainList<'s>> { let contexts = [ ContextElement::Context("plain list"), ContextElement::ConsumeTrailingWhitespace(true), ContextElement::ExitMatcherNode(ExitMatcherNode { class: ExitClass::Beta, exit_matcher: &plain_list_end, }), ]; let parser_context = context.with_additional_node(&contexts[0]); let parser_context = parser_context.with_additional_node(&contexts[1]); let parser_context = parser_context.with_additional_node(&contexts[2]); // children stores tuple of (input string, parsed object) so we can re-parse the final item let mut children = Vec::new(); let mut first_item_indentation: Option = None; let mut first_item_list_type: Option = None; let mut remaining = input; // The final list item does not consume trailing blank lines (which instead get consumed by the list). We have three options here: // // 1. Parse all items while consuming trailing whitespace, then edit the final item to remove trailing whitespace. // 2. Parse all items without consuming trailing whitespace, then edit all but the final one to add in the trailing whitespace. // 3. Re-parse the final item with consume trailing whitespace disabled. // // While #3 is the most slow, it also seems to cleanest and involves the least manual mutation of already-parsed objects so I am going with #3 for now, but we should revisit #1 or #2 when the parser is more developed. loop { let list_item = parser_with_context!(plain_list_item)(&parser_context)(remaining); match (&first_item_list_type, &list_item) { (None, Ok((_remain, (list_type, _item)))) => { let _ = first_item_list_type.insert(*list_type); } (None, Err(_)) => {} (Some(_), _) => {} }; match list_item { Ok((remain, (_list_type, item))) if item.indentation == *first_item_indentation.get_or_insert(item.indentation) => { children.push((remaining, item)); remaining = remain; } Ok(_) | Err(_) => { break; } }; let maybe_exit = parser_with_context!(exit_matcher_parser)(&parser_context)(remaining); if maybe_exit.is_ok() { break; } } let (final_child_start, _final_item_first_parse) = match children.pop() { Some(final_child) => final_child, None => { return Err(nom::Err::Error(CustomError::MyError(MyError( "Plain lists require at least one element.".into(), )))); } }; let final_item_context = ContextElement::ConsumeTrailingWhitespace(false); let final_item_context = parser_context.with_additional_node(&final_item_context); let (remaining, (_, reparsed_final_item)) = parser_with_context!(plain_list_item)(&final_item_context)(final_child_start)?; children.push((final_child_start, reparsed_final_item)); let source = get_consumed(input, remaining); Ok(( remaining, PlainList { source: source.into(), list_type: first_item_list_type.expect("Plain lists require at least one element."), children: children.into_iter().map(|(_start, item)| item).collect(), }, )) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn plain_list_item<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, (PlainListType, PlainListItem<'s>)> { start_of_line(input)?; let (remaining, (indent_level, _leading_whitespace)) = indentation_level(context, input)?; let (remaining, (bullet_type, bull)) = verify( parser_with_context!(bullet)(context), |(_bullet_type, bull)| Into::<&str>::into(bull) != "*" || indent_level > 0, )(remaining)?; let (remaining, _maybe_counter_set) = opt(tuple((space1, tag("[@"), counter_set_value, tag("]"))))(remaining)?; let (remaining, maybe_checkbox) = opt(tuple((space1, item_checkbox)))(remaining)?; let (remaining, maybe_tag) = if let BulletType::Unordered = bullet_type { opt(tuple((space1, parser_with_context!(item_tag)(context))))(remaining)? } else { (remaining, None) }; let list_type = match (&maybe_tag, bullet_type) { (None, BulletType::Ordered) => PlainListType::Ordered, (None, BulletType::Unordered) => PlainListType::Unordered, (Some(_), BulletType::Ordered) => unreachable!(), (Some(_), BulletType::Unordered) => PlainListType::Descriptive, }; let exit_matcher = plain_list_item_end(indent_level); let contexts = [ ContextElement::ConsumeTrailingWhitespace(true), ContextElement::ExitMatcherNode(ExitMatcherNode { class: ExitClass::Beta, exit_matcher: &exit_matcher, }), ]; let parser_context = context.with_additional_node(&contexts[0]); let parser_context = parser_context.with_additional_node(&contexts[1]); let maybe_contentless_item: Res, ()> = peek(parser_with_context!( detect_contentless_item_contents )(&parser_context))(remaining); match maybe_contentless_item { Ok((_rem, _ws)) => { let (remaining, _trailing_ws) = if context.should_consume_trailing_whitespace() { recognize(alt((recognize(many1(blank_line)), eof)))(remaining)? } else { recognize(alt((blank_line, eof)))(remaining)? }; let source = get_consumed(input, remaining); return Ok(( remaining, ( list_type, PlainListItem { source: source.into(), indentation: indent_level, bullet: bull.into(), checkbox: None, tag: maybe_tag .map(|(_ws, item_tag)| item_tag) .unwrap_or(Vec::new()), children: Vec::new(), }, ), )); } Err(_) => {} }; let (remaining, _ws) = item_tag_post_gap(&parser_context, remaining)?; let (mut remaining, (mut children, _exit_contents)) = many_till( include_input(parser_with_context!(element(true))(&parser_context)), parser_with_context!(exit_matcher_parser)(&parser_context), )(remaining)?; if !children.is_empty() && !context.should_consume_trailing_whitespace() { let final_item_context = ContextElement::ConsumeTrailingWhitespace(false); let final_item_context = parser_context.with_additional_node(&final_item_context); let (final_child_start, _original_final_child) = children .pop() .expect("if-statement already checked that children was non-empty."); let (remain, reparsed_final_element) = include_input(parser_with_context!(element(true))( &final_item_context, ))(final_child_start)?; remaining = remain; children.push(reparsed_final_element); } let (remaining, _trailing_ws) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); return Ok(( remaining, ( list_type, PlainListItem { source: source.into(), indentation: indent_level, bullet: bull.into(), checkbox: maybe_checkbox.map(|(_, (checkbox_type, source))| { (checkbox_type, Into::<&str>::into(source)) }), tag: maybe_tag .map(|(_ws, item_tag)| item_tag) .unwrap_or(Vec::new()), children: children.into_iter().map(|(_start, item)| item).collect(), }, ), )); } #[derive(Debug)] enum BulletType { Ordered, Unordered, } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn bullet<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, (BulletType, OrgSource<'s>)> { alt(( map(tag("*"), |bull| (BulletType::Unordered, bull)), map(tag("-"), |bull| (BulletType::Unordered, bull)), map(tag("+"), |bull| (BulletType::Unordered, bull)), map( recognize(tuple(( parser_with_context!(counter)(context), alt((tag("."), tag(")"))), ))), |bull| (BulletType::Ordered, bull), ), ))(input) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn counter<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { if context.get_global_settings().org_list_allow_alphabetical { alt(( recognize(one_of( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", )), digit1, ))(input) } else { digit1(input) } } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn counter_set_value<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { alt(( recognize(one_of( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", )), digit1, ))(input) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn plain_list_end<'b, 'g, 'r, 's>( _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { recognize(tuple(( start_of_line, verify(many1(blank_line), |lines: &Vec>| { lines.len() >= 2 }), )))(input) } const fn plain_list_item_end(indent_level: IndentationLevel) -> impl ContextMatcher { let line_indented_lte_matcher = line_indented_lte(indent_level); move |context, input: OrgSource<'_>| { _plain_list_item_end(context, input, &line_indented_lte_matcher) } } #[cfg_attr( feature = "tracing", tracing::instrument(ret, level = "debug", skip(line_indented_lte_matcher)) )] fn _plain_list_item_end<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, line_indented_lte_matcher: impl ContextMatcher, ) -> Res, OrgSource<'s>> { start_of_line(input)?; recognize(tuple(( opt(blank_line), parser_with_context!(line_indented_lte_matcher)(context), )))(input) } const fn line_indented_lte(indent_level: IndentationLevel) -> impl ContextMatcher { move |context, input: OrgSource<'_>| _line_indented_lte(context, input, indent_level) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn _line_indented_lte<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, indent_level: IndentationLevel, ) -> Res, OrgSource<'s>> { let matched = recognize(verify( tuple(( parser_with_context!(indentation_level)(context), non_whitespace_character, )), // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) |((indentation_level, _leading_whitespace), _anychar)| *indentation_level <= indent_level, ))(input)?; Ok(matched) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn item_tag<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, Vec>> { let parser_context = ContextElement::ExitMatcherNode(ExitMatcherNode { class: ExitClass::Gamma, exit_matcher: &item_tag_end, }); let parser_context = context.with_additional_node(&parser_context); let (remaining, (children, _exit_contents)) = verify( many_till( // TODO: Should this be using a different set like the minimal set? parser_with_context!(standard_set_object)(&parser_context), parser_with_context!(exit_matcher_parser)(&parser_context), ), |(children, _exit_contents)| !children.is_empty(), )(input)?; let (remaining, _) = item_tag_divider(remaining)?; Ok((remaining, children)) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn item_tag_end<'b, 'g, 'r, 's>( _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { alt((item_tag_divider, line_ending))(input) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn item_tag_divider<'s>(input: OrgSource<'s>) -> Res, OrgSource<'s>> { recognize(tuple(( one_of(" \t"), tag("::"), peek(tuple(( opt(tuple(( peek(one_of(" \t")), many_till(anychar, peek(alt((item_tag_divider, line_ending, eof)))), ))), alt((line_ending, eof)), ))), )))(input) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn item_tag_post_gap<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, OrgSource<'s>> { verify( recognize(tuple(( alt((blank_line, space0)), many_till( blank_line, alt(( peek(recognize(not(blank_line))), peek(recognize(tuple((many0(blank_line), eof)))), parser_with_context!(exit_matcher_parser)(context), )), ), ))), |gap| gap.len() > 0, )(input) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn item_checkbox<'s>(input: OrgSource<'s>) -> Res, (CheckboxType, OrgSource<'s>)> { alt(( map( recognize(tuple((tag("["), org_space, tag("]")))), |capture| (CheckboxType::Off, capture), ), map(tag("[-]"), |capture| (CheckboxType::Trans, capture)), map(tag("[X]"), |capture| (CheckboxType::On, capture)), ))(input) } #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn detect_contentless_item_contents<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, ) -> Res, ()> { let (remaining, _) = recognize(many_till( blank_line, parser_with_context!(exit_matcher_parser)(context), ))(input)?; Ok((remaining, ())) } #[cfg(test)] mod tests { use super::*; use crate::context::Context; use crate::context::GlobalSettings; use crate::context::List; use crate::types::GetStandardProperties; #[test] fn plain_list_item_empty() { let input = OrgSource::new("1."); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let plain_list_item_matcher = parser_with_context!(plain_list_item)(&initial_context); let (remaining, (_, result)) = plain_list_item_matcher(input).unwrap(); assert_eq!(Into::<&str>::into(remaining), ""); assert_eq!(result.get_standard_properties().get_source(), "1."); } #[test] fn plain_list_item_simple() { let input = OrgSource::new("1. foo"); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let plain_list_item_matcher = parser_with_context!(plain_list_item)(&initial_context); let (remaining, (_, result)) = plain_list_item_matcher(input).unwrap(); assert_eq!(Into::<&str>::into(remaining), ""); assert_eq!(result.get_standard_properties().get_source(), "1. foo"); } #[test] fn plain_list_empty() { let input = OrgSource::new("1."); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let plain_list_matcher = parser_with_context!(plain_list)(&initial_context); let (remaining, result) = plain_list_matcher(input).unwrap(); assert_eq!(Into::<&str>::into(remaining), ""); assert_eq!(result.get_standard_properties().get_source(), "1."); } #[test] fn plain_list_simple() { let input = OrgSource::new("1. foo"); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let plain_list_matcher = parser_with_context!(plain_list)(&initial_context); let (remaining, result) = plain_list_matcher(input).unwrap(); assert_eq!(Into::<&str>::into(remaining), ""); assert_eq!(result.get_standard_properties().get_source(), "1. foo"); } #[test] fn plain_list_cant_start_line_with_asterisk() { // Plain lists with an asterisk bullet must be indented or else they would be a headline let input = OrgSource::new("* foo"); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let plain_list_matcher = parser_with_context!(plain_list)(&initial_context); let result = plain_list_matcher(input); assert!(result.is_err()); } #[test] fn indented_can_start_line_with_asterisk() { // Plain lists with an asterisk bullet must be indented or else they would be a headline let input = OrgSource::new(" * foo"); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let plain_list_matcher = parser_with_context!(plain_list)(&initial_context); let result = plain_list_matcher(input); assert!(result.is_ok()); } #[test] fn two_blank_lines_ends_list() { let input = OrgSource::new( r#"1. foo 2. bar baz 3. lorem ipsum "#, ); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let plain_list_matcher = parser_with_context!(element(true))(&initial_context); let (remaining, result) = plain_list_matcher(input).expect("Should parse the plain list successfully."); assert_eq!(Into::<&str>::into(remaining), " ipsum\n"); assert_eq!( result.get_standard_properties().get_source(), r#"1. foo 2. bar baz 3. lorem "# ); } #[test] fn two_blank_lines_ends_nested_list() { let input = OrgSource::new( r#"1. foo 1. bar baz"#, ); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let plain_list_matcher = parser_with_context!(element(true))(&initial_context); let (remaining, result) = plain_list_matcher(input).expect("Should parse the plain list successfully."); assert_eq!(Into::<&str>::into(remaining), "baz"); assert_eq!( result.get_standard_properties().get_source(), r#"1. foo 1. bar "# ); } #[test] fn interior_trailing_whitespace() { let input = OrgSource::new( r#"1. foo bar 1. baz lorem ipsum dolar"#, ); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let plain_list_matcher = parser_with_context!(element(true))(&initial_context); let (remaining, result) = plain_list_matcher(input).expect("Should parse the plain list successfully."); assert_eq!(Into::<&str>::into(remaining), "dolar"); assert_eq!( result.get_standard_properties().get_source(), r#"1. foo bar 1. baz lorem ipsum "# ); } #[test] fn detect_line_break() { let input = OrgSource::new( r#"+ "#, ); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let result = detect_plain_list(&initial_context, input); assert!(result.is_ok()); } #[test] fn detect_eof() { let input = OrgSource::new(r#"+"#); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let result = detect_plain_list(&initial_context, input); assert!(result.is_ok()); } #[test] fn detect_no_gap() { let input = OrgSource::new(r#"+foo"#); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let result = detect_plain_list(&initial_context, input); // Since there is no whitespace after the '+' this is a paragraph, not a plain list. assert!(result.is_err()); } #[test] fn detect_with_gap() { let input = OrgSource::new(r#"+ foo"#); let global_settings = GlobalSettings::default(); let initial_context = ContextElement::document_context(); let initial_context = Context::new(&global_settings, List::new(&initial_context)); let result = detect_plain_list(&initial_context, input); assert!(result.is_ok()); } }