Merge branch 'list_perf_improvement'
This commit is contained in:
		
						commit
						8db9038c53
					
				| @ -3,6 +3,7 @@ use nom::bytes::complete::tag; | ||||
| use nom::character::complete::anychar; | ||||
| use nom::character::complete::digit1; | ||||
| use nom::character::complete::line_ending; | ||||
| use nom::character::complete::multispace1; | ||||
| use nom::character::complete::one_of; | ||||
| use nom::character::complete::space0; | ||||
| use nom::character::complete::space1; | ||||
| @ -17,6 +18,7 @@ use nom::multi::many0; | ||||
| use nom::multi::many1; | ||||
| use nom::multi::many_till; | ||||
| use nom::sequence::tuple; | ||||
| use nom::InputTake; | ||||
| 
 | ||||
| use super::affiliated_keyword::parse_affiliated_keywords; | ||||
| use super::element_parser::element; | ||||
| @ -25,6 +27,7 @@ use super::org_source::OrgSource; | ||||
| use super::util::include_input; | ||||
| use super::util::indentation_level; | ||||
| use super::util::non_whitespace_character; | ||||
| use crate::context::bind_context; | ||||
| use crate::context::parser_with_context; | ||||
| use crate::context::ContextElement; | ||||
| use crate::context::ContextMatcher; | ||||
| @ -80,6 +83,46 @@ where | ||||
|     return Err(nom::Err::Error(CustomError::Static("No element detected."))); | ||||
| } | ||||
| 
 | ||||
| #[cfg_attr(
 | ||||
|     feature = "tracing", | ||||
|     tracing::instrument(ret, level = "debug", skip(context)) | ||||
| )] | ||||
| pub(crate) fn detect_not_plain_list_item_indent<'b, 'g, 'r, 's>( | ||||
|     context: RefContext<'b, 'g, 'r, 's>, | ||||
|     input: OrgSource<'s>, | ||||
| ) -> Res<OrgSource<'s>, (u16, OrgSource<'s>)> { | ||||
|     if let Ok((_remaining, (_, indent, _))) = tuple(( | ||||
|         start_of_line, | ||||
|         parser_with_context!(indentation_level)(context), | ||||
|         not(tuple(( | ||||
|             parser_with_context!(bullet)(context), | ||||
|             alt((space1, line_ending, eof)), | ||||
|         ))), | ||||
|     ))(input) | ||||
|     { | ||||
|         return Ok((input, indent)); | ||||
|     } | ||||
| 
 | ||||
|     // Headlines are not plain list items.
 | ||||
|     if let Ok((_remaining, (_, indent, _))) = verify( | ||||
|         tuple(( | ||||
|             start_of_line, | ||||
|             parser_with_context!(indentation_level)(context), | ||||
|             tuple(( | ||||
|                 parser_with_context!(bullet)(context), | ||||
|                 alt((space1, line_ending, eof)), | ||||
|             )), | ||||
|         )), | ||||
|         |(_, (depth, _), ((_, bullet), _))| { | ||||
|             *depth == 0 && Into::<&str>::into(bullet).starts_with('*') | ||||
|         }, | ||||
|     )(input) | ||||
|     { | ||||
|         return Ok((input, indent)); | ||||
|     } | ||||
|     return Err(nom::Err::Error(CustomError::Static("No element detected."))); | ||||
| } | ||||
| 
 | ||||
| #[cfg_attr(
 | ||||
|     feature = "tracing", | ||||
|     tracing::instrument(ret, level = "debug", skip(context, affiliated_keywords)) | ||||
| @ -120,7 +163,7 @@ where | ||||
|     // While #3 is the slowest, it also seems to be the cleanest and involves the least manual mutation of already-parsed objects, so I am going with #3 for now, but we should revisit #1 or #2 when the parser is more developed.
 | ||||
| 
 | ||||
|     loop { | ||||
|         let list_item = parser_with_context!(plain_list_item)(&parser_context)(remaining); | ||||
|         let list_item = plain_list_item(&parser_context, remaining); | ||||
|         match (&first_item_list_type, &list_item) { | ||||
|             (None, Ok((_remain, (list_type, _item)))) => { | ||||
|                 let _ = first_item_list_type.insert(*list_type); | ||||
| @ -140,25 +183,17 @@ where | ||||
|             } | ||||
|         }; | ||||
| 
 | ||||
|         let maybe_exit = parser_with_context!(exit_matcher_parser)(&parser_context)(remaining); | ||||
|         let maybe_exit = exit_matcher_parser(&parser_context, remaining); | ||||
|         if maybe_exit.is_ok() { | ||||
|             break; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     let (final_child_start, _final_item_first_parse) = match children.pop() { | ||||
|         Some(final_child) => final_child, | ||||
|         None => { | ||||
|             return Err(nom::Err::Error(CustomError::Static( | ||||
|                 "Plain lists require at least one element.", | ||||
|             ))); | ||||
|         } | ||||
|     }; | ||||
|     let final_item_context = ContextElement::ConsumeTrailingWhitespace(false); | ||||
|     let final_item_context = parser_context.with_additional_node(&final_item_context); | ||||
|     let (remaining, (_, reparsed_final_item)) = | ||||
|         parser_with_context!(plain_list_item)(&final_item_context)(final_child_start)?; | ||||
|     children.push((final_child_start, reparsed_final_item)); | ||||
|     if children.is_empty() { | ||||
|         return Err(nom::Err::Error(CustomError::Static( | ||||
|             "Plain lists require at least one element.", | ||||
|         ))); | ||||
|     } | ||||
| 
 | ||||
|     let (remaining, _trailing_ws) = | ||||
|         maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; | ||||
| @ -187,10 +222,10 @@ fn plain_list_item<'b, 'g, 'r, 's>( | ||||
| ) -> Res<OrgSource<'s>, (PlainListType, PlainListItem<'s>)> { | ||||
|     start_of_line(input)?; | ||||
|     let (remaining, (indent_level, _leading_whitespace)) = indentation_level(context, input)?; | ||||
|     let (remaining, (bullet_type, bull)) = verify( | ||||
|         parser_with_context!(bullet)(context), | ||||
|         |(_bullet_type, bull)| !Into::<&str>::into(bull).starts_with('*') || indent_level > 0, | ||||
|     )(remaining)?; | ||||
|     let (remaining, (bullet_type, bull)) = | ||||
|         verify(bind_context!(bullet, context), |(_bullet_type, bull)| { | ||||
|             !Into::<&str>::into(bull).starts_with('*') || indent_level > 0 | ||||
|         })(remaining)?; | ||||
| 
 | ||||
|     let (remaining, maybe_counter_set) = | ||||
|         opt(tuple((space1, tag("[@"), counter_set_value, tag("]"))))(remaining)?; | ||||
| @ -199,7 +234,7 @@ fn plain_list_item<'b, 'g, 'r, 's>( | ||||
|     let (remaining, maybe_checkbox) = opt(tuple((space1, item_checkbox)))(remaining)?; | ||||
| 
 | ||||
|     let (remaining, maybe_tag) = if let BulletType::Unordered = bullet_type { | ||||
|         opt(tuple((space1, parser_with_context!(item_tag)(context))))(remaining)? | ||||
|         opt(tuple((space1, bind_context!(item_tag, context))))(remaining)? | ||||
|     } else { | ||||
|         (remaining, None) | ||||
|     }; | ||||
| @ -211,6 +246,12 @@ fn plain_list_item<'b, 'g, 'r, 's>( | ||||
|     }; | ||||
| 
 | ||||
|     let exit_matcher = plain_list_item_end(indent_level); | ||||
|     let final_item_whitespace_cutoff = final_item_whitespace_cutoff(indent_level); | ||||
|     let final_whitespace_context = ContextElement::ExitMatcherNode(ExitMatcherNode { | ||||
|         class: ExitClass::Beta, | ||||
|         exit_matcher: &final_item_whitespace_cutoff, | ||||
|     }); | ||||
|     let final_whitespace_context = context.with_additional_node(&final_whitespace_context); | ||||
|     let contexts = [ | ||||
|         ContextElement::ConsumeTrailingWhitespace(true), | ||||
|         ContextElement::ExitMatcherNode(ExitMatcherNode { | ||||
| @ -218,17 +259,21 @@ fn plain_list_item<'b, 'g, 'r, 's>( | ||||
|             exit_matcher: &exit_matcher, | ||||
|         }), | ||||
|     ]; | ||||
|     let parser_context = context.with_additional_node(&contexts[0]); | ||||
|     let parser_context = final_whitespace_context.with_additional_node(&contexts[0]); | ||||
|     let parser_context = parser_context.with_additional_node(&contexts[1]); | ||||
| 
 | ||||
|     let maybe_contentless_item: Res<OrgSource<'_>, ()> = peek(parser_with_context!( | ||||
|         detect_contentless_item_contents | ||||
|     )(&parser_context))(remaining); | ||||
|     let maybe_contentless_item: Res<OrgSource<'_>, ()> = | ||||
|         detect_contentless_item_contents(&parser_context, remaining); | ||||
|     if let Ok((_rem, _ws)) = maybe_contentless_item { | ||||
|         let (remaining, _trailing_ws) = if context.should_consume_trailing_whitespace() { | ||||
|             recognize(alt((recognize(many1(blank_line)), eof)))(remaining)? | ||||
|         } else { | ||||
|         let (remaining, _trailing_ws) = if tuple(( | ||||
|             blank_line, | ||||
|             bind_context!(final_item_whitespace_cutoff, context), | ||||
|         ))(remaining) | ||||
|         .is_ok() | ||||
|         { | ||||
|             recognize(alt((blank_line, eof)))(remaining)? | ||||
|         } else { | ||||
|             recognize(alt((recognize(many1(blank_line)), eof)))(remaining)? | ||||
|         }; | ||||
|         let source = get_consumed(input, remaining); | ||||
|         return Ok(( | ||||
| @ -256,26 +301,14 @@ fn plain_list_item<'b, 'g, 'r, 's>( | ||||
|         .filter(|b| *b == b'\n') | ||||
|         .count(); | ||||
| 
 | ||||
|     let (mut remaining, (mut children, _exit_contents)) = many_till( | ||||
|         include_input(parser_with_context!(element(true))(&parser_context)), | ||||
|         parser_with_context!(exit_matcher_parser)(&parser_context), | ||||
|     let (remaining, (children, _exit_contents)) = many_till( | ||||
|         include_input(bind_context!(element(true), &parser_context)), | ||||
|         bind_context!(exit_matcher_parser, &parser_context), | ||||
|     )(remaining)?; | ||||
| 
 | ||||
|     if !children.is_empty() && !context.should_consume_trailing_whitespace() { | ||||
|         let final_item_context = ContextElement::ConsumeTrailingWhitespace(false); | ||||
|         let final_item_context = parser_context.with_additional_node(&final_item_context); | ||||
|         let (final_child_start, _original_final_child) = children | ||||
|             .pop() | ||||
|             .expect("if-statement already checked that children was non-empty."); | ||||
|         let (remain, reparsed_final_element) = include_input(parser_with_context!(element(true))( | ||||
|             &final_item_context, | ||||
|         ))(final_child_start)?; | ||||
|         remaining = remain; | ||||
|         children.push(reparsed_final_element); | ||||
|     } | ||||
| 
 | ||||
|     // We have to use final_whitespace_context here (rather than the bare context) so that the whitespace cut-off exit matcher is included.
 | ||||
|     let (remaining, _trailing_ws) = | ||||
|         maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; | ||||
|         maybe_consume_trailing_whitespace_if_not_exiting(&final_whitespace_context, remaining)?; | ||||
| 
 | ||||
|     let source = get_consumed(input, remaining); | ||||
|     return Ok(( | ||||
| @ -322,7 +355,7 @@ fn bullet<'b, 'g, 'r, 's>( | ||||
|             map(tag("+"), |bull| (BulletType::Unordered, bull)), | ||||
|             map( | ||||
|                 recognize(tuple(( | ||||
|                     parser_with_context!(counter)(context), | ||||
|                     bind_context!(counter, context), | ||||
|                     alt((tag("."), tag(")"))), | ||||
|                 ))), | ||||
|                 |bull| (BulletType::Ordered, bull), | ||||
| @ -377,6 +410,52 @@ fn counter_set_value<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, PlainListIt | ||||
|     ))(input) | ||||
| } | ||||
| 
 | ||||
| const fn final_item_whitespace_cutoff(indent_level: IndentationLevel) -> impl ContextMatcher { | ||||
|     move |context, input: OrgSource<'_>| { | ||||
|         impl_final_item_whitespace_cutoff(context, input, indent_level) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #[cfg_attr(
 | ||||
|     feature = "tracing", | ||||
|     tracing::instrument(ret, level = "debug", skip(context)) | ||||
| )] | ||||
| fn impl_final_item_whitespace_cutoff<'b, 'g, 'r, 's>( | ||||
|     context: RefContext<'b, 'g, 'r, 's>, | ||||
|     input: OrgSource<'s>, | ||||
|     indent_level: IndentationLevel, | ||||
| ) -> Res<OrgSource<'s>, OrgSource<'s>> { | ||||
|     start_of_line(input)?; | ||||
|     // element!(plain_list_end, context, input);
 | ||||
| 
 | ||||
|     if let Ok((_remaining, _)) = verify( | ||||
|         tuple(( | ||||
|             opt(blank_line), | ||||
|             bind_context!(indentation_level, context), | ||||
|             not(multispace1), | ||||
|         )), | ||||
|         |(_, (depth, _stars), _not_whitespace)| *depth < indent_level, | ||||
|     )(input) | ||||
|     { | ||||
|         return Ok((input, input.take(0))); | ||||
|     } | ||||
| 
 | ||||
|     if let Ok((_remaining, _)) = tuple(( | ||||
|         opt(blank_line), | ||||
|         verify( | ||||
|             bind_context!(detect_not_plain_list_item_indent, context), | ||||
|             |(depth, _)| *depth == indent_level, | ||||
|         ), | ||||
|     ))(input) | ||||
|     { | ||||
|         return Ok((input, input.take(0))); | ||||
|     } | ||||
| 
 | ||||
|     Err(nom::Err::Error(CustomError::Static( | ||||
|         "No whitespace cut-off.", | ||||
|     ))) | ||||
| } | ||||
| 
 | ||||
| #[cfg_attr(
 | ||||
|     feature = "tracing", | ||||
|     tracing::instrument(ret, level = "debug", skip(_context)) | ||||
| @ -412,7 +491,7 @@ fn _plain_list_item_end<'b, 'g, 'r, 's>( | ||||
|     start_of_line(input)?; | ||||
|     recognize(tuple(( | ||||
|         opt(blank_line), | ||||
|         parser_with_context!(line_indented_lte_matcher)(context), | ||||
|         bind_context!(line_indented_lte_matcher, context), | ||||
|     )))(input) | ||||
| } | ||||
| 
 | ||||
| @ -431,7 +510,7 @@ fn _line_indented_lte<'b, 'g, 'r, 's>( | ||||
| ) -> Res<OrgSource<'s>, OrgSource<'s>> { | ||||
|     let matched = recognize(verify( | ||||
|         tuple(( | ||||
|             parser_with_context!(indentation_level)(context), | ||||
|             bind_context!(indentation_level, context), | ||||
|             non_whitespace_character, | ||||
|         )), | ||||
|         // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
 | ||||
| @ -457,8 +536,8 @@ fn item_tag<'b, 'g, 'r, 's>( | ||||
|     let (remaining, (children, _exit_contents)) = verify( | ||||
|         many_till( | ||||
|             // TODO: Should this be using a different set like the minimal set?
 | ||||
|             parser_with_context!(standard_set_object)(&parser_context), | ||||
|             parser_with_context!(exit_matcher_parser)(&parser_context), | ||||
|             bind_context!(standard_set_object, &parser_context), | ||||
|             bind_context!(exit_matcher_parser, &parser_context), | ||||
|         ), | ||||
|         |(children, _exit_contents)| !children.is_empty(), | ||||
|     )(input)?; | ||||
| @ -508,7 +587,7 @@ fn item_tag_post_gap<'b, 'g, 'r, 's>( | ||||
|                 alt(( | ||||
|                     peek(recognize(not(blank_line))), | ||||
|                     peek(recognize(tuple((many0(blank_line), eof)))), | ||||
|                     parser_with_context!(exit_matcher_parser)(context), | ||||
|                     bind_context!(exit_matcher_parser, context), | ||||
|                 )), | ||||
|             ), | ||||
|         ))), | ||||
| @ -538,7 +617,7 @@ fn detect_contentless_item_contents<'b, 'g, 'r, 's>( | ||||
| ) -> Res<OrgSource<'s>, ()> { | ||||
|     let (remaining, _) = recognize(many_till( | ||||
|         blank_line, | ||||
|         parser_with_context!(exit_matcher_parser)(context), | ||||
|         bind_context!(exit_matcher_parser, context), | ||||
|     ))(input)?; | ||||
|     Ok((remaining, ())) | ||||
| } | ||||
|  | ||||
| @ -243,6 +243,10 @@ pub(crate) fn org_line_ending(input: OrgSource<'_>) -> Res<OrgSource<'_>, OrgSou | ||||
| } | ||||
| 
 | ||||
| /// Match the whitespace at the beginning of a line and give it an indentation level.
 | ||||
| #[cfg_attr(
 | ||||
|     feature = "tracing", | ||||
|     tracing::instrument(ret, level = "debug", skip(context)) | ||||
| )] | ||||
| pub(crate) fn indentation_level<'s>( | ||||
|     context: RefContext<'_, '_, '_, 's>, | ||||
|     input: OrgSource<'s>, | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Tom Alexander
						Tom Alexander