Fix plain list parser to not consume trailing whitespace on the last item.
This commit is contained in:
		
							parent
							
								
									665c1d70fb
								
							
						
					
					
						commit
						08fed1301e
					
				| @ -6,5 +6,7 @@ | ||||
| 
 | ||||
|       lorem | ||||
| 
 | ||||
|    ipsum | ||||
| 
 | ||||
| ipsum | ||||
| 
 | ||||
| dolar | ||||
|  | ||||
| @ -23,6 +23,7 @@ use nom::character::complete::one_of; | ||||
| use nom::character::complete::space0; | ||||
| use nom::character::complete::space1; | ||||
| use nom::combinator::eof; | ||||
| use nom::combinator::peek; | ||||
| use nom::combinator::recognize; | ||||
| use nom::combinator::verify; | ||||
| use nom::multi::many1; | ||||
| @ -35,26 +36,73 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s | ||||
|         context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { | ||||
|             exit_matcher: ChainBehavior::AndParent(Some(&plain_list_end)), | ||||
|         })); | ||||
|     let (mut remaining, first_item) = plain_list_item(&parser_context, input)?; | ||||
|     let first_item_indentation = first_item.indentation; | ||||
|     let plain_list_item_matcher = parser_with_context!(plain_list_item)(&parser_context); | ||||
|     let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); | ||||
|     let without_consume_context = | ||||
|         parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false)); | ||||
|     let with_consume_context = | ||||
|         parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true)); | ||||
|     let without_consume_matcher = parser_with_context!(plain_list_item)(&without_consume_context); | ||||
|     let with_consume_matcher = parser_with_context!(plain_list_item)(&with_consume_context); | ||||
|     let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context); | ||||
|     let mut children = Vec::new(); | ||||
|     children.push(first_item); | ||||
|     loop { | ||||
|         let exit_contents = exit_matcher(remaining); | ||||
|         if exit_contents.is_ok() { | ||||
|             break; | ||||
|         } | ||||
|     let mut first_item_indentation: Option<usize> = None; | ||||
|     let mut remaining = input; | ||||
| 
 | ||||
|         let next_list_item = plain_list_item_matcher(remaining); | ||||
|         match next_list_item { | ||||
|             Ok((remain, next_child)) if next_child.indentation == first_item_indentation => { | ||||
|                 children.push(next_child); | ||||
|     loop { | ||||
|         /* | ||||
|         Trailing whitespace belongs to the plain list, not the plain list item | ||||
| 
 | ||||
|         Possible outcomes: | ||||
|         Don't consume, yes exit matcher | ||||
|         Don't consume, no additional item | ||||
|         Consume, additional item | ||||
|         */ | ||||
|         let last_item_then_exit = tuple((without_consume_matcher, exit_matcher))(remaining); | ||||
|         match last_item_then_exit { | ||||
|             Ok((remain, (item, _exit))) | ||||
|                 if item.indentation == *first_item_indentation.get_or_insert(item.indentation) => | ||||
|             { | ||||
|                 remaining = remain; | ||||
|                 children.push(item); | ||||
|                 break; | ||||
|             } | ||||
|             Ok(_) | Err(_) => break, | ||||
|             Ok(_) | Err(_) => {} | ||||
|         }; | ||||
| 
 | ||||
|         let not_last_item = tuple((with_consume_matcher, peek(without_consume_matcher)))(remaining); | ||||
|         match not_last_item { | ||||
|             Ok((remain, (item, _future_item))) | ||||
|                 if item.indentation == *first_item_indentation.get_or_insert(item.indentation) => | ||||
|             { | ||||
|                 remaining = remain; | ||||
|                 children.push(item); | ||||
|                 continue; | ||||
|             } | ||||
|             Ok(_) | Err(_) => {} | ||||
|         }; | ||||
| 
 | ||||
|         // If it's not (don't consume, exit) and it's not (consume, see another item) then it must be (don't consume, no additional item)
 | ||||
|         let last_item_then_exit = without_consume_matcher(remaining); | ||||
|         match last_item_then_exit { | ||||
|             Ok((remain, item)) | ||||
|                 if item.indentation == *first_item_indentation.get_or_insert(item.indentation) => | ||||
|             { | ||||
|                 remaining = remain; | ||||
|                 children.push(item); | ||||
|                 break; | ||||
|             } | ||||
|             Ok(_) | Err(_) => { | ||||
|                 return Err(nom::Err::Error(CustomError::MyError(MyError( | ||||
|                     "Should be unreachable.", | ||||
|                 )))); | ||||
|                 unreachable!(); | ||||
|             } | ||||
|         }; | ||||
|     } | ||||
| 
 | ||||
|     if children.is_empty() { | ||||
|         return Err(nom::Err::Error(CustomError::MyError(MyError( | ||||
|             "Plain lists require at least one element.", | ||||
|         )))); | ||||
|     } | ||||
| 
 | ||||
|     let (remaining, _trailing_ws) = | ||||
| @ -73,20 +121,27 @@ pub fn plain_list_item<'r, 's>( | ||||
|     let (remaining, leading_whitespace) = space0(input)?; | ||||
|     // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
 | ||||
|     let indent_level = leading_whitespace.len(); | ||||
|     let parser_context = context | ||||
|     let with_consume_context = context | ||||
|         .with_additional_node(ContextElement::ConsumeTrailingWhitespace(true)) | ||||
|         .with_additional_node(ContextElement::ListItem(indent_level)) | ||||
|         .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { | ||||
|             exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)), | ||||
|         })); | ||||
|     let without_consume_context = context | ||||
|         .with_additional_node(ContextElement::ConsumeTrailingWhitespace(false)) | ||||
|         .with_additional_node(ContextElement::ListItem(indent_level)) | ||||
|         .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { | ||||
|             exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)), | ||||
|         })); | ||||
| 
 | ||||
|     let element_matcher = parser_with_context!(element)(&parser_context); | ||||
|     let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); | ||||
|     let element_matcher = parser_with_context!(element)(&with_consume_context); | ||||
|     let exit_matcher = parser_with_context!(exit_matcher_parser)(&with_consume_context); | ||||
|     let (remaining, bull) = | ||||
|         verify(bullet, |bull: &str| bull != "*" || indent_level > 0)(remaining)?; | ||||
|     let maybe_contentless_item: Res<&str, &str> = alt((eof, line_ending))(remaining); | ||||
|     match maybe_contentless_item { | ||||
|         Ok((rem, _ws)) => { | ||||
|             // TODO: do we need to consume if this isn't the last item?
 | ||||
|             let source = get_consumed(input, rem); | ||||
|             return Ok(( | ||||
|                 rem, | ||||
| @ -329,15 +384,17 @@ baz"#; | ||||
| 
 | ||||
|       lorem | ||||
| 
 | ||||
|    ipsum | ||||
| 
 | ||||
| ipsum"#;
 | ||||
| 
 | ||||
| dolar"#;
 | ||||
|         let initial_context: ContextTree<'_, '_> = ContextTree::new(); | ||||
|         let document_context = | ||||
|             initial_context.with_additional_node(ContextElement::DocumentRoot(input)); | ||||
|         let plain_list_matcher = parser_with_context!(plain_list)(&document_context); | ||||
|         let (remaining, result) = | ||||
|             plain_list_matcher(input).expect("Should parse the plain list successfully."); | ||||
|         assert_eq!(remaining, "ipsum"); | ||||
|         assert_eq!(remaining, "dolar"); | ||||
|         assert_eq!( | ||||
|             result.get_source(), | ||||
|             r#"1. foo
 | ||||
| @ -348,6 +405,8 @@ ipsum"#; | ||||
| 
 | ||||
|       lorem | ||||
| 
 | ||||
|    ipsum | ||||
| 
 | ||||
| 
 | ||||
| "#
 | ||||
|         ); | ||||
|  | ||||
| @ -6,5 +6,7 @@ | ||||
| 
 | ||||
|       lorem | ||||
| 
 | ||||
|    ipsum | ||||
| 
 | ||||
| ipsum | ||||
| 
 | ||||
| dolar | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Tom Alexander
						Tom Alexander