From b5b335b9b07e95eafaa82b992d8167c8b22a48b0 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 3 Apr 2023 16:29:47 -0400 Subject: [PATCH 1/2] Fix handling of whitespace at the end of paragraphs and mandatory whitespace in list items. --- src/parser/paragraph.rs | 11 ++++----- src/parser/plain_list.rs | 48 ++++++++++++++++++++++++++++------------ toy_language.txt | 21 ++++++++++++++++++ 3 files changed, 59 insertions(+), 21 deletions(-) diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index a6af51d..a580596 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -1,5 +1,4 @@ use nom::branch::alt; -use nom::character::complete::line_ending; use nom::combinator::eof; use nom::combinator::peek; use nom::combinator::recognize; @@ -14,6 +13,7 @@ use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::exit_matcher_parser; +use crate::parser::util::start_of_line; use super::element::non_paragraph_element; use super::error::Res; @@ -32,14 +32,10 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); let (remaining, (children, _exit_contents)) = verify( - many_till( - standard_set_object_matcher, - peek(alt((eof, recognize(tuple((line_ending, exit_matcher)))))), - ), + many_till(standard_set_object_matcher, peek(recognize(exit_matcher))), |(children, _exit_contents)| !children.is_empty(), )(input)?; - let (remaining, _linebreak) = alt((eof, line_ending))(remaining)?; let source = get_consumed(input, remaining); Ok((remaining, Paragraph { source, children })) @@ -48,8 +44,9 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st #[tracing::instrument(ret, level = "debug")] fn paragraph_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { let 
non_paragraph_element_matcher = parser_with_context!(non_paragraph_element)(context); + let start_of_line_matcher = parser_with_context!(start_of_line)(&context); alt(( - recognize(many1(blank_line)), + recognize(tuple((start_of_line_matcher, many1(blank_line)))), recognize(non_paragraph_element_matcher), eof, ))(input) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index f3e077f..50ee254 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -16,8 +16,10 @@ use crate::parser::util::start_of_line; use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::digit1; +use nom::character::complete::line_ending; use nom::character::complete::one_of; use nom::character::complete::space0; +use nom::character::complete::space1; use nom::combinator::eof; use nom::combinator::recognize; use nom::combinator::verify; @@ -57,24 +59,42 @@ pub fn plain_list_item<'r, 's>( let element_matcher = parser_with_context!(element)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - let (remaining, (bull, _ws)) = tuple((bullet, space0))(remaining)?; - let (remaining, (contents, _exit_contents)) = - many_till(element_matcher, exit_matcher)(remaining)?; - let source = get_consumed(input, remaining); - - Ok(( - remaining, - PlainListItem { - source, - indentation: indent_level, - bullet: bull, - contents, - }, - )) + let (remaining, bull) = bullet(remaining)?; + let maybe_contentless_item: Res<&str, &str> = alt((eof, line_ending))(remaining); + match maybe_contentless_item { + Ok((rem, _ws)) => { + let source = get_consumed(input, rem); + return Ok(( + rem, + PlainListItem { + source, + indentation: indent_level, + bullet: bull, + contents: Vec::new(), + }, + )); + } + Err(_) => { + let (remaining, _ws) = space1(remaining)?; + let (remaining, (contents, _exit_contents)) = + many_till(element_matcher, exit_matcher)(remaining)?; + let source = get_consumed(input, remaining); + return Ok(( + 
remaining, + PlainListItem { + source, + indentation: indent_level, + bullet: bull, + contents, + }, + )); + } + }; } #[tracing::instrument(ret, level = "debug")] fn bullet<'s>(i: &'s str) -> Res<&'s str, &'s str> { + // TODO: If asterisk, it cannot be at start of line or it would be a headline alt(( tag("*"), tag("-"), diff --git a/toy_language.txt b/toy_language.txt index de546e9..c506087 100644 --- a/toy_language.txt +++ b/toy_language.txt @@ -1,3 +1,24 @@ +prologue *goes here* I guess *bold +text* + +bold*wont* start *or stop*when there is text outside it + +I guess *regular + +text* + +[foo *bar] baz* car + + +*nesting *bold entrances* and* exits + +* Heading + +body of heading + +** Child heading +** Immediate second child heading + * Second top-level heading foo bar 1. This is a list immediately after a paragraph From c7c922a4df4d9ef884f02dbace9b5be3ccc781ec Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 3 Apr 2023 16:38:26 -0400 Subject: [PATCH 2/2] Do not allow plain lists to start with an asterisk with no indent. 
--- src/parser/plain_list.rs | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 50ee254..b0a0a60 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -59,7 +59,8 @@ pub fn plain_list_item<'r, 's>( let element_matcher = parser_with_context!(element)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - let (remaining, bull) = bullet(remaining)?; + let (remaining, bull) = + verify(bullet, |bull: &str| bull != "*" || indent_level > 0)(remaining)?; let maybe_contentless_item: Res<&str, &str> = alt((eof, line_ending))(remaining); match maybe_contentless_item { Ok((rem, _ws)) => { @@ -94,7 +95,6 @@ pub fn plain_list_item<'r, 's>( #[tracing::instrument(ret, level = "debug")] fn bullet<'s>(i: &'s str) -> Res<&'s str, &'s str> { - // TODO: If asterisk, it cannot be at start of line or it would be a headline alt(( tag("*"), tag("-"), @@ -208,4 +208,28 @@ mod tests { assert_eq!(remaining, ""); assert_eq!(result.source, "1. 
foo"); } + + #[test] + fn plain_list_cant_start_line_with_asterisk() { + // Plain lists with an asterisk bullet must be indented or else they would be a headline + let input = "* foo"; + let initial_context: ContextTree<'_, '_> = ContextTree::new(); + let document_context = + initial_context.with_additional_node(ContextElement::DocumentRoot(input)); + let plain_list_matcher = parser_with_context!(plain_list)(&document_context); + let result = plain_list_matcher(input); + assert!(result.is_err()); + } + + #[test] + fn indented_can_start_line_with_asterisk() { + // Once indented, an asterisk bullet is no longer treated as a headline, so it must parse as a plain list + let input = " * foo"; + let initial_context: ContextTree<'_, '_> = ContextTree::new(); + let document_context = + initial_context.with_additional_node(ContextElement::DocumentRoot(input)); + let plain_list_matcher = parser_with_context!(plain_list)(&document_context); + let result = plain_list_matcher(input); + assert!(result.is_ok()); + } }