From e5bc4cb14b0f92b4902aa3efabaed17b4f1a43bc Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 10 Apr 2023 11:50:43 -0400 Subject: [PATCH] Footnote definitions are parsing on their own. --- src/parser/document.rs | 8 ++------ src/parser/element.rs | 5 ----- src/parser/footnote_definition.rs | 6 ++++++ src/parser/greater_block.rs | 7 +++++++ src/parser/paragraph.rs | 9 ++------- src/parser/plain_list.rs | 4 ++++ src/parser/util.rs | 17 +++++++++++++++++ toy_language.txt | 30 ++++++++++++++++++++++++++++++ 8 files changed, 68 insertions(+), 18 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index b6eb652..ba8c69d 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -21,6 +21,7 @@ use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ContextTree; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::util::element_trailing_whitespace; +use crate::parser::util::maybe_consume_trailing_whitespace; use super::element::Element; use super::error::Res; @@ -115,12 +116,7 @@ fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Sec // Check if a parent exit matcher is causing the exit exit_matcher_parser(context, remaining)?; - let (remaining, _trailing_ws) = if context.should_consume_trailing_whitespace() { - opt(parser_with_context!(element_trailing_whitespace)(&parser_context))(remaining)? - - } else { - (remaining, None) - }; + let (remaining, _trailing_ws) = maybe_consume_trailing_whitespace(context, remaining)?; let source = get_consumed(input, remaining); Ok((remaining, Section { source, children })) diff --git a/src/parser/element.rs b/src/parser/element.rs index 5a1b379..902b2a4 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -20,10 +20,6 @@ pub enum Element<'s> { PlainList(PlainList<'s>), GreaterBlock(GreaterBlock<'s>), FootnoteDefinition(FootnoteDefinition<'s>), - /// The whitespace that follows an element. - /// - /// This isn't a real org-mode element. Except for items in plain lists, trailing blank lines belong to the preceding element. It is a separate `Element` in this enum to make parsing easier. - TrailingWhitespace(&'s str), } impl<'s> Source<'s> for Element<'s> { @@ -32,7 +28,6 @@ impl<'s> Source<'s> for Element<'s> { Element::Paragraph(obj) => obj.source, Element::PlainList(obj) => obj.source, Element::GreaterBlock(obj) => obj.source, - Element::TrailingWhitespace(src) => src, Element::FootnoteDefinition(obj) => obj.source, } } diff --git a/src/parser/footnote_definition.rs b/src/parser/footnote_definition.rs index 5924c81..4e3403b 100644 --- a/src/parser/footnote_definition.rs +++ b/src/parser/footnote_definition.rs @@ -8,8 +8,10 @@ use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::blank_line; +use crate::parser::util::element_trailing_whitespace; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; +use crate::parser::util::maybe_consume_trailing_whitespace; use crate::parser::util::start_of_line; use crate::parser::util::whitespace_eof; use nom::branch::alt; @@ -19,6 +21,7 @@ use nom::bytes::complete::take_while; use nom::character::complete::digit1; use nom::character::complete::multispace0; use nom::character::complete::space0; +use nom::combinator::opt; use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many1; @@ -44,6 +47,9 @@ pub fn footnote_definition<'r, 's>( let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); let (remaining, (children, _exit_contents)) = many_till(element_matcher, exit_matcher)(remaining)?; + + let (remaining, _trailing_ws) = maybe_consume_trailing_whitespace(context, remaining)?; + let source = get_consumed(input, remaining); Ok(( remaining, diff --git a/src/parser/greater_block.rs b/src/parser/greater_block.rs index a3f3915..8408740 100644 --- a/src/parser/greater_block.rs +++ b/src/parser/greater_block.rs @@ -8,8 +8,10 @@ use crate::parser::parser_context::ChainBehavior; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; +use crate::parser::util::element_trailing_whitespace; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; +use crate::parser::util::maybe_consume_trailing_whitespace; use crate::parser::util::start_of_line; use nom::branch::alt; use nom::bytes::complete::is_not; @@ -53,6 +55,11 @@ pub fn greater_block<'r, 's>( let (remaining, (children, _exit_contents)) = many_till(element_matcher, exit_matcher)(remaining)?; let (remaining, _end) = greater_block_end(&parser_context, remaining)?; + + // Not checking if parent exit matcher is causing exit because the greater_block_end matcher asserts we matched a full greater block + + let (remaining, _trailing_ws) = maybe_consume_trailing_whitespace(context, remaining)?; + let parameters = match parameters { Some((_ws, parameters)) => Some(parameters), None => None, diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index e0a1235..9922f8b 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -14,6 +14,7 @@ use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; use crate::parser::util::element_trailing_whitespace; use crate::parser::util::exit_matcher_parser; +use crate::parser::util::maybe_consume_trailing_whitespace; use crate::parser::util::start_of_line; use super::element::non_paragraph_element; @@ -39,13 +40,7 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st // Not checking parent exit matcher because if there are any children matched then we have a valid paragraph. - let (remaining, _trailing_ws) = if context.should_consume_trailing_whitespace() { - opt(parser_with_context!(element_trailing_whitespace)( - &parser_context, - ))(remaining)? - } else { - (remaining, None) - }; + let (remaining, _trailing_ws) = maybe_consume_trailing_whitespace(context, remaining)?; let source = get_consumed(input, remaining); diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index d68aa9b..3564376 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -4,6 +4,7 @@ use super::error::Res; use super::greater_element::PlainList; use super::greater_element::PlainListItem; use super::parser_with_context::parser_with_context; +use super::util::maybe_consume_trailing_whitespace; use super::util::non_whitespace_character; use super::Context; use crate::parser::element::element; @@ -49,6 +50,9 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s Ok(_) | Err(_) => break, }; } + + let (remaining, _trailing_ws) = maybe_consume_trailing_whitespace(context, remaining)?; + let source = get_consumed(input, remaining); Ok((remaining, PlainList { source, children })) } diff --git a/src/parser/util.rs b/src/parser/util.rs index 9e4838e..51eaf92 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -1,3 +1,5 @@ +use crate::parser::parser_with_context::parser_with_context; + use super::error::CustomError; use super::error::MyError; use super::error::Res; @@ -10,6 +12,7 @@ use nom::character::complete::none_of; use nom::character::complete::space0; use nom::combinator::eof; use nom::combinator::not; +use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::recognize; use nom::multi::many0; @@ -89,6 +92,20 @@ pub fn element_trailing_whitespace<'r, 's>( alt((eof, recognize(many0(blank_line))))(input) } +#[tracing::instrument(ret, level = "debug")] +pub fn maybe_consume_trailing_whitespace<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, Option<&'s str>> { + if context.should_consume_trailing_whitespace() { + Ok(opt(parser_with_context!(element_trailing_whitespace)( + context, + ))(input)?) + } else { + Ok((input, None)) + } +} + #[tracing::instrument(ret, level = "debug")] pub fn trailing_whitespace(input: &str) -> Res<&str, &str> { alt((eof, recognize(tuple((line_ending, many0(blank_line))))))(input) diff --git a/toy_language.txt b/toy_language.txt index 8fbf6b4..5d18333 100644 --- a/toy_language.txt +++ b/toy_language.txt @@ -1,3 +1,33 @@ +prologue *goes here* I guess *bold +text* + +bold*wont* start *or stop*when there is text outside it + +I guess *regular + +text* + +[foo *bar] baz* car + + +*nesting *bold entrances* and* exits + +* Heading + +body of heading + +** Child heading +** Immediate second child heading + +* Second top-level heading +foo bar +1. This is a list immediately after a paragraph +2. This is a second item in the list + 1. This is a child of the second item +#+begin_center +1. foo +2. bar +#+end_center [fn:1] A footnote. [fn:2] A multi-