From 707eac5bf810dffe7cac8ca22d11a489f8c51edd Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 31 Mar 2023 11:16:37 -0400 Subject: [PATCH] Move trailing whitespace parsing to a separate element. I still need to parse the line break at the end of elements. --- src/parser/document.rs | 30 ++++++++++++++++++++++++++++-- src/parser/element.rs | 5 +++++ src/parser/paragraph.rs | 2 -- src/parser/plain_list.rs | 1 - src/parser/util.rs | 9 +++++++++ 5 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index dd5eb67e..7c002c89 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -20,10 +20,12 @@ use crate::parser::parser_context::ChainBehavior; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ContextTree; use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::util::element_trailing_whitespace; use super::element::Element; use super::error::Res; use super::object::Object; +use super::parser_context; use super::parser_with_context::parser_with_context; use super::source::Source; use super::util::exit_matcher_parser; @@ -105,12 +107,36 @@ fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Sec .with_additional_node(ContextElement::Context("section")); let element_matcher = parser_with_context!(element)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); + let trailing_matcher = parser_with_context!(element_trailing_whitespace)(&parser_context); let (remaining, (children, _exit_contents)) = verify( - many_till(element_matcher, exit_matcher), + many_till( + tuple(( + element_matcher, + opt(map(trailing_matcher, Element::TrailingWhitespace)), + )), + exit_matcher, + ), |(children, _exit_contents)| !children.is_empty(), )(input)?; + let flattened_children: Vec = children + .into_iter() + .flat_map(|tpl| { + let mut flattened_children = Vec::with_capacity(2); + flattened_children.push(tpl.0); + if let Some(bar) = tpl.1 { + flattened_children.push(bar); + } + flattened_children.into_iter() + }) + .collect(); let source = get_consumed(input, remaining); - Ok((remaining, Section { source, children })) + Ok(( + remaining, + Section { + source, + children: flattened_children, + }, + )) } #[tracing::instrument(ret, level = "debug")] diff --git a/src/parser/element.rs b/src/parser/element.rs index d0f2ee0a..db304976 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -13,6 +13,10 @@ use nom::combinator::map; pub enum Element<'s> { Paragraph(Paragraph<'s>), PlainList(PlainList<'s>), + /// The whitespace that follows an element. + /// + /// This isn't a real org-mode element. Except for items in plain lists, trailing blank lines belong to the preceding element. It is a separate `Element` in this enum to make parsing easier. + TrailingWhitespace(&'s str), } impl<'s> Source<'s> for Element<'s> { @@ -20,6 +24,7 @@ impl<'s> Source<'s> for Element<'s> { match self { Element::Paragraph(obj) => obj.source, Element::PlainList(obj) => obj.source, + Element::TrailingWhitespace(src) => src, } } } diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index b0e6d2ff..09903b81 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -36,8 +36,6 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st |(children, _exit_contents)| !children.is_empty(), )(input)?; - let (remaining, _trailing_whitespace) = trailing_whitespace(remaining)?; - let source = get_consumed(input, remaining); Ok((remaining, Paragraph { source, children })) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index c3d50db9..9e314286 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -39,7 +39,6 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s }), exit_matcher, )(remaining)?; - let (remaining, _trailing_whitespace) = trailing_whitespace(remaining)?; let source = get_consumed(input, remaining); children.insert(0, first_item); Ok((remaining, PlainList { source, children })) diff --git a/src/parser/util.rs b/src/parser/util.rs index ecd08444..8abbe1af 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -77,6 +77,15 @@ pub fn blank_line(input: &str) -> Res<&str, &str> { recognize(tuple((space0, alt((line_ending, eof)))))(input) } +#[tracing::instrument(ret, level = "debug")] +pub fn element_trailing_whitespace<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, &'s str> { + start_of_line(context, input)?; + alt((eof, recognize(many0(blank_line))))(input) +} + #[tracing::instrument(ret, level = "debug")] pub fn trailing_whitespace(input: &str) -> Res<&str, &str> { alt((eof, recognize(tuple((line_ending, many0(blank_line))))))(input)