Move trailing whitespace parsing to a separate element.

I still need to parse the line break at the end of elements.
This commit is contained in:
Tom Alexander 2023-03-31 11:16:37 -04:00
parent 602cf4c374
commit 707eac5bf8
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
5 changed files with 42 additions and 5 deletions

View File

@ -20,10 +20,12 @@ use crate::parser::parser_context::ChainBehavior;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ContextTree;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::util::element_trailing_whitespace;
use super::element::Element;
use super::error::Res;
use super::object::Object;
use super::parser_context;
use super::parser_with_context::parser_with_context;
use super::source::Source;
use super::util::exit_matcher_parser;
@ -105,12 +107,36 @@ fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Sec
.with_additional_node(ContextElement::Context("section"));
let element_matcher = parser_with_context!(element)(&parser_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
let trailing_matcher = parser_with_context!(element_trailing_whitespace)(&parser_context);
let (remaining, (children, _exit_contents)) = verify(
many_till(element_matcher, exit_matcher),
many_till(
tuple((
element_matcher,
opt(map(trailing_matcher, Element::TrailingWhitespace)),
)),
exit_matcher,
),
|(children, _exit_contents)| !children.is_empty(),
)(input)?;
let flattened_children: Vec<Element> = children
.into_iter()
.flat_map(|tpl| {
let mut flattened_children = Vec::with_capacity(2);
flattened_children.push(tpl.0);
if let Some(bar) = tpl.1 {
flattened_children.push(bar);
}
flattened_children.into_iter()
})
.collect();
let source = get_consumed(input, remaining);
Ok((remaining, Section { source, children }))
Ok((
remaining,
Section {
source,
children: flattened_children,
},
))
}
#[tracing::instrument(ret, level = "debug")]

View File

@ -13,6 +13,10 @@ use nom::combinator::map;
/// A top-level piece of parsed document content.
///
/// Each variant either wraps the parsed node for one kind of element or, for
/// `TrailingWhitespace`, borrows the matched text directly from the input.
pub enum Element<'s> {
/// A parsed paragraph.
Paragraph(Paragraph<'s>),
/// A parsed plain list.
PlainList(PlainList<'s>),
/// The whitespace that follows an element.
///
/// This isn't a real org-mode element. Except for items in plain lists, trailing blank lines belong to the preceding element. It is a separate `Element` in this enum to make parsing easier.
///
/// The payload is the matched whitespace itself, borrowed from the input.
TrailingWhitespace(&'s str),
}
impl<'s> Source<'s> for Element<'s> {
@ -20,6 +24,7 @@ impl<'s> Source<'s> for Element<'s> {
match self {
Element::Paragraph(obj) => obj.source,
Element::PlainList(obj) => obj.source,
Element::TrailingWhitespace(src) => src,
}
}
}

View File

@ -36,8 +36,6 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st
|(children, _exit_contents)| !children.is_empty(),
)(input)?;
let (remaining, _trailing_whitespace) = trailing_whitespace(remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, Paragraph { source, children }))

View File

@ -39,7 +39,6 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s
}),
exit_matcher,
)(remaining)?;
let (remaining, _trailing_whitespace) = trailing_whitespace(remaining)?;
let source = get_consumed(input, remaining);
children.insert(0, first_item);
Ok((remaining, PlainList { source, children }))

View File

@ -77,6 +77,15 @@ pub fn blank_line(input: &str) -> Res<&str, &str> {
recognize(tuple((space0, alt((line_ending, eof)))))(input)
}
#[tracing::instrument(ret, level = "debug")]
pub fn element_trailing_whitespace<'r, 's>(
context: Context<'r, 's>,
input: &'s str,
) -> Res<&'s str, &'s str> {
start_of_line(context, input)?;
alt((eof, recognize(many0(blank_line))))(input)
}
#[tracing::instrument(ret, level = "debug")]
pub fn trailing_whitespace(input: &str) -> Res<&str, &str> {
alt((eof, recognize(tuple((line_ending, many0(blank_line))))))(input)