From 1da38c8f7d42905e4ac4e7af91b898afd3acf1b2 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 16 Dec 2022 01:35:49 -0500 Subject: [PATCH] Use context_many1 for paragraphs in a document. --- src/parser/combinator.rs | 3 ++- src/parser/text.rs | 6 ++++++ src/parser/text_element_parser.rs | 32 +++++++++++++++++++++---------- src/parser/token.rs | 8 ++++++++ 4 files changed, 38 insertions(+), 11 deletions(-) diff --git a/src/parser/combinator.rs b/src/parser/combinator.rs index 58ea89b..f5d5fca 100644 --- a/src/parser/combinator.rs +++ b/src/parser/combinator.rs @@ -72,7 +72,7 @@ where } } } - let elements: Vec> = current_context + let mut elements: Vec> = current_context .into_iter_until(context) .filter_map(|context_element| match context_element { ContextElement::PreviousElementNode(elem) => Some(elem.element), @@ -86,6 +86,7 @@ where err?; } } + elements.reverse(); Ok((i, elements)) } } diff --git a/src/parser/text.rs b/src/parser/text.rs index 69da8ac..fa98650 100644 --- a/src/parser/text.rs +++ b/src/parser/text.rs @@ -79,6 +79,12 @@ pub struct Link<'a> { pub contents: &'a str, } +#[derive(Debug)] +pub struct Paragraph<'a> { + pub contents: Vec>, + pub paragraph_end: &'a str, +} + pub fn line_break(input: &str) -> Res<&str, LineBreak> { map(line_ending, |s: &str| LineBreak { contents: s })(input) } diff --git a/src/parser/text_element_parser.rs b/src/parser/text_element_parser.rs index b60f27f..a065e03 100644 --- a/src/parser/text_element_parser.rs +++ b/src/parser/text_element_parser.rs @@ -21,6 +21,7 @@ use super::text::span; use super::text::symbol; use super::text::Bold; use super::text::Link; +use super::text::Paragraph; use super::text::Res; use super::text::TextElement; use super::token::Token; @@ -45,10 +46,16 @@ use nom::InputLength; type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult; -pub fn document(input: &str) -> Res<&str, Vec<(Vec, &str)>> { +pub fn document(input: &str) -> Res<&str, Vec> { let initial_context: 
ContextTree<'_, '_> = ContextTree::new(); - let ret = context_many1(&initial_context, paragraph)(input); - ret + let (remaining, tokens) = context_many1(&initial_context, paragraph)(input)?; + let paragraphs = tokens.into_iter().map(|token| { + match token { + Token::TextElement(_) => unreachable!(), + Token::Paragraph(paragraph) => paragraph, + } + }).collect(); + Ok((remaining, paragraphs)) } pub fn context_paragraph_end<'s, 'r>( @@ -93,6 +100,7 @@ fn _preceded_by_whitespace<'s, 'r>(context: Context<'r, 's>) -> bool { TextElement::Link(_) => return false, }; } + Token::Paragraph(_) => unreachable!(), }; } ContextElement::StartOfParagraph => { @@ -134,26 +142,30 @@ pub fn context_bold_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res Ok((remaining, actual_match)) } -pub fn paragraph<'s, 'r>( - context: Context<'r, 's>, - i: &'s str, -) -> Res<&'s str, (Vec>, &'s str)> { +pub fn paragraph<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Paragraph<'s>> { // Add a not(eof) check because many_till cannot match a zero-length string not(eof)(i)?; let paragraph_context = context - .with_additional_node(ContextElement::StartOfParagraph) .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { exit_matcher: ChainBehavior::AndParent(Some(&context_paragraph_end)), - })); + })) + .with_additional_node(ContextElement::StartOfParagraph); let (remaining, (many, till)) = context_many_till(&paragraph_context, flat_text_element, context_paragraph_end)(i)?; let many = many .into_iter() .filter_map(|token| match token { Token::TextElement(text_element) => Some(text_element), + Token::Paragraph(_) => panic!("There should only be text elements in paragraphs."), }) .collect(); - Ok((remaining, (many, till))) + Ok(( + remaining, + Paragraph { + contents: many, + paragraph_end: till, + }, + )) } fn flat_text_element<'s, 'r>( diff --git a/src/parser/token.rs b/src/parser/token.rs index 64eb783..959e633 100644 --- a/src/parser/token.rs +++
b/src/parser/token.rs @@ -1,8 +1,10 @@ +use super::text::Paragraph; use super::text::TextElement; #[derive(Debug)] pub enum Token<'a> { TextElement(TextElement<'a>), + Paragraph(Paragraph<'a>), } impl<'a> Into<Token<'a>> for TextElement<'a> { @@ -10,3 +12,9 @@ impl<'a> Into<Token<'a>> for TextElement<'a> { Token::TextElement(self) } } + +impl<'a> Into<Token<'a>> for Paragraph<'a> { + fn into(self) -> Token<'a> { + Token::Paragraph(self) + } +}