From d582c8603a4ed534dcd5eb5b2bd73be60dfedb95 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 25 Mar 2023 11:22:59 -0400 Subject: [PATCH] Implement a basic paragraph parser. --- src/parser/document.rs | 4 +-- src/parser/element.rs | 47 +++++++++++++++++++++++++++++++++++- src/parser/lesser_element.rs | 3 +++ 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index a256f5ac..52fa5f38 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -155,7 +155,7 @@ fn headline<'r, 's>( } fn headline_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { - line_ending(input) + alt((line_ending, eof))(input) } /// Check that we are at the start of a line @@ -199,7 +199,7 @@ fn is_slice_of(parent: &str, child: &str) -> bool { } /// Get a slice of the string that was consumed in a parser using the original input to the parser and the remaining input after the parser. -fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str { +pub fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str { assert!(is_slice_of(input, remaining)); let source = { let offset = remaining.as_ptr() as usize - input.as_ptr() as usize; diff --git a/src/parser/element.rs b/src/parser/element.rs index ecfa993e..aa42936e 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -1,4 +1,19 @@ +use nom::branch::alt; +use nom::character::complete::line_ending; +use nom::character::complete::space0; +use nom::combinator::eof; +use nom::combinator::map; use nom::combinator::not; +use nom::combinator::recognize; +use nom::multi::many1; +use nom::sequence::tuple; + +use crate::parser::document::get_consumed; +use crate::parser::object::standard_set_object; +use crate::parser::parser_context::ChainBehavior; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::parser_with_context::parser_with_context; use 
super::error::Res; use super::greater_element::PlainList; @@ -23,5 +38,35 @@ impl<'s> Source<'s> for Element<'s> { pub fn element<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Element<'s>> { not(|i| context.check_exit_matcher(i))(input)?; - todo!() + + let paragraph_matcher = parser_with_context!(paragraph)(context); + + map(paragraph_matcher, Element::Paragraph)(input) +} + +fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Paragraph<'s>> { + let parser_context = + context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + exit_matcher: ChainBehavior::AndParent(Some(&paragraph_end)), + })); + let standard_set_object_matcher = parser_with_context!(standard_set_object)(&parser_context); + + let (remaining, children) = many1(standard_set_object_matcher)(input)?; + + let source = get_consumed(input, remaining); + + Ok((remaining, Paragraph { source, children })) +} + +fn paragraph_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + // TODO: Other elements should also end paragraphs + alt((recognize(tuple((line_ending, many1(blank_line)))), eof))(input) +} + +/// A line containing only whitespace and then a line break +/// +/// It is up to the caller to ensure this is called at the start of a line. +fn blank_line(input: &str) -> Res<&str, &str> { + not(eof)(input)?; + recognize(tuple((space0, alt((line_ending, eof)))))(input) } diff --git a/src/parser/lesser_element.rs b/src/parser/lesser_element.rs index 83fd38b0..5abc4b38 100644 --- a/src/parser/lesser_element.rs +++ b/src/parser/lesser_element.rs @@ -1,4 +1,7 @@ +use super::object::Object; + #[derive(Debug)] pub struct Paragraph<'s> { pub source: &'s str, + pub children: Vec<Object<'s>>, }