diff --git a/src/parser/combinator.rs b/src/parser/combinator.rs
index fc0ec87..fce936c 100644
--- a/src/parser/combinator.rs
+++ b/src/parser/combinator.rs
@@ -1,3 +1,5 @@
+use std::ops::RangeTo;
+
 use super::parser_context::ContextElement;
 use super::parser_context::PreviousElementNode;
 use super::token::Token;
@@ -6,6 +8,28 @@ use nom::error::ErrorKind;
 use nom::error::ParseError;
 use nom::IResult;
 use nom::InputLength;
+use nom::Offset;
+use nom::Parser;
+use nom::Slice;
+
+/// Return both the parsed output and the output of recognize() together without having to run the child parser twice.
+pub fn also_recognize<I: Clone + Offset + Slice<RangeTo<usize>>, O, E: ParseError<I>, F>(
+    mut parser: F,
+) -> impl FnMut(I) -> IResult<I, (I, O), E>
+where
+    F: Parser<I, O, E>,
+{
+    move |input: I| {
+        let i = input.clone();
+        match parser.parse(i) {
+            Ok((i, val)) => {
+                // How far the child parser advanced = length of the consumed prefix.
+                let index = input.offset(&i);
+                Ok((i, (input.slice(..index), val)))
+            }
+            Err(e) => Err(e),
+        }
+    }
+}
 
 pub fn context_many1<'r, 's, I, O, E, M>(
     context: Context<'r, 's>,
diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs
index 83a3a97..391fc96 100644
--- a/src/parser/paragraph.rs
+++ b/src/parser/paragraph.rs
@@ -1,3 +1,4 @@
+use super::combinator::also_recognize;
 use super::combinator::context_many_till;
 use super::error::Res;
 use super::parser_context::ChainBehavior;
@@ -26,8 +27,11 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Paragraph<'s>> {
         exit_matcher: ChainBehavior::AndParent(Some(&context_paragraph_end)),
     }))
     .with_additional_node(ContextElement::StartOfParagraph);
-    let (remaining, (many, till)) =
-        context_many_till(&paragraph_context, text_element, context_paragraph_end)(i)?;
+    let (remaining, (source, (many, till))) = also_recognize(context_many_till(
+        &paragraph_context,
+        text_element,
+        context_paragraph_end,
+    ))(i)?;
     let many = many
         .into_iter()
         .filter_map(|token| match token {
@@ -40,6 +44,7 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Paragraph<'s>> {
         Paragraph {
             contents: many,
             paragraph_end: till,
+            source,
         },
     ))
 }
diff --git a/src/parser/token.rs b/src/parser/token.rs
index db1bf19..d089aee 100644
--- a/src/parser/token.rs
+++ b/src/parser/token.rs
@@ -68,6 +68,7 @@ pub struct Link<'a> {
 
 #[derive(Debug)]
 pub struct Paragraph<'a> {
+    pub source: &'a str,
     pub contents: Vec<TextElement<'a>>,
     pub paragraph_end: &'a str,
 }
@@ -91,22 +92,6 @@ impl<'a> Source<'a> for TextElement<'a> {
 
 impl<'a> Source<'a> for Paragraph<'a> {
     fn get_source(&'a self) -> &'a str {
-        if self.contents.is_empty() {
-            return self.paragraph_end;
-        }
-        // TODO: Is there a better way to do this? At a minimum I should be checking that the pointers are contiguous instead of blindly adding their lengths but maybe theres a good way in nom to get both the recognize() value and the parsed values so we can just store a &str to the source.
-        let start = self.contents[0].get_source().as_ptr();
-        let len = self
-            .contents
-            .iter()
-            .map(|text_element| text_element.get_source().len())
-            .sum::<usize>()
-            + self.paragraph_end.len();
-        let full_source = unsafe {
-            let slice = std::slice::from_raw_parts(start, len);
-            std::str::from_utf8(slice)
-                .expect("A token should always be made with valid utf-8 source material.")
-        };
-        full_source
+        self.source
    }
 }