Create an also_recognize combinator to make providing source slices fast and safe.

2022-12-18 04:30:44 -05:00 · 2022-12-18 04:30:44 -05:00 · 448dcfac72
commit 448dcfac72
parent 60d9487fdf
3 changed files with 33 additions and 19 deletions
--- a/src/parser/combinator.rs
+++ b/src/parser/combinator.rs
@ -1,3 +1,5 @@
 use std::ops::RangeTo;
 use super::parser_context::ContextElement;
 use super::parser_context::PreviousElementNode;
 use super::token::Token;
@ -6,6 +8,28 @@ use nom::error::ErrorKind;
 use nom::error::ParseError;
 use nom::IResult;
 use nom::InputLength;
 use nom::Offset;
 use nom::Parser;
 use nom::Slice;
 /// Run `parser` once and return its output together with the slice of input it consumed.
 ///
 /// This gives the caller the same slice `recognize()` would yield without running the
 /// child parser a second time. On success the result is `(remaining, (consumed, output))`;
 /// an error from `parser` is passed through untouched.
 ///
 /// NOTE(review): nom's `combinator::consumed` looks equivalent — confirm before keeping both.
 pub fn also_recognize<I: Clone + Offset + Slice<RangeTo<usize>>, O, E: ParseError<I>, F>(
    mut parser: F,
 ) -> impl FnMut(I) -> IResult<I, (I, O), E>
 where
    F: Parser<I, O, E>,
 {
    move |input: I| {
        // Parse a clone so the original input is still available for slicing afterwards.
        let (remaining, output) = parser.parse(input.clone())?;
        // The offset of `remaining` within `input` is the amount the child parser consumed.
        let consumed_len = input.offset(&remaining);
        Ok((remaining, (input.slice(..consumed_len), output)))
    }
 }
 pub fn context_many1<'r, 's, I, O, E, M>(
    context: Context<'r, 's>,
--- a/src/parser/paragraph.rs
+++ b/src/parser/paragraph.rs
@ -1,3 +1,4 @@
 use super::combinator::also_recognize;
 use super::combinator::context_many_till;
 use super::error::Res;
 use super::parser_context::ChainBehavior;
@ -26,8 +27,11 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, P
            exit_matcher: ChainBehavior::AndParent(Some(&context_paragraph_end)),
        }))
        .with_additional_node(ContextElement::StartOfParagraph);
-    let (remaining, (many, till)) =
+    let (remaining, (source, (many, till))) = also_recognize(context_many_till(
-        context_many_till(&paragraph_context, text_element, context_paragraph_end)(i)?;
+        &paragraph_context,
        text_element,
        context_paragraph_end,
    ))(i)?;
    let many = many
        .into_iter()
        .filter_map(|token| match token {
@ -40,6 +44,7 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, P
        Paragraph {
            contents: many,
            paragraph_end: till,
            source,
        },
    ))
 }
--- a/src/parser/token.rs
+++ b/src/parser/token.rs
@ -68,6 +68,7 @@ pub struct Link<'a> {
 /// A paragraph token produced by the paragraph parser.
 #[derive(Debug)]
 pub struct Paragraph<'a> {
    /// Slice of the original input covering the entire paragraph, as captured by `also_recognize`.
    pub source: &'a str,
    /// Text elements that make up the body of the paragraph.
    pub contents: Vec<TextElement<'a>>,
    /// Second value returned by `context_many_till` — presumably the text matched by the
    /// paragraph-end matcher; confirm against that combinator.
    pub paragraph_end: &'a str,
 }
@ -91,22 +92,6 @@ impl<'a> Source<'a> for TextElement<'a> {
 impl<'a> Source<'a> for Paragraph<'a> {
    /// Return the source text that this paragraph was parsed from.
    fn get_source(&'a self) -> &'a str {
-        if self.contents.is_empty() {
+        self.source
            return self.paragraph_end;
        }
        // TODO: Is there a better way to do this? At a minimum I should be checking that the pointers are contiguous instead of blindly adding their lengths but maybe theres a good way in nom to get both the recognize() value and the parsed values so we can just store a &str to the source.
        let start = self.contents[0].get_source().as_ptr();
        let len = self
            .contents
            .iter()
            .map(|text_element| text_element.get_source().len())
            .sum::<usize>()
            + self.paragraph_end.len();
        let full_source = unsafe {
            let slice = std::slice::from_raw_parts(start, len);
            std::str::from_utf8(slice)
                .expect("A token should always be made with valid utf-8 source material.")
        };
        full_source
    }
 }