Create an also_recognize combinator to make providing source slices fast and safe.

This commit is contained in:
Tom Alexander 2022-12-18 04:30:44 -05:00
parent 60d9487fdf
commit 448dcfac72
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
3 changed files with 33 additions and 19 deletions

View File

@ -1,3 +1,5 @@
use std::ops::RangeTo;
use super::parser_context::ContextElement;
use super::parser_context::PreviousElementNode;
use super::token::Token;
@ -6,6 +8,28 @@ use nom::error::ErrorKind;
use nom::error::ParseError;
use nom::IResult;
use nom::InputLength;
use nom::Offset;
use nom::Parser;
use nom::Slice;
/// Wrap `parser` so that a single run yields both the portion of the input it
/// consumed (what `recognize()` would return) and the value it parsed.
///
/// On success the result is `(remaining, (consumed, output))`, where `consumed`
/// is the prefix of the original input up to the start of `remaining`. Errors
/// from the child parser are passed through unchanged.
pub fn also_recognize<I: Clone + Offset + Slice<RangeTo<usize>>, O, E: ParseError<I>, F>(
    mut parser: F,
) -> impl FnMut(I) -> IResult<I, (I, O), E>
where
    F: Parser<I, O, E>,
{
    move |input: I| {
        // Parse a clone so the original input is still available for slicing.
        parser.parse(input.clone()).map(|(remaining, output)| {
            // Distance from the start of `input` to the start of `remaining`
            // is exactly how much the child parser consumed.
            let consumed_len = input.offset(&remaining);
            (remaining, (input.slice(..consumed_len), output))
        })
    }
}
pub fn context_many1<'r, 's, I, O, E, M>(
context: Context<'r, 's>,

View File

@ -1,3 +1,4 @@
use super::combinator::also_recognize;
use super::combinator::context_many_till;
use super::error::Res;
use super::parser_context::ChainBehavior;
@ -26,8 +27,11 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, P
exit_matcher: ChainBehavior::AndParent(Some(&context_paragraph_end)),
}))
.with_additional_node(ContextElement::StartOfParagraph);
let (remaining, (many, till)) =
context_many_till(&paragraph_context, text_element, context_paragraph_end)(i)?;
let (remaining, (source, (many, till))) = also_recognize(context_many_till(
&paragraph_context,
text_element,
context_paragraph_end,
))(i)?;
let many = many
.into_iter()
.filter_map(|token| match token {
@ -40,6 +44,7 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, P
Paragraph {
contents: many,
paragraph_end: till,
source,
},
))
}

View File

@ -68,6 +68,7 @@ pub struct Link<'a> {
/// A parsed paragraph whose fields borrow string data for the lifetime `'a`.
#[derive(Debug)]
pub struct Paragraph<'a> {
/// Slice of the input that the paragraph parser consumed for this paragraph;
/// this is what `get_source` returns.
pub source: &'a str,
/// The text elements collected while parsing the paragraph body.
pub contents: Vec<TextElement<'a>>,
/// The text matched by the paragraph's end matcher.
pub paragraph_end: &'a str,
}
@ -91,22 +92,6 @@ impl<'a> Source<'a> for TextElement<'a> {
impl<'a> Source<'a> for Paragraph<'a> {
    /// Return the slice of the original input that this paragraph was parsed from.
    fn get_source(&'a self) -> &'a str {
        // The span is stored on the struct as `source`, so there is no need to
        // rebuild it from the children's pointers and lengths with `unsafe`
        // (which silently assumed the child slices were contiguous).
        self.source
    }
}