Create an also_recognize combinator to make providing source slices fast and safe.
parent 60d9487fdf
commit 448dcfac72
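As a usage illustration (not part of this commit), assuming also_recognize is in scope, wrapping a simple nom parser such as tag yields both the consumed source slice and the parser's own output in a single pass:

use nom::bytes::complete::tag;
use nom::IResult;

// Hypothetical example: the wrapped parser's result is paired with the
// slice of input it consumed.
fn hello(input: &str) -> IResult<&str, (&str, &str)> {
    also_recognize(tag("hello"))(input)
}

// hello("hello world") == Ok((" world", ("hello", "hello")))
// i.e. (remaining input, (recognized source slice, parsed value))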
@@ -1,3 +1,5 @@
+use std::ops::RangeTo;
+
 use super::parser_context::ContextElement;
 use super::parser_context::PreviousElementNode;
 use super::token::Token;
@@ -6,6 +8,28 @@ use nom::error::ErrorKind;
 use nom::error::ParseError;
 use nom::IResult;
 use nom::InputLength;
+use nom::Offset;
 use nom::Parser;
+use nom::Slice;
+
+/// Return both the parsed output and the output of recognize() together without having to run the child parser twice.
+pub fn also_recognize<I: Clone + Offset + Slice<RangeTo<usize>>, O, E: ParseError<I>, F>(
+    mut parser: F,
+) -> impl FnMut(I) -> IResult<I, (I, O), E>
+where
+    F: Parser<I, O, E>,
+{
+    move |input: I| {
+        let i = input.clone();
+        match parser.parse(i) {
+            Ok((i, val)) => {
+                let index = input.offset(&i);
+                Ok((i, (input.slice(..index), val)))
+            }
+            Err(e) => Err(e),
+        }
+    }
+}
+
 pub fn context_many1<'r, 's, I, O, E, M>(
     context: Context<'r, 's>,
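The doc comment's "without having to run the child parser twice" refers to the alternative of calling recognize() and the child parser separately; a rough sketch of that two-pass approach (illustrative only, using nom's tag, not code from this repository):

use nom::bytes::complete::tag;
use nom::combinator::recognize;
use nom::IResult;

// Two-pass version: the input is parsed once for the recognized slice and
// once for the value, which is the repeated work also_recognize avoids.
fn hello_two_pass(input: &str) -> IResult<&str, (&str, &str)> {
    let (_, source) = recognize(tag("hello"))(input)?;
    let (remaining, value) = tag("hello")(input)?;
    Ok((remaining, (source, value)))
}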
@@ -1,3 +1,4 @@
+use super::combinator::also_recognize;
 use super::combinator::context_many_till;
 use super::error::Res;
 use super::parser_context::ChainBehavior;
@@ -26,8 +27,11 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, P
             exit_matcher: ChainBehavior::AndParent(Some(&context_paragraph_end)),
         }))
         .with_additional_node(ContextElement::StartOfParagraph);
-    let (remaining, (many, till)) =
-        context_many_till(&paragraph_context, text_element, context_paragraph_end)(i)?;
+    let (remaining, (source, (many, till))) = also_recognize(context_many_till(
+        &paragraph_context,
+        text_element,
+        context_paragraph_end,
+    ))(i)?;
     let many = many
         .into_iter()
         .filter_map(|token| match token {
@@ -40,6 +44,7 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, P
         Paragraph {
             contents: many,
             paragraph_end: till,
+            source,
         },
     ))
 }
@@ -68,6 +68,7 @@ pub struct Link<'a> {
 
 #[derive(Debug)]
 pub struct Paragraph<'a> {
+    pub source: &'a str,
     pub contents: Vec<TextElement<'a>>,
     pub paragraph_end: &'a str,
 }
@@ -91,22 +92,6 @@ impl<'a> Source<'a> for TextElement<'a> {
 
 impl<'a> Source<'a> for Paragraph<'a> {
     fn get_source(&'a self) -> &'a str {
-        if self.contents.is_empty() {
-            return self.paragraph_end;
-        }
-        // TODO: Is there a better way to do this? At a minimum I should be checking that the pointers are contiguous instead of blindly adding their lengths but maybe theres a good way in nom to get both the recognize() value and the parsed values so we can just store a &str to the source.
-        let start = self.contents[0].get_source().as_ptr();
-        let len = self
-            .contents
-            .iter()
-            .map(|text_element| text_element.get_source().len())
-            .sum::<usize>()
-            + self.paragraph_end.len();
-        let full_source = unsafe {
-            let slice = std::slice::from_raw_parts(start, len);
-            std::str::from_utf8(slice)
-                .expect("A token should always be made with valid utf-8 source material.")
-        };
-        full_source
+        self.source
     }
 }
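The removed TODO mentions checking that adjacent slices are contiguous before summing their lengths; a minimal sketch of such a check (hypothetical helper, not in the repository) shows what that would involve, and why storing the slice captured by also_recognize is the simpler and safer route:

// Hypothetical contiguity check: b must begin exactly where a ends for
// pointer-plus-length arithmetic over the two slices to be sound.
fn are_contiguous(a: &str, b: &str) -> bool {
    a.as_ptr() as usize + a.len() == b.as_ptr() as usize
}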