Create an also_recognize combinator to make providing source slices fast and safe.
This commit is contained in:
parent
60d9487fdf
commit
448dcfac72
@ -1,3 +1,5 @@
|
|||||||
|
use std::ops::RangeTo;
|
||||||
|
|
||||||
use super::parser_context::ContextElement;
|
use super::parser_context::ContextElement;
|
||||||
use super::parser_context::PreviousElementNode;
|
use super::parser_context::PreviousElementNode;
|
||||||
use super::token::Token;
|
use super::token::Token;
|
||||||
@ -6,6 +8,28 @@ use nom::error::ErrorKind;
|
|||||||
use nom::error::ParseError;
|
use nom::error::ParseError;
|
||||||
use nom::IResult;
|
use nom::IResult;
|
||||||
use nom::InputLength;
|
use nom::InputLength;
|
||||||
|
use nom::Offset;
|
||||||
|
use nom::Parser;
|
||||||
|
use nom::Slice;
|
||||||
|
|
||||||
|
/// Return both the parsed output and the output of recognize() together without having to run the child parser twice.
|
||||||
|
pub fn also_recognize<I: Clone + Offset + Slice<RangeTo<usize>>, O, E: ParseError<I>, F>(
|
||||||
|
mut parser: F,
|
||||||
|
) -> impl FnMut(I) -> IResult<I, (I, O), E>
|
||||||
|
where
|
||||||
|
F: Parser<I, O, E>,
|
||||||
|
{
|
||||||
|
move |input: I| {
|
||||||
|
let i = input.clone();
|
||||||
|
match parser.parse(i) {
|
||||||
|
Ok((i, val)) => {
|
||||||
|
let index = input.offset(&i);
|
||||||
|
Ok((i, (input.slice(..index), val)))
|
||||||
|
}
|
||||||
|
Err(e) => Err(e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn context_many1<'r, 's, I, O, E, M>(
|
pub fn context_many1<'r, 's, I, O, E, M>(
|
||||||
context: Context<'r, 's>,
|
context: Context<'r, 's>,
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
use super::combinator::also_recognize;
|
||||||
use super::combinator::context_many_till;
|
use super::combinator::context_many_till;
|
||||||
use super::error::Res;
|
use super::error::Res;
|
||||||
use super::parser_context::ChainBehavior;
|
use super::parser_context::ChainBehavior;
|
||||||
@ -26,8 +27,11 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, P
|
|||||||
exit_matcher: ChainBehavior::AndParent(Some(&context_paragraph_end)),
|
exit_matcher: ChainBehavior::AndParent(Some(&context_paragraph_end)),
|
||||||
}))
|
}))
|
||||||
.with_additional_node(ContextElement::StartOfParagraph);
|
.with_additional_node(ContextElement::StartOfParagraph);
|
||||||
let (remaining, (many, till)) =
|
let (remaining, (source, (many, till))) = also_recognize(context_many_till(
|
||||||
context_many_till(¶graph_context, text_element, context_paragraph_end)(i)?;
|
¶graph_context,
|
||||||
|
text_element,
|
||||||
|
context_paragraph_end,
|
||||||
|
))(i)?;
|
||||||
let many = many
|
let many = many
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter_map(|token| match token {
|
.filter_map(|token| match token {
|
||||||
@ -40,6 +44,7 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, P
|
|||||||
Paragraph {
|
Paragraph {
|
||||||
contents: many,
|
contents: many,
|
||||||
paragraph_end: till,
|
paragraph_end: till,
|
||||||
|
source,
|
||||||
},
|
},
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
@ -68,6 +68,7 @@ pub struct Link<'a> {
|
|||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Paragraph<'a> {
|
pub struct Paragraph<'a> {
|
||||||
|
pub source: &'a str,
|
||||||
pub contents: Vec<TextElement<'a>>,
|
pub contents: Vec<TextElement<'a>>,
|
||||||
pub paragraph_end: &'a str,
|
pub paragraph_end: &'a str,
|
||||||
}
|
}
|
||||||
@ -91,22 +92,6 @@ impl<'a> Source<'a> for TextElement<'a> {
|
|||||||
|
|
||||||
impl<'a> Source<'a> for Paragraph<'a> {
|
impl<'a> Source<'a> for Paragraph<'a> {
|
||||||
fn get_source(&'a self) -> &'a str {
|
fn get_source(&'a self) -> &'a str {
|
||||||
if self.contents.is_empty() {
|
self.source
|
||||||
return self.paragraph_end;
|
|
||||||
}
|
|
||||||
// TODO: Is there a better way to do this? At a minimum I should be checking that the pointers are contiguous instead of blindly adding their lengths, but maybe there's a good way in nom to get both the recognize() value and the parsed values so we can just store a &str to the source.
|
|
||||||
let start = self.contents[0].get_source().as_ptr();
|
|
||||||
let len = self
|
|
||||||
.contents
|
|
||||||
.iter()
|
|
||||||
.map(|text_element| text_element.get_source().len())
|
|
||||||
.sum::<usize>()
|
|
||||||
+ self.paragraph_end.len();
|
|
||||||
let full_source = unsafe {
|
|
||||||
let slice = std::slice::from_raw_parts(start, len);
|
|
||||||
std::str::from_utf8(slice)
|
|
||||||
.expect("A token should always be made with valid utf-8 source material.")
|
|
||||||
};
|
|
||||||
full_source
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user