Merge branch 'context_many1'
This commit is contained in:
commit
279b9dceb4
@ -1,45 +1,63 @@
|
||||
use crate::parser::parser_with_context::parser_with_context;
|
||||
use crate::parser::text::paragraph_end;
|
||||
|
||||
use super::error::CustomError;
|
||||
use super::error::MyError;
|
||||
use super::nom_context::ChainBehavior;
|
||||
use super::nom_context::ContextElement;
|
||||
use super::nom_context::ContextTree;
|
||||
use super::nom_context::ExitMatcherNode;
|
||||
use super::nom_context::PreviousElementNode;
|
||||
use super::text::bold_end;
|
||||
use super::text::bold_start;
|
||||
use super::text::line_break;
|
||||
use super::text::link_end;
|
||||
use super::text::link_start;
|
||||
use super::text::space;
|
||||
use super::text::span;
|
||||
use super::text::symbol;
|
||||
use super::text::Bold;
|
||||
use super::text::Link;
|
||||
use super::text::Res;
|
||||
use super::text::TextElement;
|
||||
use super::token::Token;
|
||||
use super::Context;
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::bytes::complete::take;
|
||||
use nom::combinator::cond;
|
||||
use nom::combinator::eof;
|
||||
use nom::combinator::map;
|
||||
use nom::combinator::not;
|
||||
use nom::combinator::peek;
|
||||
use nom::combinator::recognize;
|
||||
use nom::error::ErrorKind;
|
||||
use nom::error::ParseError;
|
||||
use nom::error::VerboseError;
|
||||
use nom::multi::many1;
|
||||
use nom::multi::many_till;
|
||||
use nom::sequence::tuple;
|
||||
use nom::IResult;
|
||||
use nom::InputLength;
|
||||
|
||||
pub fn context_many1<'r, 's, I, O, E, M>(
|
||||
context: Context<'r, 's>,
|
||||
mut many_matcher: M,
|
||||
) -> impl FnMut(I) -> IResult<I, Vec<Token<'s>>, E> + 'r
|
||||
where
|
||||
I: Clone + InputLength,
|
||||
E: ParseError<I>,
|
||||
M: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, O, E> + 'r,
|
||||
O: Into<Token<'s>>,
|
||||
{
|
||||
move |mut i: I| {
|
||||
let mut err = None;
|
||||
// TODO: Can I eliminate the clone? I think this is incrementing the reference count
|
||||
let mut current_context = context.clone();
|
||||
// Despite the clone, the Rc should still point to the same value.
|
||||
assert!(current_context.ptr_eq(context));
|
||||
loop {
|
||||
match many_matcher(¤t_context, i.clone()) {
|
||||
Ok((remaining, many_elem)) => {
|
||||
current_context = current_context.with_additional_node(
|
||||
ContextElement::PreviousElementNode(PreviousElementNode {
|
||||
element: many_elem.into(),
|
||||
}),
|
||||
);
|
||||
i = remaining;
|
||||
}
|
||||
the_error @ Err(_) => {
|
||||
err = Some(the_error);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut elements: Vec<Token<'s>> = current_context
|
||||
.into_iter_until(context)
|
||||
.filter_map(|context_element| match context_element {
|
||||
ContextElement::PreviousElementNode(elem) => Some(elem.element),
|
||||
ContextElement::ExitMatcherNode(_) => None,
|
||||
ContextElement::Context(_) => None,
|
||||
ContextElement::StartOfParagraph => None,
|
||||
})
|
||||
.collect();
|
||||
if elements.is_empty() {
|
||||
if let Some(err) = err {
|
||||
err?;
|
||||
}
|
||||
}
|
||||
elements.reverse();
|
||||
Ok((i, elements))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn context_many_till<'r, 's, I, O, E, F, M, T>(
|
||||
context: Context<'r, 's>,
|
||||
mut many_matcher: M,
|
||||
@ -53,7 +71,7 @@ where
|
||||
O: Into<Token<'s>>,
|
||||
{
|
||||
move |mut i: I| {
|
||||
// TODO: Can I eliminate the clone?
|
||||
// TODO: Can I eliminate the clone? I think this is incrementing the reference count
|
||||
let mut current_context = context.clone();
|
||||
// Despite the clone, the Rc should still point to the same value, otherwise we'll get stuck in an endless loop.
|
||||
assert!(current_context.ptr_eq(context));
|
||||
|
@ -1,8 +1,5 @@
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::error::ErrorKind;
|
||||
use nom::error::ParseError;
|
||||
use nom::error::VerboseError;
|
||||
use nom::IResult;
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum CustomError<I> {
|
||||
|
@ -27,7 +27,7 @@ impl<T> Node<T> {
|
||||
}
|
||||
|
||||
// TODO: This Debug is only needed because of the try_unwrap+expect
|
||||
impl<T: Debug> List<T> {
|
||||
impl<T> List<T> {
|
||||
pub fn new() -> Self {
|
||||
List { head: None }
|
||||
}
|
||||
@ -45,8 +45,10 @@ impl<T: Debug> List<T> {
|
||||
match self.head.take() {
|
||||
None => (None, List::new()),
|
||||
Some(popped_node) => {
|
||||
let extracted_node =
|
||||
Rc::try_unwrap(popped_node).expect("TODO I should handle this better");
|
||||
let extracted_node = match Rc::try_unwrap(popped_node) {
|
||||
Ok(node) => node,
|
||||
Err(e) => panic!("try_unwrap failed on Rc in pop_front on List."),
|
||||
};
|
||||
(
|
||||
Some(extracted_node.data),
|
||||
List {
|
||||
@ -91,6 +93,13 @@ impl<T: Debug> List<T> {
|
||||
stop: &other.head,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_iter_until<'a>(self, other: &'a List<T>) -> impl Iterator<Item = T> + 'a {
|
||||
NodeIntoIterUntil {
|
||||
position: self,
|
||||
stop: &other,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct NodeIter<'a, T> {
|
||||
@ -144,3 +153,21 @@ impl<'a, T> Iterator for NodeIterUntil<'a, T> {
|
||||
Some(return_value)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct NodeIntoIterUntil<'a, T> {
|
||||
position: List<T>,
|
||||
stop: &'a List<T>,
|
||||
}
|
||||
|
||||
impl<'a, T> Iterator for NodeIntoIterUntil<'a, T> {
|
||||
type Item = T;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.position.ptr_eq(self.stop) {
|
||||
return None;
|
||||
}
|
||||
let (popped_element, new_position) = self.position.pop_front();
|
||||
self.position = new_position;
|
||||
popped_element
|
||||
}
|
||||
}
|
||||
|
@ -1,8 +1,5 @@
|
||||
use std::rc::Rc;
|
||||
|
||||
use nom::bytes::complete::take;
|
||||
use nom::combinator::not;
|
||||
use nom::error::VerboseError;
|
||||
use nom::IResult;
|
||||
|
||||
use super::error::CustomError;
|
||||
@ -50,6 +47,13 @@ impl<'r, 's> ContextTree<'r, 's> {
|
||||
self.tree.iter_until(&other.tree)
|
||||
}
|
||||
|
||||
pub fn into_iter_until<'x: 'r>(
|
||||
self,
|
||||
other: &'x ContextTree<'x, 's>,
|
||||
) -> impl Iterator<Item = ContextElement<'r, 's>> {
|
||||
self.tree.into_iter_until(&other.tree)
|
||||
}
|
||||
|
||||
pub fn check_exit_matcher(
|
||||
&'r self,
|
||||
i: &'s str,
|
||||
|
@ -1,17 +1,4 @@
|
||||
use nom::branch::alt;
|
||||
/*
|
||||
|
||||
hypothetical link:
|
||||
fn link = many_till(text_element, link_end)
|
||||
|
||||
but what if you start a bold?
|
||||
fn bold = many_till(text_element, bold_end) could eat the link_end
|
||||
|
||||
Do I pass along break-conditions? Passing link_end into bold's parser?
|
||||
|
||||
I'll try a very simple language first where asterisks always start/end bold and links are just between [ and ]. Paragraphs will have a blank line between them.
|
||||
|
||||
*/
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::character::complete::alphanumeric1;
|
||||
use nom::character::complete::line_ending;
|
||||
@ -19,7 +6,6 @@ use nom::character::complete::space1;
|
||||
use nom::combinator::eof;
|
||||
use nom::combinator::map;
|
||||
use nom::combinator::recognize;
|
||||
use nom::error::VerboseError;
|
||||
use nom::multi::many1;
|
||||
use nom::multi::many_till;
|
||||
use nom::sequence::tuple;
|
||||
@ -79,6 +65,12 @@ pub struct Link<'a> {
|
||||
pub contents: &'a str,
|
||||
}
|
||||
|
||||
/// A parsed paragraph: its text elements plus the text that terminated it.
#[derive(Debug)]
|
||||
pub struct Paragraph<'a> {
|
||||
/// The text elements matched inside the paragraph, in order.
pub contents: Vec<TextElement<'a>>,
|
||||
/// The input slice matched by the paragraph-end parser.
pub paragraph_end: &'a str,
|
||||
}
|
||||
|
||||
pub fn line_break(input: &str) -> Res<&str, LineBreak> {
|
||||
map(line_ending, |s: &str| LineBreak { contents: s })(input)
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
use crate::parser::parser_with_context::parser_with_context;
|
||||
use crate::parser::text::paragraph_end;
|
||||
|
||||
use super::combinator::context_many1;
|
||||
use super::combinator::context_many_till;
|
||||
use super::error::CustomError;
|
||||
use super::error::MyError;
|
||||
@ -9,7 +10,6 @@ use super::nom_context::ChainBehavior;
|
||||
use super::nom_context::ContextElement;
|
||||
use super::nom_context::ContextTree;
|
||||
use super::nom_context::ExitMatcherNode;
|
||||
use super::nom_context::PreviousElementNode;
|
||||
use super::text::bold_end;
|
||||
use super::text::bold_start;
|
||||
use super::text::line_break;
|
||||
@ -20,35 +20,34 @@ use super::text::span;
|
||||
use super::text::symbol;
|
||||
use super::text::Bold;
|
||||
use super::text::Link;
|
||||
use super::text::Paragraph;
|
||||
use super::text::Res;
|
||||
use super::text::TextElement;
|
||||
use super::token::Token;
|
||||
use super::Context;
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::bytes::complete::take;
|
||||
use nom::combinator::cond;
|
||||
use nom::combinator::eof;
|
||||
use nom::combinator::map;
|
||||
use nom::combinator::not;
|
||||
use nom::combinator::peek;
|
||||
use nom::combinator::recognize;
|
||||
use nom::error::ErrorKind;
|
||||
use nom::error::ParseError;
|
||||
use nom::error::VerboseError;
|
||||
use nom::multi::many1;
|
||||
use nom::multi::many_till;
|
||||
use nom::sequence::tuple;
|
||||
use nom::IResult;
|
||||
use nom::InputLength;
|
||||
|
||||
type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult<I, O, E>;
|
||||
|
||||
pub fn document(input: &str) -> Res<&str, Vec<(Vec<TextElement>, &str)>> {
|
||||
pub fn document(input: &str) -> Res<&str, Vec<Paragraph>> {
|
||||
let initial_context: ContextTree<'_, '_> = ContextTree::new();
|
||||
let paragraph_parser = parser_with_context!(paragraph);
|
||||
let ret = many1(paragraph_parser(&initial_context))(input);
|
||||
ret
|
||||
let (remaining, tokens) = context_many1(&initial_context, paragraph)(input)?;
|
||||
let paragraphs = tokens
|
||||
.into_iter()
|
||||
.map(|token| match token {
|
||||
Token::TextElement(_) => unreachable!(),
|
||||
Token::Paragraph(paragraph) => paragraph,
|
||||
})
|
||||
.collect();
|
||||
Ok((remaining, paragraphs))
|
||||
}
|
||||
|
||||
pub fn context_paragraph_end<'s, 'r>(
|
||||
@ -62,6 +61,10 @@ fn can_start_bold<'s, 'r>(context: Context<'r, 's>) -> bool {
|
||||
_preceded_by_whitespace(context) && !_in_section(context, "bold")
|
||||
}
|
||||
|
||||
fn can_start_link<'s, 'r>(context: Context<'r, 's>) -> bool {
|
||||
!_in_section(context, "link")
|
||||
}
|
||||
|
||||
fn _in_section<'s, 'r, 'x>(context: Context<'r, 's>, section_name: &'x str) -> bool {
|
||||
for thing in context.iter() {
|
||||
match thing.get_data() {
|
||||
@ -93,6 +96,7 @@ fn _preceded_by_whitespace<'s, 'r>(context: Context<'r, 's>) -> bool {
|
||||
TextElement::Link(_) => return false,
|
||||
};
|
||||
}
|
||||
Token::Paragraph(_) => unreachable!(),
|
||||
};
|
||||
}
|
||||
ContextElement::StartOfParagraph => {
|
||||
@ -121,6 +125,20 @@ pub fn context_bold_start<'s, 'r>(
|
||||
}
|
||||
}
|
||||
|
||||
pub fn context_link_start<'s, 'r>(
|
||||
context: Context<'r, 's>,
|
||||
input: &'s str,
|
||||
) -> Res<&'s str, &'s str> {
|
||||
if can_start_link(context) {
|
||||
recognize(link_start)(input)
|
||||
} else {
|
||||
// TODO: Make this a specific error instead of just a generic MyError
|
||||
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
||||
"Cannot start link",
|
||||
))));
|
||||
}
|
||||
}
|
||||
|
||||
pub fn context_bold_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
let (remaining, actual_match) = recognize(bold_end)(input)?;
|
||||
peek(alt((
|
||||
@ -134,26 +152,35 @@ pub fn context_bold_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res
|
||||
Ok((remaining, actual_match))
|
||||
}
|
||||
|
||||
pub fn paragraph<'s, 'r>(
|
||||
context: Context<'r, 's>,
|
||||
i: &'s str,
|
||||
) -> Res<&'s str, (Vec<TextElement<'s>>, &'s str)> {
|
||||
pub fn context_link_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
let (remaining, actual_match) = recognize(link_end)(input)?;
|
||||
Ok((remaining, actual_match))
|
||||
}
|
||||
|
||||
pub fn paragraph<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Paragraph<'s>> {
|
||||
// Add a not(eof) check because many_till cannot match a zero-length string
|
||||
not(eof)(i)?;
|
||||
let paragraph_context = context
|
||||
.with_additional_node(ContextElement::StartOfParagraph)
|
||||
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||
exit_matcher: ChainBehavior::AndParent(Some(&context_paragraph_end)),
|
||||
}));
|
||||
}))
|
||||
.with_additional_node(ContextElement::StartOfParagraph);
|
||||
let (remaining, (many, till)) =
|
||||
context_many_till(&paragraph_context, flat_text_element, context_paragraph_end)(i)?;
|
||||
let many = many
|
||||
.into_iter()
|
||||
.filter_map(|token| match token {
|
||||
Token::TextElement(text_element) => Some(text_element),
|
||||
Token::Paragraph(_) => panic!("There should only be text elements in paragraphs."),
|
||||
})
|
||||
.collect();
|
||||
Ok((remaining, (many, till)))
|
||||
Ok((
|
||||
remaining,
|
||||
Paragraph {
|
||||
contents: many,
|
||||
paragraph_end: till,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
fn flat_text_element<'s, 'r>(
|
||||
@ -191,21 +218,16 @@ fn flat_bold<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Bold<
|
||||
Ok((remaining, ret))
|
||||
}
|
||||
|
||||
fn recognize_link_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
||||
recognize(link_end)(input)
|
||||
}
|
||||
|
||||
fn flat_link<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Link<'s>> {
|
||||
let nom_context =
|
||||
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||
exit_matcher: ChainBehavior::AndParent(Some(&recognize_link_end)),
|
||||
}));
|
||||
// let nom_context = context.with_additional_exit_matcher(&recognize_link_end);
|
||||
let text_element_parser = parser_with_context!(flat_text_element)(&nom_context);
|
||||
let (remaining, captured) = recognize(tuple((
|
||||
link_start,
|
||||
many_till(text_element_parser, link_end),
|
||||
)))(i)?;
|
||||
let link_start = parser_with_context!(context_link_start)(&context);
|
||||
let nom_context = context
|
||||
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
||||
exit_matcher: ChainBehavior::AndParent(Some(&context_link_end)),
|
||||
}))
|
||||
.with_additional_node(ContextElement::Context("link"));
|
||||
let (remaining, captured) = recognize(tuple((link_start, |i| {
|
||||
context_many_till(&nom_context, flat_text_element, context_link_end)(i)
|
||||
})))(i)?;
|
||||
let ret = Link { contents: captured };
|
||||
Ok((remaining, ret))
|
||||
}
|
||||
|
@ -1,8 +1,10 @@
|
||||
use super::text::Paragraph;
|
||||
use super::text::TextElement;
|
||||
|
||||
/// A parsed item that the context-aware combinators can record and hand back.
#[derive(Debug)]
|
||||
pub enum Token<'a> {
|
||||
/// A single inline text element.
TextElement(TextElement<'a>),
|
||||
/// A whole paragraph.
Paragraph(Paragraph<'a>),
|
||||
}
|
||||
|
||||
impl<'a> Into<Token<'a>> for TextElement<'a> {
|
||||
@ -10,3 +12,9 @@ impl<'a> Into<Token<'a>> for TextElement<'a> {
|
||||
Token::TextElement(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Into<Token<'a>> for Paragraph<'a> {
|
||||
fn into(self) -> Token<'a> {
|
||||
Token::Paragraph(self)
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user