Merge branch 'context_many1'

This commit is contained in:
Tom Alexander 2022-12-18 02:09:42 -05:00
commit 279b9dceb4
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
7 changed files with 159 additions and 91 deletions

View File

@ -1,45 +1,63 @@
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::text::paragraph_end;
use super::error::CustomError;
use super::error::MyError;
use super::nom_context::ChainBehavior;
use super::nom_context::ContextElement;
use super::nom_context::ContextTree;
use super::nom_context::ExitMatcherNode;
use super::nom_context::PreviousElementNode;
use super::text::bold_end;
use super::text::bold_start;
use super::text::line_break;
use super::text::link_end;
use super::text::link_start;
use super::text::space;
use super::text::span;
use super::text::symbol;
use super::text::Bold;
use super::text::Link;
use super::text::Res;
use super::text::TextElement;
use super::token::Token;
use super::Context;
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::bytes::complete::take;
use nom::combinator::cond;
use nom::combinator::eof;
use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::error::ErrorKind;
use nom::error::ParseError;
use nom::error::VerboseError;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::tuple;
use nom::IResult;
use nom::InputLength;
pub fn context_many1<'r, 's, I, O, E, M>(
context: Context<'r, 's>,
mut many_matcher: M,
) -> impl FnMut(I) -> IResult<I, Vec<Token<'s>>, E> + 'r
where
I: Clone + InputLength,
E: ParseError<I>,
M: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, O, E> + 'r,
O: Into<Token<'s>>,
{
move |mut i: I| {
let mut err = None;
// TODO: Can I eliminate the clone? I think this is incrementing the reference count
let mut current_context = context.clone();
// Despite the clone, the Rc should still point to the same value.
assert!(current_context.ptr_eq(context));
loop {
match many_matcher(&current_context, i.clone()) {
Ok((remaining, many_elem)) => {
current_context = current_context.with_additional_node(
ContextElement::PreviousElementNode(PreviousElementNode {
element: many_elem.into(),
}),
);
i = remaining;
}
the_error @ Err(_) => {
err = Some(the_error);
break;
}
}
}
let mut elements: Vec<Token<'s>> = current_context
.into_iter_until(context)
.filter_map(|context_element| match context_element {
ContextElement::PreviousElementNode(elem) => Some(elem.element),
ContextElement::ExitMatcherNode(_) => None,
ContextElement::Context(_) => None,
ContextElement::StartOfParagraph => None,
})
.collect();
if elements.is_empty() {
if let Some(err) = err {
err?;
}
}
elements.reverse();
Ok((i, elements))
}
}
pub fn context_many_till<'r, 's, I, O, E, F, M, T>(
context: Context<'r, 's>,
mut many_matcher: M,
@ -53,7 +71,7 @@ where
O: Into<Token<'s>>,
{
move |mut i: I| {
// TODO: Can I eliminate the clone?
// TODO: Can I eliminate the clone? I think this is incrementing the reference count
let mut current_context = context.clone();
// Despite the clone, the Rc should still point to the same value, otherwise we'll get stuck in an endless loop.
assert!(current_context.ptr_eq(context));

View File

@ -1,8 +1,5 @@
use nom::bytes::complete::tag;
use nom::error::ErrorKind;
use nom::error::ParseError;
use nom::error::VerboseError;
use nom::IResult;
#[derive(Debug, PartialEq)]
pub enum CustomError<I> {

View File

@ -27,7 +27,7 @@ impl<T> Node<T> {
}
// TODO: This Debug is only needed because of the try_unwrap+expect
impl<T: Debug> List<T> {
impl<T> List<T> {
pub fn new() -> Self {
List { head: None }
}
@ -45,8 +45,10 @@ impl<T: Debug> List<T> {
match self.head.take() {
None => (None, List::new()),
Some(popped_node) => {
let extracted_node =
Rc::try_unwrap(popped_node).expect("TODO I should handle this better");
let extracted_node = match Rc::try_unwrap(popped_node) {
Ok(node) => node,
Err(e) => panic!("try_unwrap failed on Rc in pop_front on List."),
};
(
Some(extracted_node.data),
List {
@ -91,6 +93,13 @@ impl<T: Debug> List<T> {
stop: &other.head,
}
}
/// Consumes this list and yields its elements by value, starting at the
/// head, until the position reached is the same list as `other`.
pub fn into_iter_until<'a>(self, other: &'a List<T>) -> impl Iterator<Item = T> + 'a {
    let stop = other;
    NodeIntoIterUntil {
        position: self,
        stop,
    }
}
}
pub struct NodeIter<'a, T> {
@ -144,3 +153,21 @@ impl<'a, T> Iterator for NodeIterUntil<'a, T> {
Some(return_value)
}
}
/// Owning iterator over a `List` that stops once it reaches `stop`.
pub struct NodeIntoIterUntil<'a, T> {
    /// The not-yet-yielded remainder of the list being consumed.
    position: List<T>,
    /// The list at which iteration ends; none of its elements are yielded.
    stop: &'a List<T>,
}

impl<'a, T> Iterator for NodeIntoIterUntil<'a, T> {
    type Item = T;

    /// Pops the front element off the remaining list, or returns `None` as
    /// soon as the remaining list is pointer-equal to `stop`.
    fn next(&mut self) -> Option<Self::Item> {
        if self.position.ptr_eq(self.stop) {
            None
        } else {
            let (front, rest) = self.position.pop_front();
            self.position = rest;
            front
        }
    }
}

View File

@ -1,8 +1,5 @@
use std::rc::Rc;
use nom::bytes::complete::take;
use nom::combinator::not;
use nom::error::VerboseError;
use nom::IResult;
use super::error::CustomError;
@ -50,6 +47,13 @@ impl<'r, 's> ContextTree<'r, 's> {
self.tree.iter_until(&other.tree)
}
/// Consumes this context and yields its elements by value until the
/// position of `other` is reached; delegates to the underlying list.
pub fn into_iter_until<'x: 'r>(
    self,
    other: &'x ContextTree<'x, 's>,
) -> impl Iterator<Item = ContextElement<'r, 's>> {
    let stop_tree = &other.tree;
    self.tree.into_iter_until(stop_tree)
}
pub fn check_exit_matcher(
&'r self,
i: &'s str,

View File

@ -1,17 +1,4 @@
use nom::branch::alt;
/*
hypothetical link:
fn link = many_till(text_element, link_end)
but what if you start a bold?
fn bold = many_till(text_element, bold_end) could eat the link_end
Do I pass along break-conditions? Passing link_end into bold's parser?
I'll try a very simple language first where asterisks always start/end bold and links are just between [ and ]. Paragraphs will have a blank line between them.
*/
use nom::bytes::complete::tag;
use nom::character::complete::alphanumeric1;
use nom::character::complete::line_ending;
@ -19,7 +6,6 @@ use nom::character::complete::space1;
use nom::combinator::eof;
use nom::combinator::map;
use nom::combinator::recognize;
use nom::error::VerboseError;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::tuple;
@ -79,6 +65,12 @@ pub struct Link<'a> {
pub contents: &'a str,
}
/// A parsed paragraph: its text elements plus the text that terminated it.
#[derive(Debug)]
pub struct Paragraph<'a> {
/// The text elements that make up the body of the paragraph.
pub contents: Vec<TextElement<'a>>,
/// The slice of input matched by the paragraph-end matcher.
pub paragraph_end: &'a str,
}
pub fn line_break(input: &str) -> Res<&str, LineBreak> {
map(line_ending, |s: &str| LineBreak { contents: s })(input)
}

View File

@ -2,6 +2,7 @@
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::text::paragraph_end;
use super::combinator::context_many1;
use super::combinator::context_many_till;
use super::error::CustomError;
use super::error::MyError;
@ -9,7 +10,6 @@ use super::nom_context::ChainBehavior;
use super::nom_context::ContextElement;
use super::nom_context::ContextTree;
use super::nom_context::ExitMatcherNode;
use super::nom_context::PreviousElementNode;
use super::text::bold_end;
use super::text::bold_start;
use super::text::line_break;
@ -20,35 +20,34 @@ use super::text::span;
use super::text::symbol;
use super::text::Bold;
use super::text::Link;
use super::text::Paragraph;
use super::text::Res;
use super::text::TextElement;
use super::token::Token;
use super::Context;
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::bytes::complete::take;
use nom::combinator::cond;
use nom::combinator::eof;
use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::error::ErrorKind;
use nom::error::ParseError;
use nom::error::VerboseError;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::tuple;
use nom::IResult;
use nom::InputLength;
/// Trait-object type for a matcher that has not yet been bound to a context:
/// it takes the context and the input and returns a nom `IResult`.
type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult<I, O, E>;
pub fn document(input: &str) -> Res<&str, Vec<(Vec<TextElement>, &str)>> {
pub fn document(input: &str) -> Res<&str, Vec<Paragraph>> {
let initial_context: ContextTree<'_, '_> = ContextTree::new();
let paragraph_parser = parser_with_context!(paragraph);
let ret = many1(paragraph_parser(&initial_context))(input);
ret
let (remaining, tokens) = context_many1(&initial_context, paragraph)(input)?;
let paragraphs = tokens
.into_iter()
.map(|token| match token {
Token::TextElement(_) => unreachable!(),
Token::Paragraph(paragraph) => paragraph,
})
.collect();
Ok((remaining, paragraphs))
}
pub fn context_paragraph_end<'s, 'r>(
@ -62,6 +61,10 @@ fn can_start_bold<'s, 'r>(context: Context<'r, 's>) -> bool {
_preceded_by_whitespace(context) && !_in_section(context, "bold")
}
/// Reports whether a link may begin here: only when the context is not
/// already inside a "link" section (links do not nest).
fn can_start_link<'s, 'r>(context: Context<'r, 's>) -> bool {
    let already_in_link = _in_section(context, "link");
    !already_in_link
}
fn _in_section<'s, 'r, 'x>(context: Context<'r, 's>, section_name: &'x str) -> bool {
for thing in context.iter() {
match thing.get_data() {
@ -93,6 +96,7 @@ fn _preceded_by_whitespace<'s, 'r>(context: Context<'r, 's>) -> bool {
TextElement::Link(_) => return false,
};
}
Token::Paragraph(_) => unreachable!(),
};
}
ContextElement::StartOfParagraph => {
@ -121,6 +125,20 @@ pub fn context_bold_start<'s, 'r>(
}
}
/// Matches the start of a link, but only where the context allows a link to
/// begin; otherwise fails with a "Cannot start link" error.
///
/// On success returns the slice of input consumed by `link_start`.
pub fn context_link_start<'s, 'r>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, &'s str> {
    if !can_start_link(context) {
        // TODO: Make this a specific error instead of just a generic MyError
        return Err(nom::Err::Error(CustomError::MyError(MyError(
            "Cannot start link",
        ))));
    }
    recognize(link_start)(input)
}
pub fn context_bold_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
let (remaining, actual_match) = recognize(bold_end)(input)?;
peek(alt((
@ -134,26 +152,35 @@ pub fn context_bold_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res
Ok((remaining, actual_match))
}
pub fn paragraph<'s, 'r>(
context: Context<'r, 's>,
i: &'s str,
) -> Res<&'s str, (Vec<TextElement<'s>>, &'s str)> {
/// Matches the end of a link and returns the slice of input it consumed.
///
/// The context is accepted so the function has the shape of a context-aware
/// matcher, but it is not consulted.
pub fn context_link_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    recognize(link_end)(input)
}
pub fn paragraph<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Paragraph<'s>> {
// Add a not(eof) check because many_till cannot match a zero-length string
not(eof)(i)?;
let paragraph_context = context
.with_additional_node(ContextElement::StartOfParagraph)
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&context_paragraph_end)),
}));
}))
.with_additional_node(ContextElement::StartOfParagraph);
let (remaining, (many, till)) =
context_many_till(&paragraph_context, flat_text_element, context_paragraph_end)(i)?;
let many = many
.into_iter()
.filter_map(|token| match token {
Token::TextElement(text_element) => Some(text_element),
Token::Paragraph(_) => panic!("There should only be text elements in paragraphs."),
})
.collect();
Ok((remaining, (many, till)))
Ok((
remaining,
Paragraph {
contents: many,
paragraph_end: till,
},
))
}
fn flat_text_element<'s, 'r>(
@ -191,21 +218,16 @@ fn flat_bold<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Bold<
Ok((remaining, ret))
}
/// Matches `link_end` and returns the consumed input slice rather than the
/// parsed value. The `context` argument is not used by the body.
fn recognize_link_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
recognize(link_end)(input)
}
fn flat_link<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Link<'s>> {
let nom_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&recognize_link_end)),
}));
// let nom_context = context.with_additional_exit_matcher(&recognize_link_end);
let text_element_parser = parser_with_context!(flat_text_element)(&nom_context);
let (remaining, captured) = recognize(tuple((
link_start,
many_till(text_element_parser, link_end),
)))(i)?;
let link_start = parser_with_context!(context_link_start)(&context);
let nom_context = context
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&context_link_end)),
}))
.with_additional_node(ContextElement::Context("link"));
let (remaining, captured) = recognize(tuple((link_start, |i| {
context_many_till(&nom_context, flat_text_element, context_link_end)(i)
})))(i)?;
let ret = Link { contents: captured };
Ok((remaining, ret))
}

View File

@ -1,8 +1,10 @@
use super::text::Paragraph;
use super::text::TextElement;
/// A parsed item, either a single text element or a whole paragraph.
#[derive(Debug)]
pub enum Token<'a> {
/// An inline text element.
TextElement(TextElement<'a>),
/// A complete paragraph.
Paragraph(Paragraph<'a>),
}
impl<'a> Into<Token<'a>> for TextElement<'a> {
@ -10,3 +12,9 @@ impl<'a> Into<Token<'a>> for TextElement<'a> {
Token::TextElement(self)
}
}
/// Wraps a [`Paragraph`] in [`Token::Paragraph`].
///
/// Implemented as `From` rather than `Into`: the standard library's blanket
/// impl then supplies `Into<Token<'a>> for Paragraph<'a>` automatically, so
/// existing `.into()` call sites and `O: Into<Token<'s>>` bounds keep working
/// while `Token::from(paragraph)` becomes available as well.
impl<'a> From<Paragraph<'a>> for Token<'a> {
    fn from(paragraph: Paragraph<'a>) -> Self {
        Token::Paragraph(paragraph)
    }
}