From 1f1a18782e794b4ffbafbd6d905531473c492257 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 16 Dec 2022 00:21:13 -0500 Subject: [PATCH 1/6] Start of many1. --- src/parser/combinator.rs | 47 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/src/parser/combinator.rs b/src/parser/combinator.rs index 2daf5991..c281d483 100644 --- a/src/parser/combinator.rs +++ b/src/parser/combinator.rs @@ -40,6 +40,51 @@ use nom::sequence::tuple; use nom::IResult; use nom::InputLength; +pub fn context_many1<'r, 's, I, O, E, M>( + context: Context<'r, 's>, + mut many_matcher: M, +) -> impl FnMut(I) -> IResult>, E> + 'r +where + I: Clone + InputLength, + E: ParseError, + M: for<'x> Fn(Context<'x, 's>, I) -> IResult + 'r, + O: Into>, +{ + move |mut i: I| { + let mut err = None; + // TODO: Can I eliminate the clone? I think this is incrementing the reference count + let mut current_context = context.clone(); + // Despite the clone, the Rc should still point to the same value. + assert!(current_context.ptr_eq(context)); + loop { + match many_matcher(¤t_context, i.clone()) { + Ok((remaining, many_elem)) => { + current_context = current_context.with_additional_node( + ContextElement::PreviousElementNode(PreviousElementNode { + element: many_elem.into(), + }), + ); + i = remaining; + } + the_error @ Err(_) => { + err = Some(the_error); + break; + } + } + } + current_context + .iter_until(context) + .filter(|context_element| match context_element.get_data() { + ContextElement::PreviousElementNode(_) => true, + ContextElement::ExitMatcherNode(_) => false, + ContextElement::Context(_) => false, + ContextElement::StartOfParagraph => false, + }); + + // todo + todo!() + } +} pub fn context_many_till<'r, 's, I, O, E, F, M, T>( context: Context<'r, 's>, mut many_matcher: M, @@ -53,7 +98,7 @@ where O: Into>, { move |mut i: I| { - // TODO: Can I eliminate the clone? + // TODO: Can I eliminate the clone? I think this is incrementing the reference count let mut current_context = context.clone(); // Despite the clone, the Rc should still point to the same value, otherwise we'll get stuck in an endless loop. assert!(current_context.ptr_eq(context)); From 6459cc64d097a4a88f471135202e09fe610a7639 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 16 Dec 2022 00:47:33 -0500 Subject: [PATCH 2/6] I think thats all for context_many1. Just need to start using it. --- src/parser/combinator.rs | 27 ++++++++++++++++----------- src/parser/list.rs | 33 ++++++++++++++++++++++++++++++--- src/parser/nom_context.rs | 7 +++++++ 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/src/parser/combinator.rs b/src/parser/combinator.rs index c281d483..58ea89b4 100644 --- a/src/parser/combinator.rs +++ b/src/parser/combinator.rs @@ -72,19 +72,24 @@ where } } } - current_context - .iter_until(context) - .filter(|context_element| match context_element.get_data() { - ContextElement::PreviousElementNode(_) => true, - ContextElement::ExitMatcherNode(_) => false, - ContextElement::Context(_) => false, - ContextElement::StartOfParagraph => false, - }); - - // todo - todo!() + let elements: Vec> = current_context + .into_iter_until(context) + .filter_map(|context_element| match context_element { + ContextElement::PreviousElementNode(elem) => Some(elem.element), + ContextElement::ExitMatcherNode(_) => None, + ContextElement::Context(_) => None, + ContextElement::StartOfParagraph => None, + }) + .collect(); + if elements.is_empty() { + if let Some(err) = err { + err?; + } + } + Ok((i, elements)) } } + pub fn context_many_till<'r, 's, I, O, E, F, M, T>( context: Context<'r, 's>, mut many_matcher: M, diff --git a/src/parser/list.rs b/src/parser/list.rs index 0a9b5c4a..ccf6687b 100644 --- a/src/parser/list.rs +++ b/src/parser/list.rs @@ -27,7 +27,7 @@ impl Node { } // TODO: This Debug is only needed because of the try_unwrap+expect -impl List { +impl List { pub fn new() -> Self { List { head: None } } @@ -45,8 +45,10 @@ impl List { match self.head.take() { None => (None, List::new()), Some(popped_node) => { - let extracted_node = - Rc::try_unwrap(popped_node).expect("TODO I should handle this better"); + let extracted_node = match Rc::try_unwrap(popped_node) { + Ok(node) => node, + Err(e) => panic!("try_unwrap failed on Rc in pop_front on List."), + }; ( Some(extracted_node.data), List { @@ -91,6 +93,13 @@ impl List { stop: &other.head, } } + + pub fn into_iter_until<'a>(self, other: &'a List) -> impl Iterator + 'a { + NodeIntoIterUntil { + position: self, + stop: &other, + } + } } pub struct NodeIter<'a, T> { @@ -144,3 +153,21 @@ impl<'a, T> Iterator for NodeIterUntil<'a, T> { Some(return_value) } } + +pub struct NodeIntoIterUntil<'a, T> { + position: List, + stop: &'a List, +} + +impl<'a, T> Iterator for NodeIntoIterUntil<'a, T> { + type Item = T; + + fn next(&mut self) -> Option { + if self.position.ptr_eq(self.stop) { + return None; + } + let (popped_element, new_position) = self.position.pop_front(); + self.position = new_position; + popped_element + } +} diff --git a/src/parser/nom_context.rs b/src/parser/nom_context.rs index c2ea28fc..632dc572 100644 --- a/src/parser/nom_context.rs +++ b/src/parser/nom_context.rs @@ -50,6 +50,13 @@ impl<'r, 's> ContextTree<'r, 's> { self.tree.iter_until(&other.tree) } + pub fn into_iter_until<'x: 'r>( + self, + other: &'x ContextTree<'x, 's>, + ) -> impl Iterator> { + self.tree.into_iter_until(&other.tree) + } + pub fn check_exit_matcher( &'r self, i: &'s str, From 601fc4776adf0a03f019e48fd1c5855800f87d7e Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 16 Dec 2022 00:53:29 -0500 Subject: [PATCH 3/6] Have to rework the paragraph parser to return something that I'd want to put in a token, like a Paragraph struct. --- src/parser/text_element_parser.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser/text_element_parser.rs b/src/parser/text_element_parser.rs index ac10512b..b60f27fa 100644 --- a/src/parser/text_element_parser.rs +++ b/src/parser/text_element_parser.rs @@ -2,6 +2,7 @@ use crate::parser::parser_with_context::parser_with_context; use crate::parser::text::paragraph_end; +use super::combinator::context_many1; use super::combinator::context_many_till; use super::error::CustomError; use super::error::MyError; @@ -46,8 +47,7 @@ type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult Res<&str, Vec<(Vec, &str)>> { let initial_context: ContextTree<'_, '_> = ContextTree::new(); - let paragraph_parser = parser_with_context!(paragraph); - let ret = many1(paragraph_parser(&initial_context))(input); + let ret = context_many1(&initial_context, paragraph)(input); ret } From 1da38c8f7d42905e4ac4e7af91b898afd3acf1b2 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 16 Dec 2022 01:35:49 -0500 Subject: [PATCH 4/6] Use context_many1 for paragraphs in a document. --- src/parser/combinator.rs | 3 ++- src/parser/text.rs | 6 ++++++ src/parser/text_element_parser.rs | 32 +++++++++++++++++++++---------- src/parser/token.rs | 8 ++++++++ 4 files changed, 38 insertions(+), 11 deletions(-) diff --git a/src/parser/combinator.rs b/src/parser/combinator.rs index 58ea89b4..f5d5fca1 100644 --- a/src/parser/combinator.rs +++ b/src/parser/combinator.rs @@ -72,7 +72,7 @@ where } } } - let elements: Vec> = current_context + let mut elements: Vec> = current_context .into_iter_until(context) .filter_map(|context_element| match context_element { ContextElement::PreviousElementNode(elem) => Some(elem.element), @@ -86,6 +86,7 @@ where err?; } } + elements.reverse(); Ok((i, elements)) } } diff --git a/src/parser/text.rs b/src/parser/text.rs index 69da8aca..fa986506 100644 --- a/src/parser/text.rs +++ b/src/parser/text.rs @@ -79,6 +79,12 @@ pub struct Link<'a> { pub contents: &'a str, } +#[derive(Debug)] +pub struct Paragraph<'a> { + pub contents: Vec>, + pub paragraph_end: &'a str, +} + pub fn line_break(input: &str) -> Res<&str, LineBreak> { map(line_ending, |s: &str| LineBreak { contents: s })(input) } diff --git a/src/parser/text_element_parser.rs b/src/parser/text_element_parser.rs index b60f27fa..a065e03f 100644 --- a/src/parser/text_element_parser.rs +++ b/src/parser/text_element_parser.rs @@ -21,6 +21,7 @@ use super::text::span; use super::text::symbol; use super::text::Bold; use super::text::Link; +use super::text::Paragraph; use super::text::Res; use super::text::TextElement; use super::token::Token; @@ -45,10 +46,16 @@ use nom::InputLength; type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult; -pub fn document(input: &str) -> Res<&str, Vec<(Vec, &str)>> { +pub fn document(input: &str) -> Res<&str, Vec> { let initial_context: ContextTree<'_, '_> = ContextTree::new(); - let ret = context_many1(&initial_context, paragraph)(input); - ret + let (remaining, tokens) = context_many1(&initial_context, paragraph)(input)?; + let paragraphs = tokens.into_iter().map(|token| { + match token { + Token::TextElement(_) => unreachable!(), + Token::Paragraph(paragraph) => paragraph, + } + }).collect(); + Ok((remaining, paragraphs)) } pub fn context_paragraph_end<'s, 'r>( @@ -93,6 +100,7 @@ fn _preceded_by_whitespace<'s, 'r>(context: Context<'r, 's>) -> bool { TextElement::Link(_) => return false, }; } + Token::Paragraph(_) => unreachable!(), }; } ContextElement::StartOfParagraph => { @@ -134,26 +142,30 @@ pub fn context_bold_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res Ok((remaining, actual_match)) } -pub fn paragraph<'s, 'r>( - context: Context<'r, 's>, - i: &'s str, -) -> Res<&'s str, (Vec>, &'s str)> { +pub fn paragraph<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Paragraph<'s>> { // Add a not(eof) check because many_till cannot match a zero-length string not(eof)(i)?; let paragraph_context = context - .with_additional_node(ContextElement::StartOfParagraph) .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { exit_matcher: ChainBehavior::AndParent(Some(&context_paragraph_end)), - })); + })) + .with_additional_node(ContextElement::StartOfParagraph); let (remaining, (many, till)) = context_many_till(¶graph_context, flat_text_element, context_paragraph_end)(i)?; let many = many .into_iter() .filter_map(|token| match token { Token::TextElement(text_element) => Some(text_element), + Token::Paragraph(_) => panic!("There should only be text elements in paragraphs."), }) .collect(); - Ok((remaining, (many, till))) + Ok(( + remaining, + Paragraph { + contents: many, + paragraph_end: till, + }, + )) } fn flat_text_element<'s, 'r>( diff --git a/src/parser/token.rs b/src/parser/token.rs index 64eb7837..959e6333 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -1,8 +1,10 @@ +use super::text::Paragraph; use super::text::TextElement; #[derive(Debug)] pub enum Token<'a> { TextElement(TextElement<'a>), + Paragraph(Paragraph<'a>), } impl<'a> Into> for TextElement<'a> { @@ -10,3 +12,9 @@ impl<'a> Into> for TextElement<'a> { Token::TextElement(self) } } + +impl<'a> Into> for Paragraph<'a> { + fn into(self) -> Token<'a> { + Token::Paragraph(self) + } +} From 9eab737aa8e1ae0e04aac57ca080a8e981f9db2f Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 16 Dec 2022 01:39:57 -0500 Subject: [PATCH 5/6] Add comment. --- src/parser/combinator.rs | 2 -- src/parser/text_element_parser.rs | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/parser/combinator.rs b/src/parser/combinator.rs index f5d5fca1..dd4d3cc7 100644 --- a/src/parser/combinator.rs +++ b/src/parser/combinator.rs @@ -34,8 +34,6 @@ use nom::combinator::recognize; use nom::error::ErrorKind; use nom::error::ParseError; use nom::error::VerboseError; -use nom::multi::many1; -use nom::multi::many_till; use nom::sequence::tuple; use nom::IResult; use nom::InputLength; diff --git a/src/parser/text_element_parser.rs b/src/parser/text_element_parser.rs index a065e03f..72f21db2 100644 --- a/src/parser/text_element_parser.rs +++ b/src/parser/text_element_parser.rs @@ -38,7 +38,6 @@ use nom::combinator::recognize; use nom::error::ErrorKind; use nom::error::ParseError; use nom::error::VerboseError; -use nom::multi::many1; use nom::multi::many_till; use nom::sequence::tuple; use nom::IResult; @@ -208,6 +207,7 @@ fn recognize_link_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res<& } fn flat_link<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Link<'s>> { + // TODO: Link has to be updated to contextual functions like bold was let nom_context = context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { exit_matcher: ChainBehavior::AndParent(Some(&recognize_link_end)), From 6caaeddc37181cde2b7c63052603ca579a788158 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 18 Dec 2022 02:05:21 -0500 Subject: [PATCH 6/6] Update link to use the new contextual combinators like bold. --- src/parser/combinator.rs | 31 --------------- src/parser/error.rs | 3 -- src/parser/nom_context.rs | 3 -- src/parser/text.rs | 14 ------- src/parser/text_element_parser.rs | 64 ++++++++++++++++++------------- 5 files changed, 37 insertions(+), 78 deletions(-) diff --git a/src/parser/combinator.rs b/src/parser/combinator.rs index dd4d3cc7..dcfabd1f 100644 --- a/src/parser/combinator.rs +++ b/src/parser/combinator.rs @@ -1,40 +1,9 @@ -use crate::parser::parser_with_context::parser_with_context; -use crate::parser::text::paragraph_end; - -use super::error::CustomError; -use super::error::MyError; -use super::nom_context::ChainBehavior; use super::nom_context::ContextElement; -use super::nom_context::ContextTree; -use super::nom_context::ExitMatcherNode; use super::nom_context::PreviousElementNode; -use super::text::bold_end; -use super::text::bold_start; -use super::text::line_break; -use super::text::link_end; -use super::text::link_start; -use super::text::space; -use super::text::span; -use super::text::symbol; -use super::text::Bold; -use super::text::Link; -use super::text::Res; -use super::text::TextElement; use super::token::Token; use super::Context; -use nom::branch::alt; -use nom::bytes::complete::tag; -use nom::bytes::complete::take; -use nom::combinator::cond; -use nom::combinator::eof; -use nom::combinator::map; -use nom::combinator::not; -use nom::combinator::peek; -use nom::combinator::recognize; use nom::error::ErrorKind; use nom::error::ParseError; -use nom::error::VerboseError; -use nom::sequence::tuple; use nom::IResult; use nom::InputLength; diff --git a/src/parser/error.rs b/src/parser/error.rs index c45ba142..10b48634 100644 --- a/src/parser/error.rs +++ b/src/parser/error.rs @@ -1,8 +1,5 @@ -use nom::bytes::complete::tag; use nom::error::ErrorKind; use nom::error::ParseError; -use nom::error::VerboseError; -use nom::IResult; #[derive(Debug, PartialEq)] pub enum CustomError { diff --git a/src/parser/nom_context.rs b/src/parser/nom_context.rs index 632dc572..76d1ab23 100644 --- a/src/parser/nom_context.rs +++ b/src/parser/nom_context.rs @@ -1,8 +1,5 @@ use std::rc::Rc; -use nom::bytes::complete::take; -use nom::combinator::not; -use nom::error::VerboseError; use nom::IResult; use super::error::CustomError; diff --git a/src/parser/text.rs b/src/parser/text.rs index fa986506..b1fc2400 100644 --- a/src/parser/text.rs +++ b/src/parser/text.rs @@ -1,17 +1,4 @@ use nom::branch::alt; -/* - -hypothetical link: -fn link = many_till(text_element, link_end) - -but what if you start a bold? -fn bold = many_till(text_element, bold_end) could eat the link_end - -Do I pass along break-conditions? Passing link_end into bold's parser? - -I'll try a very simple language first where asterisks always start/end bold and links are just between [ and ]. Paragraphs will have a blank line between them. - -*/ use nom::bytes::complete::tag; use nom::character::complete::alphanumeric1; use nom::character::complete::line_ending; @@ -19,7 +6,6 @@ use nom::character::complete::space1; use nom::combinator::eof; use nom::combinator::map; use nom::combinator::recognize; -use nom::error::VerboseError; use nom::multi::many1; use nom::multi::many_till; use nom::sequence::tuple; diff --git a/src/parser/text_element_parser.rs b/src/parser/text_element_parser.rs index 72f21db2..fa9d373d 100644 --- a/src/parser/text_element_parser.rs +++ b/src/parser/text_element_parser.rs @@ -10,7 +10,6 @@ use super::nom_context::ChainBehavior; use super::nom_context::ContextElement; use super::nom_context::ContextTree; use super::nom_context::ExitMatcherNode; -use super::nom_context::PreviousElementNode; use super::text::bold_end; use super::text::bold_start; use super::text::line_break; @@ -28,32 +27,26 @@ use super::token::Token; use super::Context; use nom::branch::alt; use nom::bytes::complete::tag; -use nom::bytes::complete::take; -use nom::combinator::cond; use nom::combinator::eof; use nom::combinator::map; use nom::combinator::not; use nom::combinator::peek; use nom::combinator::recognize; -use nom::error::ErrorKind; -use nom::error::ParseError; -use nom::error::VerboseError; -use nom::multi::many_till; use nom::sequence::tuple; use nom::IResult; -use nom::InputLength; type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult; pub fn document(input: &str) -> Res<&str, Vec> { let initial_context: ContextTree<'_, '_> = ContextTree::new(); let (remaining, tokens) = context_many1(&initial_context, paragraph)(input)?; - let paragraphs = tokens.into_iter().map(|token| { - match token { + let paragraphs = tokens + .into_iter() + .map(|token| match token { Token::TextElement(_) => unreachable!(), Token::Paragraph(paragraph) => paragraph, - } - }).collect(); + }) + .collect(); Ok((remaining, paragraphs)) } @@ -68,6 +61,10 @@ fn can_start_bold<'s, 'r>(context: Context<'r, 's>) -> bool { _preceded_by_whitespace(context) && !_in_section(context, "bold") } +fn can_start_link<'s, 'r>(context: Context<'r, 's>) -> bool { + !_in_section(context, "link") +} + fn _in_section<'s, 'r, 'x>(context: Context<'r, 's>, section_name: &'x str) -> bool { for thing in context.iter() { match thing.get_data() { @@ -128,6 +125,20 @@ pub fn context_bold_start<'s, 'r>( } } +pub fn context_link_start<'s, 'r>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, &'s str> { + if can_start_link(context) { + recognize(link_start)(input) + } else { + // TODO: Make this a specific error instead of just a generic MyError + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Cannot start link", + )))); + } +} + pub fn context_bold_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { let (remaining, actual_match) = recognize(bold_end)(input)?; peek(alt(( @@ -141,6 +152,11 @@ pub fn context_bold_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res Ok((remaining, actual_match)) } +pub fn context_link_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + let (remaining, actual_match) = recognize(link_end)(input)?; + Ok((remaining, actual_match)) +} + pub fn paragraph<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Paragraph<'s>> { // Add a not(eof) check because many_till cannot match a zero-length string not(eof)(i)?; @@ -202,22 +218,16 @@ fn flat_bold<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Bold< Ok((remaining, ret)) } -fn recognize_link_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { - recognize(link_end)(input) -} - fn flat_link<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Link<'s>> { - // TODO: Link has to be updated to contextual functions like bold was - let nom_context = - context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { - exit_matcher: ChainBehavior::AndParent(Some(&recognize_link_end)), - })); - // let nom_context = context.with_additional_exit_matcher(&recognize_link_end); - let text_element_parser = parser_with_context!(flat_text_element)(&nom_context); - let (remaining, captured) = recognize(tuple(( - link_start, - many_till(text_element_parser, link_end), - )))(i)?; + let link_start = parser_with_context!(context_link_start)(&context); + let nom_context = context + .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + exit_matcher: ChainBehavior::AndParent(Some(&context_link_end)), + })) + .with_additional_node(ContextElement::Context("link")); + let (remaining, captured) = recognize(tuple((link_start, |i| { + context_many_till(&nom_context, flat_text_element, context_link_end)(i) + })))(i)?; let ret = Link { contents: captured }; Ok((remaining, ret)) }