diff --git a/org_mode_samples/element_container_priority/README.org b/org_mode_samples/element_container_priority/README.org index 86c9d90..1d8a2a5 100644 --- a/org_mode_samples/element_container_priority/README.org +++ b/org_mode_samples/element_container_priority/README.org @@ -80,3 +80,5 @@ This test interleaves the opening and closing of each element container to see w Then go through elements parsing them in-order ** Should paragraph exit matcher apply to the element matcher in the exit matcher No. But paragraph exit matcher needs to apply to the object matcher. +** Maybe two groups of elements: alpha and beta +When entering an alpha element, ignore beta exit matchers above this in the context. diff --git a/org_mode_samples/element_container_priority/paragraph_greater_block.org b/org_mode_samples/element_container_priority/paragraph_greater_block.org index 24d76ec..f503036 100644 --- a/org_mode_samples/element_container_priority/paragraph_greater_block.org +++ b/org_mode_samples/element_container_priority/paragraph_greater_block.org @@ -2,4 +2,7 @@ foo #+begin_center + + + #+end_center diff --git a/org_mode_samples/paragraphs/many_blank_lines.org b/org_mode_samples/paragraphs/many_blank_lines.org new file mode 100644 index 0000000..952468b --- /dev/null +++ b/org_mode_samples/paragraphs/many_blank_lines.org @@ -0,0 +1,9 @@ +this is a test with + + + + + + + +many blank lines between paragraphs diff --git a/src/parser/document.rs b/src/parser/document.rs index cf5addd..f2e4308 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -14,8 +14,8 @@ use nom::multi::many_till; use nom::sequence::tuple; use crate::parser::element::element; +use crate::parser::exiting::ExitClass; use crate::parser::object::standard_set_object; -use crate::parser::parser_context::ChainBehavior; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ContextTree; use crate::parser::parser_context::ExitMatcherNode; @@ -114,7 +114,8 @@ fn 
section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Sec .with_additional_node(ContextElement::ConsumeTrailingWhitespace(true)) .with_additional_node(ContextElement::Context("section")) .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { - exit_matcher: ChainBehavior::AndParent(Some(&section_end)), + class: ExitClass::Document, + exit_matcher: &section_end, })); let element_matcher = parser_with_context!(element)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); @@ -123,7 +124,8 @@ fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Sec |(children, _exit_contents)| !children.is_empty(), )(input)?; - let (remaining, _trailing_ws) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; + let (remaining, _trailing_ws) = + maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; let source = get_consumed(input, remaining); Ok((remaining, Section { source, children })) @@ -167,7 +169,8 @@ fn headline<'r, 's>( ) -> Res<&'s str, (usize, &'s str, Vec<Object<'s>>, &'s str)> { let parser_context = context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { - exit_matcher: ChainBehavior::AndParent(Some(&headline_end)), + class: ExitClass::Document, + exit_matcher: &headline_end, })); let standard_set_object_matcher = parser_with_context!(standard_set_object)(&parser_context); let start_of_line_matcher = parser_with_context!(start_of_line)(&parser_context); diff --git a/src/parser/drawer.rs b/src/parser/drawer.rs index 2c417e8..c126af3 100644 --- a/src/parser/drawer.rs +++ b/src/parser/drawer.rs @@ -11,7 +11,7 @@ use nom::sequence::tuple; use super::Context; use crate::parser::element::element; use crate::parser::error::Res; -use crate::parser::parser_context::ChainBehavior; +use crate::parser::exiting::ExitClass; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use 
crate::parser::parser_with_context::parser_with_context; @@ -36,7 +36,8 @@ pub fn drawer<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, let parser_context = context .with_additional_node(ContextElement::ConsumeTrailingWhitespace(true)) .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { - exit_matcher: ChainBehavior::AndParent(Some(&drawer_end)), + class: ExitClass::Alpha, + exit_matcher: &drawer_end, })); let element_matcher = parser_with_context!(element)(&parser_context); diff --git a/src/parser/exiting.rs b/src/parser/exiting.rs new file mode 100644 index 0000000..a7e3fa7 --- /dev/null +++ b/src/parser/exiting.rs @@ -0,0 +1,17 @@ +#[derive(Debug, Copy, Clone)] +pub enum ExitClass { + /// Headlines and sections. + Document = 1, + + /// Elements who take priority over beta elements when matching. + Alpha = 20, + + /// Elements who cede priority to alpha elements when matching. + Beta = 300, +} + +impl std::fmt::Display for ExitClass { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} diff --git a/src/parser/footnote_definition.rs b/src/parser/footnote_definition.rs index bc01939..307b04b 100644 --- a/src/parser/footnote_definition.rs +++ b/src/parser/footnote_definition.rs @@ -2,8 +2,8 @@ use super::error::Res; use super::util::WORD_CONSTITUENT_CHARACTERS; use super::Context; use crate::parser::element::element; +use crate::parser::exiting::ExitClass; use crate::parser::greater_element::FootnoteDefinition; -use crate::parser::parser_context::ChainBehavior; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; @@ -37,7 +37,8 @@ pub fn footnote_definition<'r, 's>( let parser_context = context .with_additional_node(ContextElement::ConsumeTrailingWhitespace(true)) .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { - exit_matcher: 
ChainBehavior::IgnoreParent(Some(&footnote_definition_end)), + class: ExitClass::Alpha, + exit_matcher: &footnote_definition_end, })); // TODO: The problem is we are not accounting for trailing whitespace like we do in section. Maybe it would be easier if we passed down whether or not to parse trailing whitespace into the element matcher similar to how tag takes in parameters. let element_matcher = parser_with_context!(element)(&parser_context); diff --git a/src/parser/greater_block.rs b/src/parser/greater_block.rs index 1524b97..f83d041 100644 --- a/src/parser/greater_block.rs +++ b/src/parser/greater_block.rs @@ -1,23 +1,29 @@ use super::error::Res; +use super::object::TextMarkup; use super::Context; use crate::parser::element::element; use crate::parser::error::CustomError; use crate::parser::error::MyError; +use crate::parser::exiting::ExitClass; use crate::parser::greater_element::GreaterBlock; -use crate::parser::parser_context::ChainBehavior; +use crate::parser::object::Object; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; +use crate::parser::util::blank_line; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting; use crate::parser::util::start_of_line; +use crate::parser::Element; +use crate::parser::Paragraph; use nom::branch::alt; use nom::bytes::complete::is_not; use nom::bytes::complete::tag_no_case; use nom::character::complete::line_ending; use nom::character::complete::space0; use nom::character::complete::space1; +use nom::combinator::consumed; use nom::combinator::eof; use nom::combinator::opt; use nom::combinator::verify; @@ -45,14 +51,29 @@ pub fn greater_block<'r, 's>( .with_additional_node(ContextElement::ConsumeTrailingWhitespace(true)) .with_additional_node(ContextElement::GreaterBlock(name)) 
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { - exit_matcher: ChainBehavior::IgnoreParent(Some(&greater_block_end)), + class: ExitClass::Alpha, + exit_matcher: &greater_block_end, })); + let parameters = match parameters { + Some((_ws, parameters)) => Some(parameters), + None => None, + }; let element_matcher = parser_with_context!(element)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); // TODO: Not handling nested greater blocks - let (remaining, (children, _exit_contents)) = - many_till(element_matcher, exit_matcher)(remaining)?; + // Check for a completely empty block + let (remaining, children) = match consumed(many_till(blank_line, exit_matcher))(remaining) { + Ok((remaining, (whitespace, (children, _exit_contents)))) => ( + remaining, + vec![Element::Paragraph(Paragraph::of_text(whitespace))], + ), + Err(_) => { + let (remaining, (children, _exit_contents)) = + many_till(element_matcher, exit_matcher)(remaining)?; + (remaining, children) + } + }; let (remaining, _end) = greater_block_end(&parser_context, remaining)?; // Not checking if parent exit matcher is causing exit because the greater_block_end matcher asserts we matched a full greater block @@ -60,10 +81,6 @@ pub fn greater_block<'r, 's>( let (remaining, _trailing_ws) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?; - let parameters = match parameters { - Some((_ws, parameters)) => Some(parameters), - None => None, - }; let source = get_consumed(input, remaining); Ok(( remaining, diff --git a/src/parser/lesser_element.rs b/src/parser/lesser_element.rs index 110e647..6d041b4 100644 --- a/src/parser/lesser_element.rs +++ b/src/parser/lesser_element.rs @@ -1,4 +1,4 @@ -use super::object::Object; +use super::object::{Object, TextMarkup}; #[derive(Debug)] pub struct Paragraph<'s> { @@ -10,3 +10,14 @@ pub struct Paragraph<'s> { pub struct Comment<'s> { pub source: &'s str, } + +impl<'s> Paragraph<'s> { + pub 
fn of_text(input: &'s str) -> Self { + let mut objects = Vec::with_capacity(1); + objects.push(Object::TextMarkup(TextMarkup { source: input })); + Paragraph { + source: input, + children: objects, + } + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7b32ed3..8f63930 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3,6 +3,7 @@ mod document; mod drawer; mod element; mod error; +mod exiting; mod footnote_definition; mod greater_block; mod greater_element; diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index 3881fe8..c350529 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -6,8 +6,8 @@ use nom::multi::many1; use nom::multi::many_till; use nom::sequence::tuple; +use crate::parser::exiting::ExitClass; use crate::parser::object::standard_set_object; -use crate::parser::parser_context::ChainBehavior; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; @@ -26,7 +26,8 @@ use super::Context; pub fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Paragraph<'s>> { let parser_context = context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { - exit_matcher: ChainBehavior::AndParent(Some(&paragraph_end)), + class: ExitClass::Beta, + exit_matcher: &paragraph_end, })); let standard_set_object_matcher = parser_with_context!(standard_set_object)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index 9f578f9..e113d21 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -1,14 +1,14 @@ use std::rc::Rc; -use nom::combinator::eof; -use nom::IResult; - use super::error::CustomError; use super::error::MyError; use super::error::Res; use super::list::List; use super::list::Node; use super::Context; +use 
crate::parser::exiting::ExitClass; +use nom::combinator::eof; +use nom::IResult; type Matcher = dyn for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str>; @@ -76,33 +76,20 @@ impl<'r, 's> ContextTree<'r, 's> { // exit_matcher: ChainBehavior::IgnoreParent(Some(&always_fail)), // })); + let mut current_class_filter = ExitClass::Beta; for current_node in self.iter() { let context_element = current_node.get_data(); match context_element { ContextElement::ExitMatcherNode(exit_matcher) => { - match exit_matcher.exit_matcher { - ChainBehavior::AndParent(Some(matcher)) => { - let local_context = ContextTree::branch_from(current_node); - let local_result = matcher(&local_context, i); - if local_result.is_ok() { - return local_result; - } + if exit_matcher.class as u32 <= current_class_filter as u32 { + current_class_filter = exit_matcher.class; + let local_context = ContextTree::branch_from(current_node); + let local_result = (exit_matcher.exit_matcher)(&local_context, i); + if local_result.is_ok() { + return local_result; } - ChainBehavior::AndParent(None) => {} - ChainBehavior::IgnoreParent(Some(matcher)) => { - let local_context = ContextTree::branch_from(current_node); - let local_result = matcher(&local_context, i); - if local_result.is_ok() { - return local_result; - } - // TODO: Make this a specific error instead of just a generic MyError - return Err(nom::Err::Error(CustomError::MyError(MyError("NoExit")))); - } - ChainBehavior::IgnoreParent(None) => { - // TODO: Make this a specific error instead of just a generic MyError - return Err(nom::Err::Error(CustomError::MyError(MyError("NoExit")))); - } - }; + } + } _ => {} }; @@ -166,28 +153,15 @@ pub enum ContextElement<'r, 's> { ConsumeTrailingWhitespace(bool), } -#[derive(Debug)] pub struct ExitMatcherNode<'r> { - pub exit_matcher: ChainBehavior<'r>, + pub exit_matcher: &'r Matcher, + pub class: ExitClass, } -#[derive(Clone)] -pub enum ChainBehavior<'r> { - AndParent(Option<&'r Matcher>), - 
IgnoreParent(Option<&'r Matcher>), -} - -impl<'r> std::fmt::Debug for ChainBehavior<'r> { +impl<'r> std::fmt::Debug for ExitMatcherNode<'r> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut formatter = f.debug_struct("ChainBehavior"); - // match self { - // ChainBehavior::AndParent(_) => { - // formatter = formatter.field("type", &"AndParent"); - // } - // ChainBehavior::IgnoreParent(_) => { - // formatter = formatter.field("type", &"IgnoreParent"); - // } - // }; + let mut formatter = f.debug_struct("ExitMatcherNode"); + formatter.field("class", &self.class.to_string()); formatter.finish() } } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 133015c..f474022 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -8,7 +8,7 @@ use super::util::maybe_consume_trailing_whitespace_if_not_exiting; use super::util::non_whitespace_character; use super::Context; use crate::parser::element::element; -use crate::parser::parser_context::ChainBehavior; +use crate::parser::exiting::ExitClass; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::util::blank_line; @@ -37,7 +37,8 @@ use tracing::span; pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainList<'s>> { let parser_context = context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { - exit_matcher: ChainBehavior::AndParent(Some(&plain_list_end)), + class: ExitClass::Beta, + exit_matcher: &plain_list_end, })); let without_consume_context = parser_context.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false)); @@ -152,12 +153,14 @@ pub fn plain_list_item<'r, 's>( .with_additional_node(ContextElement::ConsumeTrailingWhitespace(true)) .with_additional_node(ContextElement::ListItem(indent_level)) .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { - exit_matcher: 
ChainBehavior::AndParent(Some(&plain_list_item_end)), + class: ExitClass::Beta, + exit_matcher: &plain_list_item_end, })); let without_consume_context = context .with_additional_node(ContextElement::ListItem(indent_level)) .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { - exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)), + class: ExitClass::Beta, + exit_matcher: &plain_list_item_end, })); let with_consume_matcher = parser_with_context!(element)(&with_consume_context); diff --git a/toy_language.txt b/toy_language.txt index 24d76ec..419e66b 100644 --- a/toy_language.txt +++ b/toy_language.txt @@ -2,4 +2,7 @@ foo #+begin_center + + + #+end_center