From ab09edb5de8da73a6890d119800166773c4abd17 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 25 Mar 2023 13:16:28 -0400 Subject: [PATCH 01/29] Initial structures for plain lists. --- src/parser/greater_element.rs | 10 ++++++++++ src/parser/mod.rs | 1 + src/parser/plain_list.rs | 1 + 3 files changed, 12 insertions(+) create mode 100644 src/parser/plain_list.rs diff --git a/src/parser/greater_element.rs b/src/parser/greater_element.rs index 38d2a7a..51fbf5d 100644 --- a/src/parser/greater_element.rs +++ b/src/parser/greater_element.rs @@ -1,4 +1,14 @@ +use super::element::Element; + #[derive(Debug)] pub struct PlainList<'s> { pub source: &'s str, + pub children: Vec>, +} + +#[derive(Debug)] +pub struct PlainListItem<'s> { + pub source: &'s str, + pub bullet: &'s str, + pub contents: Vec>, } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ca10ae3..bb384e4 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9,6 +9,7 @@ mod object; mod paragraph; mod parser_context; mod parser_with_context; +mod plain_list; mod plain_text; mod source; mod util; diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/parser/plain_list.rs @@ -0,0 +1 @@ + From 4a863e92ff44febcc28e709f8421ed74b105653a Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 25 Mar 2023 13:25:20 -0400 Subject: [PATCH 02/29] Add a test case showing only paragraphs exist on the first line for plain lists. 
--- .../{README.org => README.txt} | 0 .../plain_lists/start_nested_list_same_line.README.txt | 1 + .../plain_lists/start_nested_list_same_line.org | 4 ++++ src/parser/plain_list.rs | 10 ++++++++++ 4 files changed, 15 insertions(+) rename org_mode_samples/exit_matcher_investigation/{README.org => README.txt} (100%) create mode 100644 org_mode_samples/plain_lists/start_nested_list_same_line.README.txt create mode 100644 org_mode_samples/plain_lists/start_nested_list_same_line.org diff --git a/org_mode_samples/exit_matcher_investigation/README.org b/org_mode_samples/exit_matcher_investigation/README.txt similarity index 100% rename from org_mode_samples/exit_matcher_investigation/README.org rename to org_mode_samples/exit_matcher_investigation/README.txt diff --git a/org_mode_samples/plain_lists/start_nested_list_same_line.README.txt b/org_mode_samples/plain_lists/start_nested_list_same_line.README.txt new file mode 100644 index 0000000..4253282 --- /dev/null +++ b/org_mode_samples/plain_lists/start_nested_list_same_line.README.txt @@ -0,0 +1 @@ +Seems like the only element that can exist on the same line as the opening of an item is a paragraph. Perhaps all other elements should have a start of line matcher at the beginning of their parser to force this? diff --git a/org_mode_samples/plain_lists/start_nested_list_same_line.org b/org_mode_samples/plain_lists/start_nested_list_same_line.org new file mode 100644 index 0000000..ece6657 --- /dev/null +++ b/org_mode_samples/plain_lists/start_nested_list_same_line.org @@ -0,0 +1,4 @@ +1. regular + 1. nested list +2. 1. Sameline +3. 
| table| diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 8b13789..4c2040d 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -1 +1,11 @@ +use super::error::Res; +use super::lesser_element::Paragraph; +use super::Context; +#[allow(dead_code)] +pub fn plain_list_item<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, Paragraph<'s>> { + todo!() +} From e6752b9d83a8fbb2919a977e622e67240d4e0318 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 25 Mar 2023 14:10:22 -0400 Subject: [PATCH 03/29] Building the plain list item context. --- Makefile | 12 +++-- src/parser/document.rs | 27 +---------- src/parser/parser_context.rs | 5 ++ src/parser/plain_list.rs | 64 +++++++++++++++++++++++++ src/parser/util.rs | 30 ++++++++++++ {src/parser => trash}/old_combinator.rs | 0 {src/parser => trash}/old_document.rs | 0 7 files changed, 110 insertions(+), 28 deletions(-) rename {src/parser => trash}/old_combinator.rs (100%) rename {src/parser => trash}/old_document.rs (100%) diff --git a/Makefile b/Makefile index b90d38b..cdc52da 100644 --- a/Makefile +++ b/Makefile @@ -11,14 +11,20 @@ endif .RECIPEPREFIX = > .PHONY: build -build: target/debug/toy +build: +> cargo build .PHONY: clean clean: > cargo clean -target/debug/toy: -> cargo build +.PHONY: test +test: +> cargo test + +.PHONY: run +run: +> cargo run .PHONY: jaeger jaeger: diff --git a/src/parser/document.rs b/src/parser/document.rs index 389bd29..0b80bdb 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -14,8 +14,6 @@ use nom::multi::many1_count; use nom::sequence::tuple; use crate::parser::element::element; -use crate::parser::error::CustomError; -use crate::parser::error::MyError; use crate::parser::object::standard_set_object; use crate::parser::parser_context::ChainBehavior; use crate::parser::parser_context::ContextElement; @@ -28,7 +26,7 @@ use super::object::Object; use super::parser_with_context::parser_with_context; use 
super::source::Source; use super::util::get_consumed; -use super::util::get_one_before; +use super::util::start_of_line; use super::util::trailing_whitespace; use super::Context; @@ -117,7 +115,6 @@ fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Hea not(|i| context.check_exit_matcher(i))(input)?; let (remaining, (star_count, _ws, title, _ws2)) = headline(context, input)?; let section_matcher = parser_with_context!(section)(context); - // TODO: This needs to only match headings below the current level let heading_matcher = parser_with_context!(heading)(context); let (remaining, children) = many0(alt(( map( @@ -159,26 +156,6 @@ fn headline<'r, 's>( Ok((remaining, (star_count, ws, title, ws2))) } -fn headline_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { +fn headline_end<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { alt((line_ending, eof))(input) } - -/// Check that we are at the start of a line -fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); - match preceding_character { - Some('\n') => {} - Some(_) => { - // Not at start of line, cannot be a heading - return Err(nom::Err::Error(CustomError::MyError(MyError( - "Not at start of line", - )))); - } - // If None, we are at the start of the file which allows for headings - None => {} - }; - Ok((input, ())) -} diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index fd2b405..e0cc0ef 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -106,9 +106,14 @@ impl<'r, 's> ContextTree<'r, 's> { #[derive(Debug)] pub enum ContextElement<'r, 's> { + /// Stores a reference to the entire org-mode document being parsed. + /// + /// This is used for look-behind. 
DocumentRoot(&'s str), ExitMatcherNode(ExitMatcherNode<'r>), Context(&'r str), + + /// Stores the indentation level of the current list item ListItem(usize), } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 4c2040d..13e5b69 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -1,5 +1,22 @@ +use nom::branch::alt; +use nom::character::complete::space0; +use nom::combinator::eof; +use nom::combinator::not; +use nom::combinator::recognize; +use nom::combinator::verify; +use nom::sequence::tuple; + +use crate::parser::parser_context::ChainBehavior; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::util::start_of_line; + +use super::error::CustomError; +use super::error::MyError; use super::error::Res; use super::lesser_element::Paragraph; +use super::parser_with_context::parser_with_context; +use super::util::non_whitespace_character; use super::Context; #[allow(dead_code)] @@ -7,5 +24,52 @@ pub fn plain_list_item<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, Paragraph<'s>> { + not(|i| context.check_exit_matcher(i))(input)?; + start_of_line(context, input)?; + let (remaining, leading_whitespace) = space0(input)?; + // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) + let indent_level = leading_whitespace.len(); + let list_item_context = context + .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)), + })) + .with_additional_node(ContextElement::ListItem(indent_level)); todo!() } + +fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + let plain_list_item_matcher = parser_with_context!(plain_list_item)(context); + let line_indented_lte_matcher = 
parser_with_context!(line_indented_lte)(context); + alt(( + recognize(plain_list_item_matcher), + line_indented_lte_matcher, + eof, + ))(input) +} + +fn line_indented_lte<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + let current_item_indent_level: &usize = + get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError( + "Not inside a plain list item", + ))))?; + + start_of_line(context, input)?; + + let matched = recognize(verify( + tuple((space0::<&str, _>, non_whitespace_character)), + // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) + |(_space0, _anychar)| _space0.len() <= *current_item_indent_level, + ))(input)?; + + Ok(matched) +} + +fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize> { + for thing in context.iter() { + match thing.get_data() { + ContextElement::ListItem(depth) => return Some(depth), + _ => {} + }; + } + None +} diff --git a/src/parser/util.rs b/src/parser/util.rs index 9962dc7..8fe28ee 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -1,5 +1,6 @@ use nom::branch::alt; use nom::character::complete::line_ending; +use nom::character::complete::none_of; use nom::character::complete::space0; use nom::combinator::eof; use nom::combinator::not; @@ -7,6 +8,8 @@ use nom::combinator::recognize; use nom::multi::many0; use nom::sequence::tuple; +use super::error::CustomError; +use super::error::MyError; use super::error::Res; use super::parser_context::ContextElement; use super::Context; @@ -76,6 +79,33 @@ pub fn trailing_whitespace(input: &str) -> Res<&str, &str> { alt((eof, recognize(tuple((line_ending, many0(blank_line))))))(input) } +/// Check that we are at the start of a line +pub fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { + let document_root = context.get_document_root().unwrap(); + let 
preceding_character = get_one_before(document_root, input) + .map(|slice| slice.chars().next()) + .flatten(); + match preceding_character { + Some('\n') => {} + Some(_) => { + // Not at start of line, cannot be a heading + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Not at start of line", + )))); + } + // If None, we are at the start of the file which allows for headings + None => {} + }; + Ok((input, ())) +} + +/// Pull one non-whitespace character. +/// +/// This function only operates on spaces, tabs, carriage returns, and line feeds. It does not handle fancy unicode whitespace. +pub fn non_whitespace_character(input: &str) -> Res<&str, char> { + none_of(" \t\r\n")(input) +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/parser/old_combinator.rs b/trash/old_combinator.rs similarity index 100% rename from src/parser/old_combinator.rs rename to trash/old_combinator.rs diff --git a/src/parser/old_document.rs b/trash/old_document.rs similarity index 100% rename from src/parser/old_document.rs rename to trash/old_document.rs From 422535fbe4457d275ea1ea4635937401c2d34259 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 25 Mar 2023 14:23:52 -0400 Subject: [PATCH 04/29] Wrote a plain list item parser. --- .../plain_lists/empty_list_item.org | 3 ++ src/parser/plain_list.rs | 40 +++++++++++++++++-- 2 files changed, 39 insertions(+), 4 deletions(-) create mode 100644 org_mode_samples/plain_lists/empty_list_item.org diff --git a/org_mode_samples/plain_lists/empty_list_item.org b/org_mode_samples/plain_lists/empty_list_item.org new file mode 100644 index 0000000..0fe3a9a --- /dev/null +++ b/org_mode_samples/plain_lists/empty_list_item.org @@ -0,0 +1,3 @@ +1. +2. +3. 
diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 13e5b69..085e152 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -1,20 +1,26 @@ use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::character::complete::digit1; +use nom::character::complete::one_of; use nom::character::complete::space0; use nom::combinator::eof; use nom::combinator::not; use nom::combinator::recognize; use nom::combinator::verify; +use nom::multi::many0; use nom::sequence::tuple; +use crate::parser::element::element; use crate::parser::parser_context::ChainBehavior; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::util::get_consumed; use crate::parser::util::start_of_line; use super::error::CustomError; use super::error::MyError; use super::error::Res; -use super::lesser_element::Paragraph; +use super::greater_element::PlainListItem; use super::parser_with_context::parser_with_context; use super::util::non_whitespace_character; use super::Context; @@ -23,18 +29,44 @@ use super::Context; pub fn plain_list_item<'r, 's>( context: Context<'r, 's>, input: &'s str, -) -> Res<&'s str, Paragraph<'s>> { +) -> Res<&'s str, PlainListItem<'s>> { not(|i| context.check_exit_matcher(i))(input)?; start_of_line(context, input)?; let (remaining, leading_whitespace) = space0(input)?; // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) let indent_level = leading_whitespace.len(); - let list_item_context = context + let parser_context = context .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)), })) .with_additional_node(ContextElement::ListItem(indent_level)); - todo!() + + let element_matcher = parser_with_context!(element)(&parser_context); + let (remaining, (bull, _ws)) = 
tuple((bullet, space0))(remaining)?; + let (remaining, contents) = many0(element_matcher)(remaining)?; + let source = get_consumed(input, remaining); + + Ok(( + remaining, + PlainListItem { + source, + bullet: bull, + contents, + }, + )) +} + +fn bullet<'s>(i: &'s str) -> Res<&'s str, &'s str> { + alt(( + tag("*"), + tag("-"), + tag("+"), + recognize(tuple((counter, alt((tag("."), tag(")")))))), + ))(i) +} + +fn counter<'s>(i: &'s str) -> Res<&'s str, &'s str> { + alt((recognize(one_of("abcdefghijklmnopqrstuvwxyz")), digit1))(i) } fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { From fc9d131740fd9a56ed2889e40182e10014d02fd0 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 25 Mar 2023 14:28:48 -0400 Subject: [PATCH 05/29] Add basic test for plain list item. --- src/parser/plain_list.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 085e152..d3177c6 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -20,11 +20,17 @@ use crate::parser::util::start_of_line; use super::error::CustomError; use super::error::MyError; use super::error::Res; +use super::greater_element::PlainList; use super::greater_element::PlainListItem; use super::parser_with_context::parser_with_context; use super::util::non_whitespace_character; use super::Context; +#[allow(dead_code)] +pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainList<'s>> { + todo!() +} + #[allow(dead_code)] pub fn plain_list_item<'r, 's>( context: Context<'r, 's>, @@ -105,3 +111,24 @@ fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize } None } + +#[cfg(test)] +mod tests { + use crate::parser::parser_context::ContextElement; + use crate::parser::parser_context::ContextTree; + use crate::parser::parser_with_context::parser_with_context; + + use super::*; + + #[test] + fn plain_list_item_empty() { 
+ let input = "1."; + let initial_context: ContextTree<'_, '_> = ContextTree::new(); + let document_context = + initial_context.with_additional_node(ContextElement::DocumentRoot(input)); + let plain_list_item_matcher = parser_with_context!(plain_list_item)(&document_context); + let (remaining, result) = plain_list_item_matcher(input).unwrap(); + assert_eq!(remaining, ""); + assert_eq!(result.source, "1."); + } +} From d2923bfc0f37e8985371c51ca024f0e2295ce5d3 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 25 Mar 2023 14:45:35 -0400 Subject: [PATCH 06/29] Add a special case exit matcher for end of file. --- src/parser/parser_context.rs | 7 +++++++ src/parser/plain_list.rs | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index e0cc0ef..8f4ac3a 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -1,5 +1,6 @@ use std::rc::Rc; +use nom::combinator::eof; use nom::IResult; use super::error::CustomError; @@ -57,6 +58,12 @@ impl<'r, 's> ContextTree<'r, 's> { &'r self, i: &'s str, ) -> IResult<&'s str, &'s str, CustomError<&'s str>> { + // Special check for EOF. We don't just make this a document-level exit matcher since the IgnoreParent ChainBehavior could cause early exit matchers to not run. + let at_end_of_file = eof(i); + if at_end_of_file.is_ok() { + return at_end_of_file; + } + for current_node in self.iter() { let context_element = current_node.get_data(); match context_element { diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index d3177c6..805e3c5 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -131,4 +131,16 @@ mod tests { assert_eq!(remaining, ""); assert_eq!(result.source, "1."); } + + #[test] + fn plain_list_item_simple() { + let input = "1. 
foo"; + let initial_context: ContextTree<'_, '_> = ContextTree::new(); + let document_context = + initial_context.with_additional_node(ContextElement::DocumentRoot(input)); + let plain_list_item_matcher = parser_with_context!(plain_list_item)(&document_context); + let (remaining, result) = plain_list_item_matcher(input).unwrap(); + assert_eq!(remaining, ""); + assert_eq!(result.source, "1. foo"); + } } From e7b95ea59c2de51e6d18a69cdab8569b41db18e1 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 25 Mar 2023 17:00:56 -0400 Subject: [PATCH 07/29] Add more jaeger commands to the Makefile. --- Makefile | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cdc52da..aa2b8a3 100644 --- a/Makefile +++ b/Makefile @@ -28,4 +28,12 @@ run: .PHONY: jaeger jaeger: -> docker run -d --rm -p 6831:6831/udp -p 6832:6832/udp -p 16686:16686 -p 14268:14268 jaegertracing/all-in-one:latest +> docker run -d --rm --name toylanguagedocker -p 6831:6831/udp -p 6832:6832/udp -p 16686:16686 -p 14268:14268 jaegertracing/all-in-one:latest + +.PHONY: jaegerweb +jaegerweb: +> xdg-open 'http://localhost:16686' + +.PHONY: jaegerstop +jaegerstop: +> docker stop toylanguagedocker From 188fdaacbc7484fe6d90c33fb1b35e420eeda9cf Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 27 Mar 2023 12:07:40 -0400 Subject: [PATCH 08/29] Remove dead line of code. 
--- src/parser/plain_text.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/parser/plain_text.rs b/src/parser/plain_text.rs index 1330cea..87d592b 100644 --- a/src/parser/plain_text.rs +++ b/src/parser/plain_text.rs @@ -13,7 +13,6 @@ pub fn plain_text<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s "Zero input length to plain_text.", )))); } - // not(|i| context.check_exit_matcher(i))(input)?; let mut current_input = input.char_indices(); loop { match current_input.next() { From e7397f818d60caeb70d8836689d012ae60e4cabb Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 27 Mar 2023 12:33:58 -0400 Subject: [PATCH 09/29] Adding some notes on the exit matcher loop issue. --- notes/exit_matcher_loop_notes.txt | 35 +++++++++++++++++++ .../paragraph_with_immediate_list.org | 2 ++ 2 files changed, 37 insertions(+) create mode 100644 notes/exit_matcher_loop_notes.txt create mode 100644 org_mode_samples/paragraphs/paragraph_with_immediate_list.org diff --git a/notes/exit_matcher_loop_notes.txt b/notes/exit_matcher_loop_notes.txt new file mode 100644 index 0000000..5e357ae --- /dev/null +++ b/notes/exit_matcher_loop_notes.txt @@ -0,0 +1,35 @@ +Headings add exit matcher for heading + +Paragraphs add exit matcher for elements (but it should be sans paragraph) + + + + +* foo +* bar +* baz + +context tree -> () + +match * foo + +context tree -> exit(heading matcher) + +check exit +invoke heading matcher +check exit +invoke heading matcher +check exit +invoke heading matcher +adds second heading matcher exit + + +Ways around this: +- Always parse SOMETHING before checking for exit + - Doesn't always seem possible +- Disable exit matchers during exit check + - Seems like it would break syntax +- Have separate parsers for the beginning of the exit condition (for example, checking for just the headline instead of the full heading parser) + - Won't be possible with paragraphs ending at any other element +- Check exit matchers in parent parser + - Will this 
work? seems like it would just create larger loops diff --git a/org_mode_samples/paragraphs/paragraph_with_immediate_list.org b/org_mode_samples/paragraphs/paragraph_with_immediate_list.org new file mode 100644 index 0000000..2271d13 --- /dev/null +++ b/org_mode_samples/paragraphs/paragraph_with_immediate_list.org @@ -0,0 +1,2 @@ +foo bar baz +1. lorem From 81a9a754dec34f73bb167eb105aa87bb8b340d4c Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 27 Mar 2023 12:52:49 -0400 Subject: [PATCH 10/29] I seem to have solved the infinite loop issue by moving the exit check into the plain list parser. --- src/parser/greater_element.rs | 1 + src/parser/plain_list.rs | 41 +++++++++++++++++++++++++++++++++-- src/parser/util.rs | 9 ++++++++ 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/src/parser/greater_element.rs b/src/parser/greater_element.rs index 51fbf5d..59717d7 100644 --- a/src/parser/greater_element.rs +++ b/src/parser/greater_element.rs @@ -9,6 +9,7 @@ pub struct PlainList<'s> { #[derive(Debug)] pub struct PlainListItem<'s> { pub source: &'s str, + pub indentation: usize, pub bullet: &'s str, pub contents: Vec>, } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 805e3c5..3afc628 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -8,12 +8,14 @@ use nom::combinator::not; use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many0; +use nom::multi::many_till; use nom::sequence::tuple; use crate::parser::element::element; use crate::parser::parser_context::ChainBehavior; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::start_of_line; @@ -28,7 +30,18 @@ use super::Context; #[allow(dead_code)] pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainList<'s>> { - todo!() + let (remaining, 
first_item) = plain_list_item(context, input)?; + let plain_list_item_matcher = parser_with_context!(plain_list_item)(context); + let exit_matcher = parser_with_context!(exit_matcher_parser)(context); + let (remaining, (mut children, _exit_contents)) = many_till( + verify(plain_list_item_matcher, |pli| { + pli.indentation == first_item.indentation + }), + exit_matcher, + )(remaining)?; + let source = get_consumed(input, remaining); + children.insert(0, first_item); + Ok((remaining, PlainList { source, children })) } #[allow(dead_code)] @@ -36,7 +49,6 @@ pub fn plain_list_item<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, PlainListItem<'s>> { - not(|i| context.check_exit_matcher(i))(input)?; start_of_line(context, input)?; let (remaining, leading_whitespace) = space0(input)?; // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) @@ -56,6 +68,7 @@ pub fn plain_list_item<'r, 's>( remaining, PlainListItem { source, + indentation: indent_level, bullet: bull, contents, }, @@ -143,4 +156,28 @@ mod tests { assert_eq!(remaining, ""); assert_eq!(result.source, "1. foo"); } + + #[test] + fn plain_list_empty() { + let input = "1."; + let initial_context: ContextTree<'_, '_> = ContextTree::new(); + let document_context = + initial_context.with_additional_node(ContextElement::DocumentRoot(input)); + let plain_list_matcher = parser_with_context!(plain_list)(&document_context); + let (remaining, result) = plain_list_matcher(input).unwrap(); + assert_eq!(remaining, ""); + assert_eq!(result.source, "1."); + } + + #[test] + fn plain_list_simple() { + let input = "1. 
foo"; + let initial_context: ContextTree<'_, '_> = ContextTree::new(); + let document_context = + initial_context.with_additional_node(ContextElement::DocumentRoot(input)); + let plain_list_matcher = parser_with_context!(plain_list)(&document_context); + let (remaining, result) = plain_list_matcher(input).unwrap(); + assert_eq!(remaining, ""); + assert_eq!(result.source, "1. foo"); + } } diff --git a/src/parser/util.rs b/src/parser/util.rs index 8fe28ee..b8a0e21 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -4,6 +4,7 @@ use nom::character::complete::none_of; use nom::character::complete::space0; use nom::combinator::eof; use nom::combinator::not; +use nom::combinator::peek; use nom::combinator::recognize; use nom::multi::many0; use nom::sequence::tuple; @@ -106,6 +107,14 @@ pub fn non_whitespace_character(input: &str) -> Res<&str, char> { none_of(" \t\r\n")(input) } +/// Check that we are at the start of a line +pub fn exit_matcher_parser<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, &'s str> { + peek(|i| context.check_exit_matcher(i))(input) +} + #[cfg(test)] mod tests { use super::*; From 22a2ed29f170b8d29383287ffb72b724cf377013 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 27 Mar 2023 13:06:41 -0400 Subject: [PATCH 11/29] Integrate plain list parser into the parser tree. 
--- src/parser/document.rs | 8 +++++++- src/parser/element.rs | 24 +++++++++++++++++------- src/parser/paragraph.rs | 9 +++++++-- src/parser/plain_list.rs | 39 ++++++++++++++++++--------------------- 4 files changed, 49 insertions(+), 31 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index 0b80bdb..160f507 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -11,6 +11,7 @@ use nom::combinator::verify; use nom::multi::many0; use nom::multi::many1; use nom::multi::many1_count; +use nom::multi::many_till; use nom::sequence::tuple; use crate::parser::element::element; @@ -25,6 +26,7 @@ use super::error::Res; use super::object::Object; use super::parser_with_context::parser_with_context; use super::source::Source; +use super::util::exit_matcher_parser; use super::util::get_consumed; use super::util::start_of_line; use super::util::trailing_whitespace; @@ -101,7 +103,11 @@ fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Sec .with_additional_node(ContextElement::Context("section")); not(|i| parser_context.check_exit_matcher(i))(input)?; let element_matcher = parser_with_context!(element)(&parser_context); - let (remaining, children) = many1(element_matcher)(input)?; + let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); + let (remaining, (children, _exit_contents)) = verify( + many_till(element_matcher, exit_matcher), + |(children, _exit_contents)| !children.is_empty(), + )(input)?; let source = get_consumed(input, remaining); Ok((remaining, Section { source, children })) } diff --git a/src/parser/element.rs b/src/parser/element.rs index 4b60800..7b4f883 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -1,13 +1,13 @@ -use crate::parser::parser_with_context::parser_with_context; -use nom::combinator::map; -use nom::combinator::not; - use super::error::Res; use super::greater_element::PlainList; use super::lesser_element::Paragraph; use 
super::paragraph::paragraph; +use super::plain_list::plain_list; use super::source::Source; use super::Context; +use crate::parser::parser_with_context::parser_with_context; +use nom::branch::alt; +use nom::combinator::map; #[derive(Debug)] pub enum Element<'s> { @@ -25,9 +25,19 @@ impl<'s> Source<'s> for Element<'s> { } pub fn element<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Element<'s>> { - not(|i| context.check_exit_matcher(i))(input)?; - + let non_paragraph_matcher = parser_with_context!(non_paragraph_element)(context); let paragraph_matcher = parser_with_context!(paragraph)(context); - map(paragraph_matcher, Element::Paragraph)(input) + alt(( + non_paragraph_matcher, + map(paragraph_matcher, Element::Paragraph), + ))(input) +} + +pub fn non_paragraph_element<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, Element<'s>> { + let plain_list_matcher = parser_with_context!(plain_list)(context); + map(plain_list_matcher, Element::PlainList)(input) } diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index d63500e..be96c61 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -11,6 +11,7 @@ use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; +use super::element::non_paragraph_element; use super::error::Res; use super::lesser_element::Paragraph; use super::util::blank_line; @@ -35,6 +36,10 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st } fn paragraph_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { - // TODO: Other elements should also end paragraphs - alt((recognize(tuple((line_ending, many1(blank_line)))), eof))(input) + let non_paragraph_element_matcher = parser_with_context!(non_paragraph_element)(context); + alt(( + recognize(tuple((line_ending, many1(blank_line)))), + 
recognize(non_paragraph_element_matcher), + eof, + ))(input) } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 3afc628..32115cb 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -1,24 +1,3 @@ -use nom::branch::alt; -use nom::bytes::complete::tag; -use nom::character::complete::digit1; -use nom::character::complete::one_of; -use nom::character::complete::space0; -use nom::combinator::eof; -use nom::combinator::not; -use nom::combinator::recognize; -use nom::combinator::verify; -use nom::multi::many0; -use nom::multi::many_till; -use nom::sequence::tuple; - -use crate::parser::element::element; -use crate::parser::parser_context::ChainBehavior; -use crate::parser::parser_context::ContextElement; -use crate::parser::parser_context::ExitMatcherNode; -use crate::parser::util::exit_matcher_parser; -use crate::parser::util::get_consumed; -use crate::parser::util::start_of_line; - use super::error::CustomError; use super::error::MyError; use super::error::Res; @@ -27,6 +6,24 @@ use super::greater_element::PlainListItem; use super::parser_with_context::parser_with_context; use super::util::non_whitespace_character; use super::Context; +use crate::parser::element::element; +use crate::parser::parser_context::ChainBehavior; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::util::exit_matcher_parser; +use crate::parser::util::get_consumed; +use crate::parser::util::start_of_line; +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::character::complete::digit1; +use nom::character::complete::one_of; +use nom::character::complete::space0; +use nom::combinator::eof; +use nom::combinator::recognize; +use nom::combinator::verify; +use nom::multi::many0; +use nom::multi::many_till; +use nom::sequence::tuple; #[allow(dead_code)] pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainList<'s>> { From 
5db4e07c9935ed029c1a2f28225350bd5e404b34 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 27 Mar 2023 14:03:52 -0400 Subject: [PATCH 12/29] Expand the sample text. --- toy_language.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/toy_language.txt b/toy_language.txt index ac4a2d6..fd2a6a5 100644 --- a/toy_language.txt +++ b/toy_language.txt @@ -20,3 +20,8 @@ body of heading ** Immediate second child heading * Second top-level heading + +foo bar +1. This is a list immediately after a paragraph +2. This is a second item in the list + 1. This is a child of the second item From a77d2655bdee3aeffd14b2aeedf0e5086cee18e9 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 27 Mar 2023 15:08:29 -0400 Subject: [PATCH 13/29] Instrument the code. --- Cargo.toml | 2 +- Makefile | 4 ++++ src/main.rs | 18 ++++++++++++------ src/parser/document.rs | 9 ++++++++- src/parser/util.rs | 1 + toy_language.txt | 22 ---------------------- 6 files changed, 26 insertions(+), 30 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 76745b7..bc11dfc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,6 @@ path = "src/main.rs" [dependencies] nom = "7.1.1" tracing = "0.1.37" -tracing-subscriber = "0.3.16" +tracing-subscriber = {version="0.3.16", features=["env-filter"]} [features] diff --git a/Makefile b/Makefile index aa2b8a3..6e63e0c 100644 --- a/Makefile +++ b/Makefile @@ -26,6 +26,10 @@ test: run: > cargo run +.PHONY: debug +debug: +> RUST_LOG=debug cargo run + .PHONY: jaeger jaeger: > docker run -d --rm --name toylanguagedocker -p 6831:6831/udp -p 6832:6832/udp -p 16686:16686 -p 14268:14268 jaegertracing/all-in-one:latest diff --git a/src/main.rs b/src/main.rs index 6906a5b..2d4a3dc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,26 +1,32 @@ #![feature(round_char_boundary)] use crate::parser::document; -use tracing::Level; use tracing_subscriber::fmt::format::FmtSpan; - +use tracing_subscriber::EnvFilter; mod parser; const TEST_DOC: &'static str = 
include_str!("../toy_language.txt"); fn main() -> Result<(), Box> { + init_telemetry()?; + let parsed = document(TEST_DOC); + println!("{}\n\n\n", TEST_DOC); + println!("{:#?}", parsed); + Ok(()) +} + +fn init_telemetry() -> Result<(), Box> { + let env_filter = EnvFilter::try_from_default_env().unwrap_or(EnvFilter::new("WARN")); let format = tracing_subscriber::fmt::format() + .pretty() .with_file(true) .with_line_number(true) .with_thread_ids(false) .with_target(false); let subscriber = tracing_subscriber::fmt() .event_format(format) - .with_max_level(Level::TRACE) .with_span_events(FmtSpan::ENTER | FmtSpan::EXIT) + .with_env_filter(env_filter) .finish(); tracing::subscriber::set_global_default(subscriber)?; - let parsed = document(TEST_DOC); - println!("{}\n\n\n", TEST_DOC); - println!("{:#?}", parsed); Ok(()) } diff --git a/src/parser/document.rs b/src/parser/document.rs index 160f507..133ddb7 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -74,6 +74,7 @@ impl<'s> Source<'s> for DocumentElement<'s> { } } +#[tracing::instrument(ret, level = "debug")] #[allow(dead_code)] pub fn document(input: &str) -> Res<&str, Document> { let initial_context: ContextTree<'_, '_> = ContextTree::new(); @@ -82,7 +83,8 @@ pub fn document(input: &str) -> Res<&str, Document> { let section_matcher = parser_with_context!(section)(&document_context); let heading_matcher = parser_with_context!(heading)(&document_context); let (remaining, zeroth_section) = opt(section_matcher)(input)?; - let (remaining, children) = many0(heading_matcher)(remaining)?; + // let (remaining, children) = many0(heading_matcher)(remaining)?; + let children = Vec::new(); let source = get_consumed(input, remaining); Ok(( remaining, @@ -94,6 +96,7 @@ pub fn document(input: &str) -> Res<&str, Document> { )) } +#[tracing::instrument(ret, level = "debug")] fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Section<'s>> { // TODO: The zeroth section is specialized so it 
probably needs its own parser let parser_context = context @@ -112,11 +115,13 @@ fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Sec Ok((remaining, Section { source, children })) } +#[tracing::instrument(ret, level = "debug")] fn section_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { let headline_matcher = parser_with_context!(headline)(context); alt((recognize(headline_matcher), eof))(input) } +#[tracing::instrument(ret, level = "debug")] fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Heading<'s>> { not(|i| context.check_exit_matcher(i))(input)?; let (remaining, (star_count, _ws, title, _ws2)) = headline(context, input)?; @@ -141,6 +146,7 @@ fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Hea )) } +#[tracing::instrument(ret, level = "debug")] fn headline<'r, 's>( context: Context<'r, 's>, input: &'s str, @@ -162,6 +168,7 @@ fn headline<'r, 's>( Ok((remaining, (star_count, ws, title, ws2))) } +#[tracing::instrument(ret, level = "debug")] fn headline_end<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { alt((line_ending, eof))(input) } diff --git a/src/parser/util.rs b/src/parser/util.rs index b8a0e21..c4f57e5 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -108,6 +108,7 @@ pub fn non_whitespace_character(input: &str) -> Res<&str, char> { } /// Check that we are at the start of a line +#[tracing::instrument(ret, level = "debug")] pub fn exit_matcher_parser<'r, 's>( context: Context<'r, 's>, input: &'s str, diff --git a/toy_language.txt b/toy_language.txt index fd2a6a5..de546e9 100644 --- a/toy_language.txt +++ b/toy_language.txt @@ -1,26 +1,4 @@ -prologue *goes here* I guess *bold -text* - -bold*wont* start *or stop*when there is text outside it - -I guess *regular - -text* - -[foo *bar] baz* car - - -*nesting *bold entrances* and* exits - -* Heading - -body of heading - -** Child heading -** Immediate 
second child heading - * Second top-level heading - foo bar 1. This is a list immediately after a paragraph 2. This is a second item in the list From cddefdb96376c2e2ac3b1735dd0220d76a27d988 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 27 Mar 2023 16:53:38 -0400 Subject: [PATCH 14/29] Remove unnecessary exit matcher check. --- src/parser/document.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index 133ddb7..79a161c 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -104,7 +104,6 @@ fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Sec exit_matcher: ChainBehavior::AndParent(Some(&section_end)), })) .with_additional_node(ContextElement::Context("section")); - not(|i| parser_context.check_exit_matcher(i))(input)?; let element_matcher = parser_with_context!(element)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); let (remaining, (children, _exit_contents)) = verify( From e1fbe36297874908b75f3061e715ba591d08aacd Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 27 Mar 2023 17:06:22 -0400 Subject: [PATCH 15/29] Exporting traces to jaeger.
--- Cargo.toml | 3 +++ src/main.rs | 26 ++++++++++++++++++-------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bc11dfc..627ec01 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,10 @@ path = "src/main.rs" [dependencies] nom = "7.1.1" +opentelemetry = "0.17.0" +opentelemetry-jaeger = "0.16.0" tracing = "0.1.37" +tracing-opentelemetry = "0.17.2" tracing-subscriber = {version="0.3.16", features=["env-filter"]} [features] diff --git a/src/main.rs b/src/main.rs index 2d4a3dc..cc49471 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,10 @@ #![feature(round_char_boundary)] use crate::parser::document; -use tracing_subscriber::fmt::format::FmtSpan; use tracing_subscriber::EnvFilter; mod parser; +use tracing_subscriber::fmt; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::util::SubscriberInitExt; const TEST_DOC: &'static str = include_str!("../toy_language.txt"); @@ -16,17 +18,25 @@ fn main() -> Result<(), Box> { fn init_telemetry() -> Result<(), Box> { let env_filter = EnvFilter::try_from_default_env().unwrap_or(EnvFilter::new("WARN")); - let format = tracing_subscriber::fmt::format() + + let stdout = fmt::Layer::new() .pretty() .with_file(true) .with_line_number(true) .with_thread_ids(false) .with_target(false); - let subscriber = tracing_subscriber::fmt() - .event_format(format) - .with_span_events(FmtSpan::ENTER | FmtSpan::EXIT) - .with_env_filter(env_filter) - .finish(); - tracing::subscriber::set_global_default(subscriber)?; + + opentelemetry::global::set_text_map_propagator(opentelemetry_jaeger::Propagator::new()); + let tracer = opentelemetry_jaeger::new_pipeline() + .with_service_name("toy_language") + .install_simple()?; + + let opentelemetry = tracing_opentelemetry::layer().with_tracer(tracer); + + tracing_subscriber::registry() + .with(env_filter) + .with(opentelemetry) + .with(stdout) + .try_init()?; Ok(()) } From 028946ec901c1b8a08c6a9a4e3c274b3adaff2d7 Mon Sep 17 00:00:00 2001 
From: Tom Alexander Date: Mon, 27 Mar 2023 18:08:17 -0400 Subject: [PATCH 16/29] Identified the problem. The issue is plain text is eating the line break so paragraph is failing since it expects a line break at the end. --- src/parser/document.rs | 3 +-- src/parser/element.rs | 1 + src/parser/object.rs | 1 + src/parser/paragraph.rs | 2 ++ src/parser/parser_context.rs | 10 ++++++++-- src/parser/plain_list.rs | 8 ++++++-- src/parser/plain_text.rs | 1 + src/parser/util.rs | 11 +++++++++++ 8 files changed, 31 insertions(+), 6 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index 79a161c..dd5eb67 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -83,8 +83,7 @@ pub fn document(input: &str) -> Res<&str, Document> { let section_matcher = parser_with_context!(section)(&document_context); let heading_matcher = parser_with_context!(heading)(&document_context); let (remaining, zeroth_section) = opt(section_matcher)(input)?; - // let (remaining, children) = many0(heading_matcher)(remaining)?; - let children = Vec::new(); + let (remaining, children) = many0(heading_matcher)(remaining)?; let source = get_consumed(input, remaining); Ok(( remaining, diff --git a/src/parser/element.rs b/src/parser/element.rs index 7b4f883..d0f2ee0 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -24,6 +24,7 @@ impl<'s> Source<'s> for Element<'s> { } } +#[tracing::instrument(ret, level = "debug")] pub fn element<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Element<'s>> { let non_paragraph_matcher = parser_with_context!(non_paragraph_element)(context); let paragraph_matcher = parser_with_context!(paragraph)(context); diff --git a/src/parser/object.rs b/src/parser/object.rs index d94e783..50afeb4 100644 --- a/src/parser/object.rs +++ b/src/parser/object.rs @@ -39,6 +39,7 @@ impl<'s> Source<'s> for Object<'s> { } } +#[tracing::instrument(ret, level = "debug")] pub fn standard_set_object<'r, 's>( context: Context<'r, 's>, 
input: &'s str, diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index be96c61..feb1e57 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -19,6 +19,7 @@ use super::util::get_consumed; use super::util::trailing_whitespace; use super::Context; +#[tracing::instrument(ret, level = "debug")] pub fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Paragraph<'s>> { let parser_context = context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { @@ -35,6 +36,7 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st Ok((remaining, Paragraph { source, children })) } +#[tracing::instrument(ret, level = "debug")] fn paragraph_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { let non_paragraph_element_matcher = parser_with_context!(non_paragraph_element)(context); alt(( diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index 8f4ac3a..df39b3b 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -8,6 +8,7 @@ use super::error::MyError; use super::error::Res; use super::list::List; use super::list::Node; +use super::util::always_fail; use super::Context; type Matcher = dyn for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str>; @@ -64,20 +65,25 @@ impl<'r, 's> ContextTree<'r, 's> { return at_end_of_file; } + let blocked_context = + self.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + exit_matcher: ChainBehavior::IgnoreParent(Some(&always_fail)), + })); + for current_node in self.iter() { let context_element = current_node.get_data(); match context_element { ContextElement::ExitMatcherNode(exit_matcher) => { match exit_matcher.exit_matcher { ChainBehavior::AndParent(Some(matcher)) => { - let local_result = matcher(self, i); + let local_result = matcher(&blocked_context, i); if local_result.is_ok() { return local_result; } } ChainBehavior::AndParent(None) => {} 
ChainBehavior::IgnoreParent(Some(matcher)) => { - let local_result = matcher(self, i); + let local_result = matcher(&blocked_context, i); if local_result.is_ok() { return local_result; } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 32115cb..08cf419 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -25,7 +25,7 @@ use nom::multi::many0; use nom::multi::many_till; use nom::sequence::tuple; -#[allow(dead_code)] +#[tracing::instrument(ret, level = "debug")] pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainList<'s>> { let (remaining, first_item) = plain_list_item(context, input)?; let plain_list_item_matcher = parser_with_context!(plain_list_item)(context); @@ -41,7 +41,7 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s Ok((remaining, PlainList { source, children })) } -#[allow(dead_code)] +#[tracing::instrument(ret, level = "debug")] pub fn plain_list_item<'r, 's>( context: Context<'r, 's>, input: &'s str, @@ -72,6 +72,7 @@ pub fn plain_list_item<'r, 's>( )) } +#[tracing::instrument(ret, level = "debug")] fn bullet<'s>(i: &'s str) -> Res<&'s str, &'s str> { alt(( tag("*"), @@ -81,10 +82,12 @@ fn bullet<'s>(i: &'s str) -> Res<&'s str, &'s str> { ))(i) } +#[tracing::instrument(ret, level = "debug")] fn counter<'s>(i: &'s str) -> Res<&'s str, &'s str> { alt((recognize(one_of("abcdefghijklmnopqrstuvwxyz")), digit1))(i) } +#[tracing::instrument(ret, level = "debug")] fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { let plain_list_item_matcher = parser_with_context!(plain_list_item)(context); let line_indented_lte_matcher = parser_with_context!(line_indented_lte)(context); @@ -95,6 +98,7 @@ fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res< ))(input) } +#[tracing::instrument(ret, level = "debug")] fn line_indented_lte<'r, 's>(context: Context<'r, 's>, input: &'s str) -> 
Res<&'s str, &'s str> { let current_item_indent_level: &usize = get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError( diff --git a/src/parser/plain_text.rs b/src/parser/plain_text.rs index 87d592b..3eb7697 100644 --- a/src/parser/plain_text.rs +++ b/src/parser/plain_text.rs @@ -7,6 +7,7 @@ use super::error::Res; use super::object::PlainText; use super::Context; +#[tracing::instrument(ret, level = "debug")] pub fn plain_text<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainText<'s>> { if input.len() == 0 { return Err(nom::Err::Error(CustomError::MyError(MyError( diff --git a/src/parser/util.rs b/src/parser/util.rs index c4f57e5..a6f1b6e 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -71,16 +71,19 @@ pub fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str { /// A line containing only whitespace and then a line break /// /// It is up to the caller to ensure this is called at the start of a line. +#[tracing::instrument(ret, level = "debug")] pub fn blank_line(input: &str) -> Res<&str, &str> { not(eof)(input)?; recognize(tuple((space0, alt((line_ending, eof)))))(input) } +#[tracing::instrument(ret, level = "debug")] pub fn trailing_whitespace(input: &str) -> Res<&str, &str> { alt((eof, recognize(tuple((line_ending, many0(blank_line))))))(input) } /// Check that we are at the start of a line +#[tracing::instrument(ret, level = "debug")] pub fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { let document_root = context.get_document_root().unwrap(); let preceding_character = get_one_before(document_root, input) @@ -103,6 +106,7 @@ pub fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&' /// Pull one non-whitespace character. /// /// This function only operates on spaces, tabs, carriage returns, and line feeds. It does not handle fancy unicode whitespace. 
+#[tracing::instrument(ret, level = "debug")] pub fn non_whitespace_character(input: &str) -> Res<&str, char> { none_of(" \t\r\n")(input) } @@ -116,6 +120,13 @@ pub fn exit_matcher_parser<'r, 's>( peek(|i| context.check_exit_matcher(i))(input) } +#[tracing::instrument(ret, level = "debug")] +pub fn always_fail<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + Err(nom::Err::Error(CustomError::MyError(MyError( + "Always fail", + )))) +} + #[cfg(test)] mod tests { use super::*; From 2c7a559869049e7f5e28831d7321787f82061e08 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 27 Mar 2023 18:10:14 -0400 Subject: [PATCH 17/29] Fix the line break consumption issue. This leaves us with an issue of lists becoming needlessly nested. --- src/parser/paragraph.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index feb1e57..0c916a4 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -41,7 +41,7 @@ fn paragraph_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st let non_paragraph_element_matcher = parser_with_context!(non_paragraph_element)(context); alt(( recognize(tuple((line_ending, many1(blank_line)))), - recognize(non_paragraph_element_matcher), + recognize(tuple((line_ending, non_paragraph_element_matcher))), eof, ))(input) } From 3d8fe253c91f38a7ee69b159dd50ba7c55ed57ff Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 27 Mar 2023 18:22:08 -0400 Subject: [PATCH 18/29] Check for exit matcher between elements in a plain list item. 
--- src/parser/parser_context.rs | 1 + src/parser/plain_list.rs | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index df39b3b..445884a 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -55,6 +55,7 @@ impl<'r, 's> ContextTree<'r, 's> { self.tree.into_iter_until(&other.tree) } + #[tracing::instrument(ret, level = "debug")] pub fn check_exit_matcher( &'r self, i: &'s str, diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 08cf419..9e46121 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -57,8 +57,10 @@ pub fn plain_list_item<'r, 's>( .with_additional_node(ContextElement::ListItem(indent_level)); let element_matcher = parser_with_context!(element)(&parser_context); + let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); let (remaining, (bull, _ws)) = tuple((bullet, space0))(remaining)?; - let (remaining, contents) = many0(element_matcher)(remaining)?; + let (remaining, (contents, _exit_contents)) = + many_till(element_matcher, exit_matcher)(remaining)?; let source = get_consumed(input, remaining); Ok(( From 3643f91bac8fd80622f095e99e8d2fa2eaad3a6a Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 27 Mar 2023 18:50:25 -0400 Subject: [PATCH 19/29] The current problem is whitespace at the end of a list item should not be consumed. 
--- src/parser/paragraph.rs | 9 ++++++++- src/parser/plain_list.rs | 5 +++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index 0c916a4..b0e6d2f 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -2,7 +2,9 @@ use nom::branch::alt; use nom::character::complete::line_ending; use nom::combinator::eof; use nom::combinator::recognize; +use nom::combinator::verify; use nom::multi::many1; +use nom::multi::many_till; use nom::sequence::tuple; use crate::parser::object::standard_set_object; @@ -10,6 +12,7 @@ use crate::parser::parser_context::ChainBehavior; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; +use crate::parser::util::exit_matcher_parser; use super::element::non_paragraph_element; use super::error::Res; @@ -26,8 +29,12 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st exit_matcher: ChainBehavior::AndParent(Some(&paragraph_end)), })); let standard_set_object_matcher = parser_with_context!(standard_set_object)(&parser_context); + let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - let (remaining, children) = many1(standard_set_object_matcher)(input)?; + let (remaining, (children, _exit_contents)) = verify( + many_till(standard_set_object_matcher, exit_matcher), + |(children, _exit_contents)| !children.is_empty(), + )(input)?; let (remaining, _trailing_whitespace) = trailing_whitespace(remaining)?; diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 9e46121..9743fd6 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -16,6 +16,7 @@ use crate::parser::util::start_of_line; use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::digit1; +use nom::character::complete::line_ending; use nom::character::complete::one_of; use
nom::character::complete::space0; use nom::combinator::eof; @@ -94,8 +95,8 @@ fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res< let plain_list_item_matcher = parser_with_context!(plain_list_item)(context); let line_indented_lte_matcher = parser_with_context!(line_indented_lte)(context); alt(( - recognize(plain_list_item_matcher), - line_indented_lte_matcher, + recognize(tuple((line_ending, plain_list_item_matcher))), + recognize(tuple((line_ending, line_indented_lte_matcher))), eof, ))(input) } From 9545990b522f11224ba5c6fa75e19fe80374e9f3 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 27 Mar 2023 19:12:20 -0400 Subject: [PATCH 20/29] Regurgitate seems to have made all text a paragraph. --- src/parser/plain_list.rs | 2 ++ src/parser/util.rs | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 9743fd6..ed7a310 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -12,6 +12,7 @@ use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; +use crate::parser::util::regurgitate; use crate::parser::util::start_of_line; use nom::branch::alt; use nom::bytes::complete::tag; @@ -62,6 +63,7 @@ pub fn plain_list_item<'r, 's>( let (remaining, (bull, _ws)) = tuple((bullet, space0))(remaining)?; let (remaining, (contents, _exit_contents)) = many_till(element_matcher, exit_matcher)(remaining)?; + let remaining = regurgitate(input, remaining); let source = get_consumed(input, remaining); Ok(( diff --git a/src/parser/util.rs b/src/parser/util.rs index a6f1b6e..9804b13 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -127,6 +127,31 @@ pub fn always_fail<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s )))) } +/// Walk backwards unconsuming blank lines and line endings. 
+/// +/// List items are a special case where the trailing blank lines do not belong to it, unlike all other elements. Rather than write that special logic into each child parser, this just walks backwards through the consumed input to unconsume trailing blank lines and line breaks. +pub fn regurgitate<'s>(input: &'s str, remaining: &'s str) -> &'s str { + assert!(is_slice_of(input, remaining)); + let mut offset = remaining.as_ptr() as usize - input.as_ptr() as usize; + let source = &input[..offset]; + let mut char_indices = source.char_indices().rev(); + loop { + match char_indices.next() { + Some((off, chr)) => { + if chr == '\n' { + offset = off; + } else if chr != ' ' && chr != '\t' { + return &input[offset..]; + } + } + None => { + // It was all whitespace, so return the full input string + return input; + } + }; + } +} + #[cfg(test)] mod tests { use super::*; @@ -140,4 +165,14 @@ mod tests { assert!(is_slice_of(input, yellow_heart)); assert_eq!(yellow_heart, "💛"); } + + #[test] + fn regurgitate_unicode() { + let input = "🧡💛\n\t \t \n\n💚💙💜"; + let (green_heart_index, _) = input.char_indices().skip(12).next().unwrap(); + let starting_with_green_heart = &input[green_heart_index..]; + let after_yellow = regurgitate(input, starting_with_green_heart); + assert!(is_slice_of(input, after_yellow)); + assert_eq!(after_yellow, "\n\t \t \n\n💚💙💜"); + } } From 775e703aded7ef27420161ef5c60e429a5370c56 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 27 Mar 2023 19:19:51 -0400 Subject: [PATCH 21/29] Not sure whats going on.
--- src/parser/plain_list.rs | 8 ++++++-- src/parser/util.rs | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index ed7a310..453ba2c 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -14,6 +14,7 @@ use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::regurgitate; use crate::parser::util::start_of_line; +use crate::parser::util::trailing_whitespace; use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::digit1; @@ -38,6 +39,7 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s }), exit_matcher, )(remaining)?; + let (remaining, _trailing_whitespace) = trailing_whitespace(remaining)?; let source = get_consumed(input, remaining); children.insert(0, first_item); Ok((remaining, PlainList { source, children })) @@ -61,8 +63,10 @@ pub fn plain_list_item<'r, 's>( let element_matcher = parser_with_context!(element)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); let (remaining, (bull, _ws)) = tuple((bullet, space0))(remaining)?; - let (remaining, (contents, _exit_contents)) = - many_till(element_matcher, exit_matcher)(remaining)?; + let (remaining, (contents, _exit_contents)) = many_till(element_matcher, |i| { + let with_whitespace_added_back = regurgitate(input, i); + exit_matcher(with_whitespace_added_back) + })(remaining)?; let remaining = regurgitate(input, remaining); let source = get_consumed(input, remaining); diff --git a/src/parser/util.rs b/src/parser/util.rs index 9804b13..ecd0844 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -130,6 +130,7 @@ pub fn always_fail<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s /// Walk backwards unconsuming blank lines and line endings. /// /// List items are a special case where the trailing blank lines do not belong to it, unlike all other elements. 
Rather than write that special logic into each child parser, this just walks backwards through the consumed input to unconsume trailing blank lines and line breaks. +#[tracing::instrument(ret, level = "debug")] pub fn regurgitate<'s>(input: &'s str, remaining: &'s str) -> &'s str { assert!(is_slice_of(input, remaining)); let mut offset = remaining.as_ptr() as usize - input.as_ptr() as usize; From 602cf4c374766ccdf2417cb7cf34b3305bc97d89 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 31 Mar 2023 09:54:48 -0400 Subject: [PATCH 22/29] Removing regurgitate calls. This hacky solution ends up with whitespace getting captured twice so I will need to either use context or a separate parser. --- src/parser/plain_list.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 453ba2c..c3d50db 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -63,11 +63,8 @@ pub fn plain_list_item<'r, 's>( let element_matcher = parser_with_context!(element)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); let (remaining, (bull, _ws)) = tuple((bullet, space0))(remaining)?; - let (remaining, (contents, _exit_contents)) = many_till(element_matcher, |i| { - let with_whitespace_added_back = regurgitate(input, i); - exit_matcher(with_whitespace_added_back) - })(remaining)?; - let remaining = regurgitate(input, remaining); + let (remaining, (contents, _exit_contents)) = + many_till(element_matcher, exit_matcher)(remaining)?; let source = get_consumed(input, remaining); Ok(( From 707eac5bf810dffe7cac8ca22d11a489f8c51edd Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 31 Mar 2023 11:16:37 -0400 Subject: [PATCH 23/29] Move trailing whitespace parsing to a separate element. I still need to parse the line break at the end of elements. 
--- src/parser/document.rs | 30 ++++++++++++++++++++++++++++-- src/parser/element.rs | 5 +++++ src/parser/paragraph.rs | 2 -- src/parser/plain_list.rs | 1 - src/parser/util.rs | 9 +++++++++ 5 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index dd5eb67..7c002c8 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -20,10 +20,12 @@ use crate::parser::parser_context::ChainBehavior; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ContextTree; use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::util::element_trailing_whitespace; use super::element::Element; use super::error::Res; use super::object::Object; +use super::parser_context; use super::parser_with_context::parser_with_context; use super::source::Source; use super::util::exit_matcher_parser; @@ -105,12 +107,36 @@ fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Sec .with_additional_node(ContextElement::Context("section")); let element_matcher = parser_with_context!(element)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); + let trailing_matcher = parser_with_context!(element_trailing_whitespace)(&parser_context); let (remaining, (children, _exit_contents)) = verify( - many_till(element_matcher, exit_matcher), + many_till( + tuple(( + element_matcher, + opt(map(trailing_matcher, Element::TrailingWhitespace)), + )), + exit_matcher, + ), |(children, _exit_contents)| !children.is_empty(), )(input)?; + let flattened_children: Vec<Element> = children + .into_iter() + .flat_map(|tpl| { + let mut flattened_children = Vec::with_capacity(2); + flattened_children.push(tpl.0); + if let Some(bar) = tpl.1 { + flattened_children.push(bar); + } + flattened_children.into_iter() + }) + .collect(); let source = get_consumed(input, remaining); - Ok((remaining, Section { source, children })) + Ok(( + remaining, + Section { + source, +
children: flattened_children, + }, + )) } #[tracing::instrument(ret, level = "debug")] diff --git a/src/parser/element.rs b/src/parser/element.rs index d0f2ee0..db30497 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -13,6 +13,10 @@ use nom::combinator::map; pub enum Element<'s> { Paragraph(Paragraph<'s>), PlainList(PlainList<'s>), + /// The whitespace that follows an element. + /// + /// This isn't a real org-mode element. Except for items in plain lists, trailing blank lines belong to the preceding element. It is a separate `Element` in this enum to make parsing easier. + TrailingWhitespace(&'s str), } impl<'s> Source<'s> for Element<'s> { @@ -20,6 +24,7 @@ impl<'s> Source<'s> for Element<'s> { match self { Element::Paragraph(obj) => obj.source, Element::PlainList(obj) => obj.source, + Element::TrailingWhitespace(src) => src, } } } diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index b0e6d2f..09903b8 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -36,8 +36,6 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st |(children, _exit_contents)| !children.is_empty(), )(input)?; - let (remaining, _trailing_whitespace) = trailing_whitespace(remaining)?; - let source = get_consumed(input, remaining); Ok((remaining, Paragraph { source, children })) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index c3d50db..9e31428 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -39,7 +39,6 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s }), exit_matcher, )(remaining)?; - let (remaining, _trailing_whitespace) = trailing_whitespace(remaining)?; let source = get_consumed(input, remaining); children.insert(0, first_item); Ok((remaining, PlainList { source, children })) diff --git a/src/parser/util.rs b/src/parser/util.rs index ecd0844..8abbe1a 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -77,6 +77,15 @@ pub fn 
blank_line(input: &str) -> Res<&str, &str> { recognize(tuple((space0, alt((line_ending, eof)))))(input) } +#[tracing::instrument(ret, level = "debug")] +pub fn element_trailing_whitespace<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, &'s str> { + start_of_line(context, input)?; + alt((eof, recognize(many0(blank_line))))(input) +} + #[tracing::instrument(ret, level = "debug")] pub fn trailing_whitespace(input: &str) -> Res<&str, &str> { alt((eof, recognize(tuple((line_ending, many0(blank_line))))))(input) From 68156f3667c3241210a741f837907e31e656da37 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 31 Mar 2023 11:42:04 -0400 Subject: [PATCH 24/29] Consume line break at the end of paragraph. --- src/parser/paragraph.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index 09903b8..764b646 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -36,6 +36,7 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st |(children, _exit_contents)| !children.is_empty(), )(input)?; + let (remaining, _linebreak) = alt((eof, line_ending))(remaining)?; let source = get_consumed(input, remaining); Ok((remaining, Paragraph { source, children })) From 2b0e88dc01395a7208349c52dc7e7a954d6b899c Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 31 Mar 2023 13:08:53 -0400 Subject: [PATCH 25/29] The current problem is plain_list_item_end is not taking into account depth. 
--- src/parser/paragraph.rs | 10 +++++++--- src/parser/plain_list.rs | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index 764b646..bea0908 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -1,6 +1,7 @@ use nom::branch::alt; use nom::character::complete::line_ending; use nom::combinator::eof; +use nom::combinator::peek; use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many1; @@ -32,7 +33,10 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); let (remaining, (children, _exit_contents)) = verify( - many_till(standard_set_object_matcher, exit_matcher), + many_till( + standard_set_object_matcher, + peek(alt((eof, recognize(tuple((line_ending, exit_matcher)))))), + ), |(children, _exit_contents)| !children.is_empty(), )(input)?; @@ -46,8 +50,8 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st fn paragraph_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { let non_paragraph_element_matcher = parser_with_context!(non_paragraph_element)(context); alt(( - recognize(tuple((line_ending, many1(blank_line)))), - recognize(tuple((line_ending, non_paragraph_element_matcher))), + recognize(many1(blank_line)), + recognize(non_paragraph_element_matcher), eof, ))(input) } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 9e31428..2abc7ee 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -97,8 +97,8 @@ fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res< let plain_list_item_matcher = parser_with_context!(plain_list_item)(context); let line_indented_lte_matcher = parser_with_context!(line_indented_lte)(context); alt(( - recognize(tuple((line_ending, plain_list_item_matcher))), - recognize(tuple((line_ending, 
line_indented_lte_matcher))), + recognize(plain_list_item_matcher), + recognize(line_indented_lte_matcher), eof, ))(input) } From e681f8fdff2e07e0765227caabddf5c08434f088 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 31 Mar 2023 13:22:30 -0400 Subject: [PATCH 26/29] Remove the exit matcher block. I'm not sure this is a problem, but while I'm debugging I want this removed to be safe. --- src/parser/parser_context.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index 445884a..8618e0e 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -66,10 +66,10 @@ impl<'r, 's> ContextTree<'r, 's> { return at_end_of_file; } - let blocked_context = - self.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { - exit_matcher: ChainBehavior::IgnoreParent(Some(&always_fail)), - })); + // let blocked_context = + // self.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + // exit_matcher: ChainBehavior::IgnoreParent(Some(&always_fail)), + // })); for current_node in self.iter() { let context_element = current_node.get_data(); @@ -77,14 +77,14 @@ impl<'r, 's> ContextTree<'r, 's> { ContextElement::ExitMatcherNode(exit_matcher) => { match exit_matcher.exit_matcher { ChainBehavior::AndParent(Some(matcher)) => { - let local_result = matcher(&blocked_context, i); + let local_result = matcher(self, i); if local_result.is_ok() { return local_result; } } ChainBehavior::AndParent(None) => {} ChainBehavior::IgnoreParent(Some(matcher)) => { - let local_result = matcher(&blocked_context, i); + let local_result = matcher(self, i); if local_result.is_ok() { return local_result; } From 942b4860781f78b4871418479ec0b94162d4cc45 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 31 Mar 2023 13:32:07 -0400 Subject: [PATCH 27/29] Add a test file showing lists where the earlier one is indented. 
--- org_mode_samples/plain_lists/indented_then_less_indented.org | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 org_mode_samples/plain_lists/indented_then_less_indented.org diff --git a/org_mode_samples/plain_lists/indented_then_less_indented.org b/org_mode_samples/plain_lists/indented_then_less_indented.org new file mode 100644 index 0000000..c325ab9 --- /dev/null +++ b/org_mode_samples/plain_lists/indented_then_less_indented.org @@ -0,0 +1,2 @@ + 1. foo +1. bar From 2552ba28d11eb2c7e7063f1fa01a879d88c5c89b Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 3 Apr 2023 15:06:12 -0400 Subject: [PATCH 28/29] Correctly parsing plain list items. --- src/parser/plain_list.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 2abc7ee..9911f3f 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -94,10 +94,16 @@ fn counter<'s>(i: &'s str) -> Res<&'s str, &'s str> { #[tracing::instrument(ret, level = "debug")] fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + let current_item_indent_level: &usize = + get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError( + "Not inside a plain list item", + ))))?; let plain_list_item_matcher = parser_with_context!(plain_list_item)(context); let line_indented_lte_matcher = parser_with_context!(line_indented_lte)(context); alt(( - recognize(plain_list_item_matcher), + recognize(verify(plain_list_item_matcher, |pli| { + pli.indentation <= *current_item_indent_level + })), recognize(line_indented_lte_matcher), eof, ))(input) From 2d1df18544510a081383b4e972a1ecd0263acd73 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 3 Apr 2023 15:15:16 -0400 Subject: [PATCH 29/29] Cleanup. 
--- src/parser/combinator.rs | 38 ------------------------------------ src/parser/document.rs | 1 - src/parser/error.rs | 2 +- src/parser/mod.rs | 1 - src/parser/object.rs | 4 ++++ src/parser/paragraph.rs | 1 - src/parser/parser_context.rs | 4 ++-- src/parser/plain_list.rs | 4 ---- 8 files changed, 7 insertions(+), 48 deletions(-) delete mode 100644 src/parser/combinator.rs diff --git a/src/parser/combinator.rs b/src/parser/combinator.rs deleted file mode 100644 index ceab6c8..0000000 --- a/src/parser/combinator.rs +++ /dev/null @@ -1,38 +0,0 @@ -use nom::error::ParseError; -use nom::IResult; -use nom::InputLength; - -use super::Context; - -pub fn context_many1<'r: 's, 's, I, O, E, M>( - context: Context<'r, 's>, - mut many_matcher: M, -) -> impl FnMut(I) -> IResult, E> + 'r -where - I: Clone + InputLength, - E: ParseError, - M: for<'x> Fn(Context<'x, 's>, I) -> IResult + 'r, -{ - move |mut i: I| { - let mut err = None; - let mut elements: Vec = Vec::new(); - loop { - match many_matcher(&context, i.clone()) { - Ok((remaining, many_elem)) => { - i = remaining; - elements.push(many_elem); - } - the_error @ Err(_) => { - err = Some(the_error); - break; - } - } - } - if elements.is_empty() { - if let Some(err) = err { - err?; - } - } - Ok((i, elements)) - } -} diff --git a/src/parser/document.rs b/src/parser/document.rs index 7c002c8..33f683d 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -25,7 +25,6 @@ use crate::parser::util::element_trailing_whitespace; use super::element::Element; use super::error::Res; use super::object::Object; -use super::parser_context; use super::parser_with_context::parser_with_context; use super::source::Source; use super::util::exit_matcher_parser; diff --git a/src/parser/error.rs b/src/parser/error.rs index abe5d86..eb23965 100644 --- a/src/parser/error.rs +++ b/src/parser/error.rs @@ -18,7 +18,7 @@ impl ParseError for CustomError { CustomError::Nom(input, kind) } - fn append(input: I, kind: ErrorKind, mut other: Self) 
-> Self { + fn append(_input: I, _kind: ErrorKind, mut other: Self) -> Self { // Doesn't do append like VerboseError other } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index bb384e4..276be7d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,4 +1,3 @@ -mod combinator; mod document; mod element; mod error; diff --git a/src/parser/object.rs b/src/parser/object.rs index 50afeb4..36162a6 100644 --- a/src/parser/object.rs +++ b/src/parser/object.rs @@ -9,8 +9,12 @@ use super::Context; #[derive(Debug)] pub enum Object<'s> { + #[allow(dead_code)] TextMarkup(TextMarkup<'s>), + PlainText(PlainText<'s>), + + #[allow(dead_code)] RegularLink(RegularLink<'s>), } diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index bea0908..a6af51d 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -20,7 +20,6 @@ use super::error::Res; use super::lesser_element::Paragraph; use super::util::blank_line; use super::util::get_consumed; -use super::util::trailing_whitespace; use super::Context; #[tracing::instrument(ret, level = "debug")] diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index 8618e0e..5070d03 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -8,7 +8,6 @@ use super::error::MyError; use super::error::Res; use super::list::List; use super::list::Node; -use super::util::always_fail; use super::Context; type Matcher = dyn for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str>; @@ -139,7 +138,8 @@ pub struct ExitMatcherNode<'r> { #[derive(Clone)] pub enum ChainBehavior<'r> { AndParent(Option<&'r Matcher>), - #[allow(dead_code)] + + #[allow(dead_code)] // Will be used when inside code/quote blocks IgnoreParent(Option<&'r Matcher>), } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 9911f3f..f3e077f 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -12,19 +12,15 @@ use crate::parser::parser_context::ContextElement; use 
crate::parser::parser_context::ExitMatcherNode; use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; -use crate::parser::util::regurgitate; use crate::parser::util::start_of_line; -use crate::parser::util::trailing_whitespace; use nom::branch::alt; use nom::bytes::complete::tag; use nom::character::complete::digit1; -use nom::character::complete::line_ending; use nom::character::complete::one_of; use nom::character::complete::space0; use nom::combinator::eof; use nom::combinator::recognize; use nom::combinator::verify; -use nom::multi::many0; use nom::multi::many_till; use nom::sequence::tuple;