From 9ad428bb20d6316621a25886c52d840940880d74 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 3 Apr 2023 17:33:01 -0400 Subject: [PATCH 01/13] Standardize on the children name instead of contents. --- src/parser/greater_element.rs | 10 +++++++++- src/parser/plain_list.rs | 4 ++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/parser/greater_element.rs b/src/parser/greater_element.rs index 59717d73..9e828c75 100644 --- a/src/parser/greater_element.rs +++ b/src/parser/greater_element.rs @@ -11,5 +11,13 @@ pub struct PlainListItem<'s> { pub source: &'s str, pub indentation: usize, pub bullet: &'s str, - pub contents: Vec>, + pub children: Vec>, +} + +#[derive(Debug)] +pub struct GreaterBlock<'s> { + pub source: &'s str, + pub name: &'s str, + pub parameters: Option<&'s str>, + pub children: Vec>, } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index b0a0a60e..e7cfef04 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -71,7 +71,7 @@ pub fn plain_list_item<'r, 's>( source, indentation: indent_level, bullet: bull, - contents: Vec::new(), + children: Vec::new(), }, )); } @@ -86,7 +86,7 @@ pub fn plain_list_item<'r, 's>( source, indentation: indent_level, bullet: bull, - contents, + children: contents, }, )); } From 9e0bea4c3e0b8ad08b45ea6bff2530995ee378e9 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 3 Apr 2023 17:36:56 -0400 Subject: [PATCH 02/13] Starting the greater block source. 
--- src/parser/element.rs | 3 +++ src/parser/greater_block.rs | 1 + src/parser/mod.rs | 1 + 3 files changed, 5 insertions(+) create mode 100644 src/parser/greater_block.rs diff --git a/src/parser/element.rs b/src/parser/element.rs index db304976..2568f34d 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -1,4 +1,5 @@ use super::error::Res; +use super::greater_element::GreaterBlock; use super::greater_element::PlainList; use super::lesser_element::Paragraph; use super::paragraph::paragraph; @@ -13,6 +14,7 @@ use nom::combinator::map; pub enum Element<'s> { Paragraph(Paragraph<'s>), PlainList(PlainList<'s>), + GreaterBlock(GreaterBlock<'s>), /// The whitespace that follows an element. /// /// This isn't a real org-mode element. Except for items in plain lists, trailing blank lines belong to the preceding element. It is a separate `Element` in this enum to make parsing easier. @@ -24,6 +26,7 @@ impl<'s> Source<'s> for Element<'s> { match self { Element::Paragraph(obj) => obj.source, Element::PlainList(obj) => obj.source, + Element::GreaterBlock(obj) => obj.source, Element::TrailingWhitespace(src) => src, } } diff --git a/src/parser/greater_block.rs b/src/parser/greater_block.rs new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/src/parser/greater_block.rs @@ -0,0 +1 @@ + diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 276be7d2..fbb3fe7a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,6 +1,7 @@ mod document; mod element; mod error; +mod greater_block; mod greater_element; mod lesser_element; mod list; From 6e6ec56d1fce19f3a5ec5bee5594a2aa3c4d9e6f Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 3 Apr 2023 18:33:07 -0400 Subject: [PATCH 03/13] Basic implementation of greater block parser. 
--- src/parser/element.rs | 7 ++- src/parser/greater_block.rs | 88 ++++++++++++++++++++++++++++++++++++ src/parser/parser_context.rs | 5 +- 3 files changed, 97 insertions(+), 3 deletions(-) diff --git a/src/parser/element.rs b/src/parser/element.rs index 2568f34d..49f543d8 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -1,4 +1,5 @@ use super::error::Res; +use super::greater_block::greater_block; use super::greater_element::GreaterBlock; use super::greater_element::PlainList; use super::lesser_element::Paragraph; @@ -48,5 +49,9 @@ pub fn non_paragraph_element<'r, 's>( input: &'s str, ) -> Res<&'s str, Element<'s>> { let plain_list_matcher = parser_with_context!(plain_list)(context); - map(plain_list_matcher, Element::PlainList)(input) + let greater_block_matcher = parser_with_context!(greater_block)(context); + alt(( + map(plain_list_matcher, Element::PlainList), + map(greater_block_matcher, Element::GreaterBlock), + ))(input) } diff --git a/src/parser/greater_block.rs b/src/parser/greater_block.rs index 8b137891..14010072 100644 --- a/src/parser/greater_block.rs +++ b/src/parser/greater_block.rs @@ -1 +1,89 @@ +use super::error::Res; +use super::Context; +use crate::parser::element::element; +use crate::parser::error::CustomError; +use crate::parser::error::MyError; +use crate::parser::greater_element::GreaterBlock; +use crate::parser::parser_context::ChainBehavior; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::parser_with_context::parser_with_context; +use crate::parser::util::exit_matcher_parser; +use crate::parser::util::get_consumed; +use crate::parser::util::start_of_line; +use nom::branch::alt; +use nom::bytes::complete::is_not; +use nom::bytes::complete::tag_no_case; +use nom::character::complete::line_ending; +use nom::character::complete::space1; +use nom::combinator::eof; +use nom::combinator::opt; +use nom::multi::many_till; +use nom::sequence::tuple; +pub 
fn greater_block<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, GreaterBlock<'s>> { + let (remaining, (_begin, name)) = tuple((tag_no_case("#+begin_"), name))(input)?; + let (remaining, parameters) = opt(tuple((space1, parameters)))(remaining)?; + let (remaining, _nl) = line_ending(remaining)?; + let parser_context = context + .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + exit_matcher: ChainBehavior::IgnoreParent(Some(&greater_block_end)), + })) + .with_additional_node(ContextElement::GreaterBlock(name)); + + let element_matcher = parser_with_context!(element)(&parser_context); + let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); + let (remaining, (children, _exit_contents)) = + many_till(element_matcher, exit_matcher)(remaining)?; + let (remaining, _end) = greater_block_end(&parser_context, remaining)?; + let parameters = match parameters { + Some((_ws, parameters)) => Some(parameters), + None => None, + }; + let source = get_consumed(input, remaining); + Ok(( + remaining, + GreaterBlock { + source, + name, + parameters, + children, + }, + )) +} + +fn name<'s>(input: &'s str) -> Res<&'s str, &'s str> { + is_not(" \t\r\n")(input) +} + +fn parameters<'s>(input: &'s str) -> Res<&'s str, &'s str> { + is_not("\r\n")(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn greater_block_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + start_of_line(context, input)?; + let current_name: &str = get_context_greater_block_name(context).ok_or(nom::Err::Error( + CustomError::MyError(MyError("Not inside a greater block")), + ))?; + let (remaining, (_begin, _name, _ws)) = tuple(( + tag_no_case("#+end_"), + tag_no_case(current_name), + alt((eof, line_ending)), + ))(input)?; + let source = get_consumed(input, remaining); + Ok((remaining, source)) +} + +fn get_context_greater_block_name<'r, 's>(context: Context<'r, 's>) -> Option<&'s str> { + for thing in 
context.iter() { + match thing.get_data() { + ContextElement::GreaterBlock(name) => return Some(name), + _ => {} + }; + } + None +} diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index 5070d035..5bbb8a72 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -128,6 +128,9 @@ pub enum ContextElement<'r, 's> { /// Stores the indentation level of the current list item ListItem(usize), + + /// Stores the name of the greater block + GreaterBlock(&'s str), } #[derive(Debug)] @@ -138,8 +141,6 @@ pub struct ExitMatcherNode<'r> { #[derive(Clone)] pub enum ChainBehavior<'r> { AndParent(Option<&'r Matcher>), - - #[allow(dead_code)] // Will be used when inside code/quote blocks IgnoreParent(Option<&'r Matcher>), } From 70225a668b14750e8e873ef5c488cceb616a0842 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 3 Apr 2023 18:38:35 -0400 Subject: [PATCH 04/13] Do not match lesser blocks. --- src/parser/greater_block.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/parser/greater_block.rs b/src/parser/greater_block.rs index 14010072..99e842ba 100644 --- a/src/parser/greater_block.rs +++ b/src/parser/greater_block.rs @@ -18,14 +18,22 @@ use nom::character::complete::line_ending; use nom::character::complete::space1; use nom::combinator::eof; use nom::combinator::opt; +use nom::combinator::verify; use nom::multi::many_till; use nom::sequence::tuple; +#[tracing::instrument(ret, level = "debug")] pub fn greater_block<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, GreaterBlock<'s>> { - let (remaining, (_begin, name)) = tuple((tag_no_case("#+begin_"), name))(input)?; + let (remaining, (_begin, name)) = tuple(( + tag_no_case("#+begin_"), + verify(name, |name: &str| match name.to_lowercase().as_str() { + "comment" | "example" | "export" | "src" | "verse" => false, + _ => true, + }), + ))(input)?; let (remaining, parameters) = opt(tuple((space1, parameters)))(remaining)?; let 
(remaining, _nl) = line_ending(remaining)?; let parser_context = context @@ -55,10 +63,12 @@ pub fn greater_block<'r, 's>( )) } +#[tracing::instrument(ret, level = "debug")] fn name<'s>(input: &'s str) -> Res<&'s str, &'s str> { is_not(" \t\r\n")(input) } +#[tracing::instrument(ret, level = "debug")] fn parameters<'s>(input: &'s str) -> Res<&'s str, &'s str> { is_not("\r\n")(input) } From 559f6e6173c1280d87cb65c4e809040d5baa56d7 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 3 Apr 2023 18:44:05 -0400 Subject: [PATCH 05/13] Greater block breaking parsing. --- toy_language.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/toy_language.txt b/toy_language.txt index c5060874..e3c62aa3 100644 --- a/toy_language.txt +++ b/toy_language.txt @@ -24,3 +24,7 @@ foo bar 1. This is a list immediately after a paragraph 2. This is a second item in the list 1. This is a child of the second item +#+begin_center +1. foo +2. bar +#+end_center From d4887ec31166243e93b91b93ee4a32d62ae14935 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 3 Apr 2023 18:52:11 -0400 Subject: [PATCH 06/13] Disable stdout for traces. Frankly the stdout traces are pretty useless if you have anything more than the most trivial amount of entries. 
--- src/main.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/main.rs b/src/main.rs index cc494715..bee75f95 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,6 @@ use crate::parser::document; use tracing_subscriber::EnvFilter; mod parser; -use tracing_subscriber::fmt; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -19,12 +18,12 @@ fn main() -> Result<(), Box> { fn init_telemetry() -> Result<(), Box> { let env_filter = EnvFilter::try_from_default_env().unwrap_or(EnvFilter::new("WARN")); - let stdout = fmt::Layer::new() - .pretty() - .with_file(true) - .with_line_number(true) - .with_thread_ids(false) - .with_target(false); + // let stdout = tracing_subscriber::fmt::Layer::new() + // .pretty() + // .with_file(true) + // .with_line_number(true) + // .with_thread_ids(false) + // .with_target(false); opentelemetry::global::set_text_map_propagator(opentelemetry_jaeger::Propagator::new()); let tracer = opentelemetry_jaeger::new_pipeline() @@ -36,7 +35,7 @@ fn init_telemetry() -> Result<(), Box> { tracing_subscriber::registry() .with(env_filter) .with(opentelemetry) - .with(stdout) + // .with(stdout) .try_init()?; Ok(()) } From ddea3134016431c3d03eb9c083736c5f6b158865 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 3 Apr 2023 18:57:31 -0400 Subject: [PATCH 07/13] Ensure the final span is shipped to jaeger before exiting. --- src/main.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main.rs b/src/main.rs index bee75f95..8f1e4f15 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,6 +12,7 @@ fn main() -> Result<(), Box> { let parsed = document(TEST_DOC); println!("{}\n\n\n", TEST_DOC); println!("{:#?}", parsed); + opentelemetry::global::shutdown_tracer_provider(); Ok(()) } From 9257420c8319154f66c11a0e1f94675ece4fae07 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 3 Apr 2023 19:19:33 -0400 Subject: [PATCH 08/13] Add comment defining problem. 
--- src/parser/plain_list.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index e7cfef04..a320cdc0 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -31,6 +31,7 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s let (remaining, first_item) = plain_list_item(context, input)?; let plain_list_item_matcher = parser_with_context!(plain_list_item)(context); let exit_matcher = parser_with_context!(exit_matcher_parser)(context); + // TODO: The problem is this expects the list to be followed by something that matches the exit matcher, but after we have the first plain list item, really anything that is not a plain list item is a good exit condition. let (remaining, (mut children, _exit_contents)) = many_till( verify(plain_list_item_matcher, |pli| { pli.indentation == first_item.indentation From d7f0a02bb89d1a1d0bdbd7b80c3a75dab0dcd7ab Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 3 Apr 2023 19:30:17 -0400 Subject: [PATCH 09/13] Fix plain list ending with another element. 
--- src/parser/plain_list.rs | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index a320cdc0..02cf02e4 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -26,20 +26,29 @@ use nom::combinator::verify; use nom::multi::many_till; use nom::sequence::tuple; -#[tracing::instrument(ret, level = "debug")] pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainList<'s>> { - let (remaining, first_item) = plain_list_item(context, input)?; + let (mut remaining, first_item) = plain_list_item(context, input)?; + let first_item_indentation = first_item.indentation; let plain_list_item_matcher = parser_with_context!(plain_list_item)(context); let exit_matcher = parser_with_context!(exit_matcher_parser)(context); - // TODO: The problem is this expects the list to be followed by something that matches the exit matcher, but after we have the first plain list item, really anything that is not a plain list item is a good exit condition. 
- let (remaining, (mut children, _exit_contents)) = many_till( - verify(plain_list_item_matcher, |pli| { - pli.indentation == first_item.indentation - }), - exit_matcher, - )(remaining)?; + let mut children = Vec::new(); + children.push(first_item); + loop { + let exit_contents = exit_matcher(remaining); + if exit_contents.is_ok() { + break; + } + + let next_list_item = plain_list_item_matcher(remaining); + match next_list_item { + Ok((remain, next_child)) if next_child.indentation == first_item_indentation => { + children.push(next_child); + remaining = remain; + } + Ok(_) | Err(_) => break, + }; + } let source = get_consumed(input, remaining); - children.insert(0, first_item); Ok((remaining, PlainList { source, children })) } From 7023fb46177a1d28a311b4a8b2c9491f597fe8c1 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 3 Apr 2023 19:37:51 -0400 Subject: [PATCH 10/13] Add a comment about a shortcoming. --- src/parser/greater_block.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/parser/greater_block.rs b/src/parser/greater_block.rs index 99e842ba..f8a1dff2 100644 --- a/src/parser/greater_block.rs +++ b/src/parser/greater_block.rs @@ -27,6 +27,7 @@ pub fn greater_block<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, GreaterBlock<'s>> { + // TODO: Not handling indentation before start of block let (remaining, (_begin, name)) = tuple(( tag_no_case("#+begin_"), verify(name, |name: &str| match name.to_lowercase().as_str() { @@ -44,6 +45,7 @@ pub fn greater_block<'r, 's>( let element_matcher = parser_with_context!(element)(&parser_context); let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); + // TODO: Not handling nested greater blocks let (remaining, (children, _exit_contents)) = many_till(element_matcher, exit_matcher)(remaining)?; let (remaining, _end) = greater_block_end(&parser_context, remaining)?; From 352532b759aadaf20e84cafc9ee6706be1173f47 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 
7 Apr 2023 16:14:08 -0400 Subject: [PATCH 11/13] Add an example showing greater blocks can be nested if they are different types. --- org_mode_samples/greater_block/Makefile | 23 +++++++++++++++++++++++ org_mode_samples/greater_block/nested.org | 7 +++++++ 2 files changed, 30 insertions(+) create mode 100644 org_mode_samples/greater_block/Makefile create mode 100644 org_mode_samples/greater_block/nested.org diff --git a/org_mode_samples/greater_block/Makefile b/org_mode_samples/greater_block/Makefile new file mode 100644 index 00000000..c47a86c1 --- /dev/null +++ b/org_mode_samples/greater_block/Makefile @@ -0,0 +1,23 @@ +SHELL := bash +.ONESHELL: +.SHELLFLAGS := -eu -o pipefail -c +.DELETE_ON_ERROR: +MAKEFLAGS += --warn-undefined-variables +MAKEFLAGS += --no-builtin-rules +SRCFILES := $(wildcard *.org) +OUTFILES := $(patsubst %.org,%.tree.txt,$(SRCFILES)) + +ifeq ($(origin .RECIPEPREFIX), undefined) + $(error This Make does not support .RECIPEPREFIX. Please use GNU Make 4.0 or later) +endif +.RECIPEPREFIX = > + +.PHONY: all +all: $(OUTFILES) + +.PHONY: clean +clean: +> rm -rf $(OUTFILES) + +%.tree.txt: %.org ../common.el ../dump_org_ast.bash +> ../dump_org_ast.bash $< $@ diff --git a/org_mode_samples/greater_block/nested.org b/org_mode_samples/greater_block/nested.org new file mode 100644 index 00000000..1607d041 --- /dev/null +++ b/org_mode_samples/greater_block/nested.org @@ -0,0 +1,7 @@ +#+begin_center +foo +#+begin_quote +bar +#+end_quote +baz +#+end_center From ab329f367f3e84ad4b3c3e0073c82e0711b6c2e9 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 7 Apr 2023 16:14:57 -0400 Subject: [PATCH 12/13] Add an example showing that greater blocks begin can be indented differently from end. 
--- org_mode_samples/greater_block/indented.org | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 org_mode_samples/greater_block/indented.org diff --git a/org_mode_samples/greater_block/indented.org b/org_mode_samples/greater_block/indented.org new file mode 100644 index 00000000..95400ac1 --- /dev/null +++ b/org_mode_samples/greater_block/indented.org @@ -0,0 +1,3 @@ + #+begin_center +foo +#+end_center From 42fca12e418f7b8a18cf28b7878164f83d78dd70 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 7 Apr 2023 16:18:13 -0400 Subject: [PATCH 13/13] Add support for indenting greater blocks. --- src/parser/greater_block.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/parser/greater_block.rs b/src/parser/greater_block.rs index f8a1dff2..3fce48cb 100644 --- a/src/parser/greater_block.rs +++ b/src/parser/greater_block.rs @@ -15,6 +15,7 @@ use nom::branch::alt; use nom::bytes::complete::is_not; use nom::bytes::complete::tag_no_case; use nom::character::complete::line_ending; +use nom::character::complete::space0; use nom::character::complete::space1; use nom::combinator::eof; use nom::combinator::opt; @@ -27,6 +28,8 @@ pub fn greater_block<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, GreaterBlock<'s>> { + start_of_line(context, input)?; + let (remaining, _leading_whitespace) = space0(input)?; // TODO: Not handling indentation before start of block let (remaining, (_begin, name)) = tuple(( tag_no_case("#+begin_"), @@ -34,7 +37,7 @@ pub fn greater_block<'r, 's>( "comment" | "example" | "export" | "src" | "verse" => false, _ => true, }), - ))(input)?; + ))(remaining)?; let (remaining, parameters) = opt(tuple((space1, parameters)))(remaining)?; let (remaining, _nl) = line_ending(remaining)?; let parser_context = context @@ -81,11 +84,12 @@ fn greater_block_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&' let current_name: &str = get_context_greater_block_name(context).ok_or(nom::Err::Error( 
CustomError::MyError(MyError("Not inside a greater block")), ))?; + let (remaining, _leading_whitespace) = space0(input)?; let (remaining, (_begin, _name, _ws)) = tuple(( tag_no_case("#+end_"), tag_no_case(current_name), alt((eof, line_ending)), - ))(input)?; + ))(remaining)?; let source = get_consumed(input, remaining); Ok((remaining, source)) }