diff --git a/org_mode_samples/greater_block/Makefile b/org_mode_samples/greater_block/Makefile new file mode 100644 index 00000000..c47a86c1 --- /dev/null +++ b/org_mode_samples/greater_block/Makefile @@ -0,0 +1,23 @@ +SHELL := bash +.ONESHELL: +.SHELLFLAGS := -eu -o pipefail -c +.DELETE_ON_ERROR: +MAKEFLAGS += --warn-undefined-variables +MAKEFLAGS += --no-builtin-rules +SRCFILES := $(wildcard *.org) +OUTFILES := $(patsubst %.org,%.tree.txt,$(SRCFILES)) + +ifeq ($(origin .RECIPEPREFIX), undefined) + $(error This Make does not support .RECIPEPREFIX. Please use GNU Make 4.0 or later) +endif +.RECIPEPREFIX = > + +.PHONY: all +all: $(OUTFILES) + +.PHONY: clean +clean: +> rm -rf $(OUTFILES) + +%.tree.txt: %.org ../common.el ../dump_org_ast.bash +> ../dump_org_ast.bash $< $@ diff --git a/org_mode_samples/greater_block/indented.org b/org_mode_samples/greater_block/indented.org new file mode 100644 index 00000000..95400ac1 --- /dev/null +++ b/org_mode_samples/greater_block/indented.org @@ -0,0 +1,3 @@ + #+begin_center +foo +#+end_center diff --git a/org_mode_samples/greater_block/nested.org b/org_mode_samples/greater_block/nested.org new file mode 100644 index 00000000..1607d041 --- /dev/null +++ b/org_mode_samples/greater_block/nested.org @@ -0,0 +1,7 @@ +#+begin_center +foo +#+begin_quote +bar +#+end_quote +baz +#+end_center diff --git a/src/main.rs b/src/main.rs index cc494715..8f1e4f15 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,6 @@ use crate::parser::document; use tracing_subscriber::EnvFilter; mod parser; -use tracing_subscriber::fmt; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -13,18 +12,19 @@ fn main() -> Result<(), Box> { let parsed = document(TEST_DOC); println!("{}\n\n\n", TEST_DOC); println!("{:#?}", parsed); + opentelemetry::global::shutdown_tracer_provider(); Ok(()) } fn init_telemetry() -> Result<(), Box> { let env_filter = EnvFilter::try_from_default_env().unwrap_or(EnvFilter::new("WARN")); - let stdout = fmt::Layer::new() - .pretty() - .with_file(true) - .with_line_number(true) - .with_thread_ids(false) - .with_target(false); + // let stdout = tracing_subscriber::fmt::Layer::new() + // .pretty() + // .with_file(true) + // .with_line_number(true) + // .with_thread_ids(false) + // .with_target(false); opentelemetry::global::set_text_map_propagator(opentelemetry_jaeger::Propagator::new()); let tracer = opentelemetry_jaeger::new_pipeline() @@ -36,7 +36,7 @@ fn init_telemetry() -> Result<(), Box> { tracing_subscriber::registry() .with(env_filter) .with(opentelemetry) - .with(stdout) + // .with(stdout) .try_init()?; Ok(()) } diff --git a/src/parser/element.rs b/src/parser/element.rs index db304976..49f543d8 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -1,4 +1,6 @@ use super::error::Res; +use super::greater_block::greater_block; +use super::greater_element::GreaterBlock; use super::greater_element::PlainList; use super::lesser_element::Paragraph; use super::paragraph::paragraph; @@ -13,6 +15,7 @@ use nom::combinator::map; pub enum Element<'s> { Paragraph(Paragraph<'s>), PlainList(PlainList<'s>), + GreaterBlock(GreaterBlock<'s>), /// The whitespace that follows an element. /// /// This isn't a real org-mode element. Except for items in plain lists, trailing blank lines belong to the preceding element. It is a separate `Element` in this enum to make parsing easier. @@ -24,6 +27,7 @@ impl<'s> Source<'s> for Element<'s> { match self { Element::Paragraph(obj) => obj.source, Element::PlainList(obj) => obj.source, + Element::GreaterBlock(obj) => obj.source, Element::TrailingWhitespace(src) => src, } } @@ -45,5 +49,9 @@ pub fn non_paragraph_element<'r, 's>( input: &'s str, ) -> Res<&'s str, Element<'s>> { let plain_list_matcher = parser_with_context!(plain_list)(context); - map(plain_list_matcher, Element::PlainList)(input) + let greater_block_matcher = parser_with_context!(greater_block)(context); + alt(( + map(plain_list_matcher, Element::PlainList), + map(greater_block_matcher, Element::GreaterBlock), + ))(input) } diff --git a/src/parser/greater_block.rs b/src/parser/greater_block.rs new file mode 100644 index 00000000..3fce48cb --- /dev/null +++ b/src/parser/greater_block.rs @@ -0,0 +1,105 @@ +use super::error::Res; +use super::Context; +use crate::parser::element::element; +use crate::parser::error::CustomError; +use crate::parser::error::MyError; +use crate::parser::greater_element::GreaterBlock; +use crate::parser::parser_context::ChainBehavior; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::parser_with_context::parser_with_context; +use crate::parser::util::exit_matcher_parser; +use crate::parser::util::get_consumed; +use crate::parser::util::start_of_line; +use nom::branch::alt; +use nom::bytes::complete::is_not; +use nom::bytes::complete::tag_no_case; +use nom::character::complete::line_ending; +use nom::character::complete::space0; +use nom::character::complete::space1; +use nom::combinator::eof; +use nom::combinator::opt; +use nom::combinator::verify; +use nom::multi::many_till; +use nom::sequence::tuple; + +#[tracing::instrument(ret, level = "debug")] +pub fn greater_block<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, GreaterBlock<'s>> { + start_of_line(context, input)?; + let (remaining, _leading_whitespace) = space0(input)?; + // TODO: Not handling indentation before start of block + let (remaining, (_begin, name)) = tuple(( + tag_no_case("#+begin_"), + verify(name, |name: &str| match name.to_lowercase().as_str() { + "comment" | "example" | "export" | "src" | "verse" => false, + _ => true, + }), + ))(remaining)?; + let (remaining, parameters) = opt(tuple((space1, parameters)))(remaining)?; + let (remaining, _nl) = line_ending(remaining)?; + let parser_context = context + .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + exit_matcher: ChainBehavior::IgnoreParent(Some(&greater_block_end)), + })) + .with_additional_node(ContextElement::GreaterBlock(name)); + + let element_matcher = parser_with_context!(element)(&parser_context); + let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); + // TODO: Not handling nested greater blocks + let (remaining, (children, _exit_contents)) = + many_till(element_matcher, exit_matcher)(remaining)?; + let (remaining, _end) = greater_block_end(&parser_context, remaining)?; + let parameters = match parameters { + Some((_ws, parameters)) => Some(parameters), + None => None, + }; + let source = get_consumed(input, remaining); + Ok(( + remaining, + GreaterBlock { + source, + name, + parameters, + children, + }, + )) +} + +#[tracing::instrument(ret, level = "debug")] +fn name<'s>(input: &'s str) -> Res<&'s str, &'s str> { + is_not(" \t\r\n")(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn parameters<'s>(input: &'s str) -> Res<&'s str, &'s str> { + is_not("\r\n")(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn greater_block_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + start_of_line(context, input)?; + let current_name: &str = get_context_greater_block_name(context).ok_or(nom::Err::Error( + CustomError::MyError(MyError("Not inside a greater block")), + ))?; + let (remaining, _leading_whitespace) = space0(input)?; + let (remaining, (_begin, _name, _ws)) = tuple(( + tag_no_case("#+end_"), + tag_no_case(current_name), + alt((eof, line_ending)), + ))(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, source)) +} + +fn get_context_greater_block_name<'r, 's>(context: Context<'r, 's>) -> Option<&'s str> { + for thing in context.iter() { + match thing.get_data() { + ContextElement::GreaterBlock(name) => return Some(name), + _ => {} + }; + } + None +} diff --git a/src/parser/greater_element.rs b/src/parser/greater_element.rs index 59717d73..9e828c75 100644 --- a/src/parser/greater_element.rs +++ b/src/parser/greater_element.rs @@ -11,5 +11,13 @@ pub struct PlainListItem<'s> { pub source: &'s str, pub indentation: usize, pub bullet: &'s str, - pub contents: Vec>, + pub children: Vec>, +} + +#[derive(Debug)] +pub struct GreaterBlock<'s> { + pub source: &'s str, + pub name: &'s str, + pub parameters: Option<&'s str>, + pub children: Vec>, } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 276be7d2..fbb3fe7a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,6 +1,7 @@ mod document; mod element; mod error; +mod greater_block; mod greater_element; mod lesser_element; mod list; diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index 5070d035..5bbb8a72 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -128,6 +128,9 @@ pub enum ContextElement<'r, 's> { /// Stores the indentation level of the current list item ListItem(usize), + + /// Stores the name of the greater block + GreaterBlock(&'s str), } #[derive(Debug)] @@ -138,8 +141,6 @@ pub struct ExitMatcherNode<'r> { #[derive(Clone)] pub enum ChainBehavior<'r> { AndParent(Option<&'r Matcher>), - - #[allow(dead_code)] // Will be used when inside code/quote blocks IgnoreParent(Option<&'r Matcher>), } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index b0a0a60e..02cf02e4 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -26,19 +26,29 @@ use nom::combinator::verify; use nom::multi::many_till; use nom::sequence::tuple; -#[tracing::instrument(ret, level = "debug")] pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainList<'s>> { - let (remaining, first_item) = plain_list_item(context, input)?; + let (mut remaining, first_item) = plain_list_item(context, input)?; + let first_item_indentation = first_item.indentation; let plain_list_item_matcher = parser_with_context!(plain_list_item)(context); let exit_matcher = parser_with_context!(exit_matcher_parser)(context); - let (remaining, (mut children, _exit_contents)) = many_till( - verify(plain_list_item_matcher, |pli| { - pli.indentation == first_item.indentation - }), - exit_matcher, - )(remaining)?; + let mut children = Vec::new(); + children.push(first_item); + loop { + let exit_contents = exit_matcher(remaining); + if exit_contents.is_ok() { + break; + } + + let next_list_item = plain_list_item_matcher(remaining); + match next_list_item { + Ok((remain, next_child)) if next_child.indentation == first_item_indentation => { + children.push(next_child); + remaining = remain; + } + Ok(_) | Err(_) => break, + }; + } let source = get_consumed(input, remaining); - children.insert(0, first_item); Ok((remaining, PlainList { source, children })) } @@ -71,7 +81,7 @@ pub fn plain_list_item<'r, 's>( source, indentation: indent_level, bullet: bull, - contents: Vec::new(), + children: Vec::new(), }, )); } @@ -86,7 +96,7 @@ pub fn plain_list_item<'r, 's>( source, indentation: indent_level, bullet: bull, - contents, + children: contents, }, )); } diff --git a/toy_language.txt b/toy_language.txt index c5060874..e3c62aa3 100644 --- a/toy_language.txt +++ b/toy_language.txt @@ -24,3 +24,7 @@ foo bar 1. This is a list immediately after a paragraph 2. This is a second item in the list 1. This is a child of the second item +#+begin_center +1. foo +2. bar +#+end_center