diff --git a/org_mode_samples/.gitignore b/org_mode_samples/.gitignore new file mode 100644 index 0000000..fcf91aa --- /dev/null +++ b/org_mode_samples/.gitignore @@ -0,0 +1 @@ +*.tree.txt diff --git a/org_mode_samples/common.el b/org_mode_samples/common.el new file mode 100644 index 0000000..8f32896 --- /dev/null +++ b/org_mode_samples/common.el @@ -0,0 +1,11 @@ +(defun org-dump-ast (outpath) + (let + ( + ;; (parsed-tree (format "%s" (org-element-parse-buffer))) + (parsed-tree (pp-to-string (org-element-parse-buffer))) + ) + (with-temp-file outpath + (insert parsed-tree) + ) + ) + ) diff --git a/org_mode_samples/dump_org_ast.bash b/org_mode_samples/dump_org_ast.bash new file mode 100755 index 0000000..8ccd247 --- /dev/null +++ b/org_mode_samples/dump_org_ast.bash @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# +set -euo pipefail +IFS=$'\n\t' +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +INPUT_FILE="$1" +OUTPUT_FILE="$2" + +INIT_SCRIPT=$(cat < + +.PHONY: all +all: paragraphs.tree.txt nested_paragraphs.tree.txt + +.PHONY: clean +clean: +> rm -rf *.tree.txt + +%.tree.txt: %.org ../common.el ../dump_org_ast.bash +> ../dump_org_ast.bash $< $@ diff --git a/org_mode_samples/plain_lists/nested_paragraphs.org b/org_mode_samples/plain_lists/nested_paragraphs.org new file mode 100644 index 0000000..22b1a61 --- /dev/null +++ b/org_mode_samples/plain_lists/nested_paragraphs.org @@ -0,0 +1,6 @@ +lorem +1. foo + 1. bar + + +baz diff --git a/org_mode_samples/plain_lists/paragraphs.org b/org_mode_samples/plain_lists/paragraphs.org new file mode 100644 index 0000000..4ed7933 --- /dev/null +++ b/org_mode_samples/plain_lists/paragraphs.org @@ -0,0 +1,7 @@ +1. foo +2. bar + baz +3. lorem + + + ipsum diff --git a/src/parser/bold.rs b/src/parser/bold.rs index 5c64cf7..a8dc00a 100644 --- a/src/parser/bold.rs +++ b/src/parser/bold.rs @@ -95,6 +95,7 @@ fn _preceded_by_whitespace<'r, 's>(context: Context<'r, 's>) -> bool { return true; } ContextElement::Context(_) => {} + ContextElement::ListItem(_) => {} } } else { break; diff --git a/src/parser/combinator.rs b/src/parser/combinator.rs index fc0ec87..4b99f84 100644 --- a/src/parser/combinator.rs +++ b/src/parser/combinator.rs @@ -46,6 +46,7 @@ where ContextElement::ExitMatcherNode(_) => None, ContextElement::Context(_) => None, ContextElement::StartOfParagraph => None, + ContextElement::ListItem(_) => None, }) .collect(); if elements.is_empty() { @@ -93,6 +94,7 @@ where }) => { ret.push(token); } + ContextElement::ListItem(_) => {} }; } ret.reverse(); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7e0fd1d..545ef9f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7,8 +7,11 @@ mod list; mod paragraph; mod parser_context; mod parser_with_context; +mod plain_list; mod text; mod token; mod util; pub use document::document; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>; +pub use parser_context::ContextTree; +pub use plain_list::item; diff --git a/src/parser/paragraph.rs b/src/parser/paragraph.rs index 6097ec8..549c2d4 100644 --- a/src/parser/paragraph.rs +++ b/src/parser/paragraph.rs @@ -56,7 +56,7 @@ fn context_paragraph_end<'r, 's>( paragraph_end(input) } -fn paragraph_end(input: &str) -> Res<&str, &str> { +pub fn paragraph_end(input: &str) -> Res<&str, &str> { alt(( recognize(tuple(( map(line_break, TextElement::LineBreak), diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index 32bc039..ae3ae60 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -87,6 +87,7 @@ impl<'r, 's> ContextTree<'r, 's> { ContextElement::PreviousElementNode(_) => {} ContextElement::StartOfParagraph => {} ContextElement::Context(_) => {} + ContextElement::ListItem(_) => {} }; } // TODO: Make this a specific error instead of just a generic MyError @@ -99,6 +100,7 @@ pub enum ContextElement<'r, 's> { ExitMatcherNode(ExitMatcherNode<'r>), PreviousElementNode(PreviousElementNode<'s>), Context(&'r str), + ListItem(usize), StartOfParagraph, } @@ -115,6 +117,7 @@ pub struct PreviousElementNode<'r> { #[derive(Clone)] pub enum ChainBehavior<'r> { AndParent(Option<&'r Matcher>), + #[allow(dead_code)] IgnoreParent(Option<&'r Matcher>), } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs new file mode 100644 index 0000000..a00c29f --- /dev/null +++ b/src/parser/plain_list.rs @@ -0,0 +1,161 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::character::complete::anychar; +use nom::character::complete::digit1; +use nom::character::complete::line_ending; +use nom::character::complete::one_of; +use nom::character::complete::space0; +use nom::combinator::eof; +use nom::combinator::map; +use nom::combinator::not; +use nom::combinator::opt; +use nom::combinator::peek; +use nom::combinator::recognize; +use nom::combinator::verify; +use nom::multi::many1; +use nom::sequence::tuple; + +use super::combinator::context_many_till; +use super::error::CustomError; +use super::error::MyError; +use super::error::Res; +use super::parser_context::ContextElement; +use super::parser_with_context::parser_with_context; +use super::text::blank_line; +use super::text::line_break; +use super::text::space; +use super::text::text_element; +use super::token::ListItem; +use super::token::PlainList; +use super::token::TextElement; +use super::token::Token; +use super::Context; + +#[allow(dead_code)] +pub fn plain_list<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, PlainList<'s>> { + // todo + todo!() +} + +#[allow(dead_code)] +pub fn item<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, ListItem<'s>> { + let (remaining, leading_whitespace) = space0(i)?; + let indent_level = leading_whitespace.len(); + let list_item_context = context.with_additional_node(ContextElement::ListItem(indent_level)); + let (remaining, (bul, countset, check, tg, sp, (contents, end))) = tuple(( + bullet, + opt(tuple((space, counter_set))), + opt(tuple((space, check_box))), + opt(tuple((space, item_tag))), + space, + context_many_till(&list_item_context, text_element, item_end), + ))(remaining)?; + + let elements = contents + .into_iter() + .filter_map(|token| match token { + Token::TextElement(text_element) => Some(text_element), + Token::Paragraph(_) => panic!("There should only be text elements in items."), + }) + .collect(); + + let source = { + let offset = remaining.as_ptr() as usize - i.as_ptr() as usize; + &i[..offset] + }; + + let ret = ListItem { + source, + leading_whitespace, + bullet: bul, + counter_set: countset.map(|(_spc, count)| count), + check_box: check.map(|(_spc, check)| check), + item_tag: tg.map(|(_spc, tg)| tg), + contents: elements, + }; + Ok((remaining, ret)) +} + +fn counter<'s>(i: &'s str) -> Res<&'s str, &'s str> { + alt((recognize(one_of("abcdefghijklmnopqrstuvwxyz")), digit1))(i) +} + +fn bullet<'s>(i: &'s str) -> Res<&'s str, &'s str> { + alt(( + tag("*"), + tag("-"), + tag("+"), + recognize(tuple((counter, alt((tag("."), tag(")")))))), + ))(i) +} + +fn counter_set<'s>(i: &'s str) -> Res<&'s str, &'s str> { + recognize(tuple((tag("[@"), counter, tag("]"))))(i) +} + +fn check_box<'s>(i: &'s str) -> Res<&'s str, &'s str> { + recognize(alt((tag("[ ]"), tag("[X]"), tag("[-]"))))(i) +} + +fn item_tag<'s>(i: &'s str) -> Res<&'s str, &'s str> { + recognize(tuple((tag_text, tag_separator)))(i) +} + +fn tag_text<'s>(i: &'s str) -> Res<&'s str, &'s str> { + recognize(many1(tag_text_character))(i) +} + +fn tag_text_character<'s>(i: &'s str) -> Res<&'s str, &'s str> { + not(alt((tag_separator, line_ending)))(i)?; + recognize(anychar)(i) +} + +fn tag_separator<'s>(i: &'s str) -> Res<&'s str, &'s str> { + tag(" :: ")(i) +} + +pub fn item_end<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, &'s str> { + let item_matcher = parser_with_context!(item)(&context); + let line_indented_matcher = parser_with_context!(line_indented_lte)(&context); + alt(( + // TODO: This should ends the highest plain list + plain_list_end, + recognize(tuple((line_ending, peek(line_indented_matcher)))), + // TODO: Do we still need the item_matcher entry here? If we remove it, then child items should become part of the body of the parent item which would match the description on https://orgmode.org/worg/org-syntax.html + recognize(tuple((line_ending, peek(item_matcher)))), + ))(i) +} + +fn line_indented_lte<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, &'s str> { + let current_item_indent_level: &usize = get_context_item_indent(context).ok_or( + nom::Err::Error(CustomError::MyError(MyError("NotInPlainListItem"))), + )?; + + let matched = recognize(verify( + tuple((space0::<&str, _>, anychar)), + |(_space0, _anychar)| _space0.len() <= *current_item_indent_level, + ))(i)?; + + Ok(matched) +} + +fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize> { + for thing in context.iter() { + match thing.get_data() { + ContextElement::ListItem(depth) => return Some(depth), + _ => {} + }; + } + None +} + +pub fn plain_list_end(input: &str) -> Res<&str, &str> { + alt(( + recognize(tuple(( + map(line_break, TextElement::LineBreak), + blank_line, + many1(blank_line), + ))), + eof, + ))(input) +} diff --git a/src/parser/text.rs b/src/parser/text.rs index 4aaf632..a82a8df 100644 --- a/src/parser/text.rs +++ b/src/parser/text.rs @@ -24,7 +24,7 @@ pub fn line_break(input: &str) -> Res<&str, LineBreak> { map(line_ending, |s: &str| LineBreak { source: s })(input) } -fn space(input: &str) -> Res<&str, Space> { +pub fn space(input: &str) -> Res<&str, Space> { map(space1, |s: &str| Space { source: s })(input) } diff --git a/src/parser/token.rs b/src/parser/token.rs index 65d4bc0..a7eb0b4 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -95,3 +95,42 @@ impl<'a> Source<'a> for Paragraph<'a> { self.source } } + +#[derive(Debug)] +pub struct PlainList<'a> { + pub source: &'a str, +} + +impl<'a> Source<'a> for PlainList<'a> { + fn get_source(&'a self) -> &'a str { + self.source + } +} + +#[derive(Debug)] +pub struct ListItem<'a> { + pub source: &'a str, + pub leading_whitespace: &'a str, + pub bullet: &'a str, + pub counter_set: Option<&'a str>, + pub check_box: Option<&'a str>, + pub item_tag: Option<&'a str>, + pub contents: Vec>, +} + +impl<'a> Source<'a> for ListItem<'a> { + fn get_source(&'a self) -> &'a str { + self.source + } +} + +#[derive(Debug)] +pub struct ListCounter<'a> { + pub source: &'a str, +} + +impl<'a> Source<'a> for ListCounter<'a> { + fn get_source(&'a self) -> &'a str { + self.source + } +} diff --git a/src/parser/util.rs b/src/parser/util.rs index 44886ce..3600c13 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -9,6 +9,7 @@ pub fn in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: &'x str) - ContextElement::Context(name) if *name == section_name => return true, ContextElement::Context(_) => {} ContextElement::StartOfParagraph => {} // TODO: If we specialize this to bold then this would be a good spot to stop scanning + ContextElement::ListItem(_) => {} } } false