Merge branch 'plainlist'

2023-03-21 13:44:30 -04:00 · 2023-03-21 13:44:30 -04:00 · 6b5db46205
commit 6b5db46205
parent f39319702c cee12b7512
15 changed files with 276 additions and 2 deletions
--- a/org_mode_samples/.gitignore
+++ b/org_mode_samples/.gitignore
@ -0,0 +1 @@
+*.tree.txt
--- a/org_mode_samples/common.el
+++ b/org_mode_samples/common.el
@ -0,0 +1,11 @@
+(defun org-dump-ast (outpath)
+  "Write the pretty-printed org-element parse tree of the current buffer to OUTPATH."
+  ;; The tree is rendered to a string first, before `with-temp-file'
+  ;; switches to its temporary buffer.
+  (let ((ast-text (pp-to-string (org-element-parse-buffer))))
+    (with-temp-file outpath
+      (insert ast-text))))
--- a/org_mode_samples/dump_org_ast.bash
+++ b/org_mode_samples/dump_org_ast.bash
@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+#
+set -euo pipefail
+IFS=$'\n\t'
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+INPUT_FILE="$1"
+OUTPUT_FILE="$2"
+
+INIT_SCRIPT=$(cat <<EOF
+(progn
+     (org-mode)
+     (org-dump-ast "$OUTPUT_FILE")
+)
+EOF
+)
+exec emacs -q --no-site-file --no-splash --batch --load "$DIR/common.el" --insert "$INPUT_FILE" --eval "$INIT_SCRIPT"
--- a/org_mode_samples/plain_lists/Makefile
+++ b/org_mode_samples/plain_lists/Makefile
@ -0,0 +1,22 @@
+# Regenerates the *.tree.txt AST dumps for the Org samples in this directory.
+SHELL := bash
+# Run all lines of a recipe in a single shell invocation.
+.ONESHELL:
+# Make recipes fail on errors, unset variables and failed pipeline stages.
+.SHELLFLAGS := -eu -o pipefail -c
+# Delete a target whose recipe failed.
+.DELETE_ON_ERROR:
+MAKEFLAGS += --warn-undefined-variables
+MAKEFLAGS += --no-builtin-rules
+# NOTE(review): OUT is not referenced by any rule visible here - confirm it is still needed.
+OUT=out
+
+# Refuse to run on a Make without .RECIPEPREFIX support, since every recipe
+# below relies on the '>' prefix.
+ifeq ($(origin .RECIPEPREFIX), undefined)
+  $(error This Make does not support .RECIPEPREFIX. Please use GNU Make 4.0 or later)
+endif
+# Recipe lines start with '>' instead of a tab.
+.RECIPEPREFIX = >
+
+.PHONY: all
+all: paragraphs.tree.txt nested_paragraphs.tree.txt
+
+.PHONY: clean
+clean:
+> rm -rf *.tree.txt
+
+# Build each AST dump from its .org source; rebuild when the shared helper
+# scripts change. $< is the .org file, $@ is the .tree.txt target.
+%.tree.txt: %.org ../common.el ../dump_org_ast.bash
+> ../dump_org_ast.bash $< $@
--- a/org_mode_samples/plain_lists/nested_paragraphs.org
+++ b/org_mode_samples/plain_lists/nested_paragraphs.org
@ -0,0 +1,6 @@
+lorem
+1. foo
+   1. bar
+
+
+baz
--- a/org_mode_samples/plain_lists/paragraphs.org
+++ b/org_mode_samples/plain_lists/paragraphs.org
@ -0,0 +1,7 @@
+1. foo
+2. bar
+   baz
+3. lorem
+
+
+   ipsum
--- a/src/parser/bold.rs
+++ b/src/parser/bold.rs
@ -95,6 +95,7 @@ fn _preceded_by_whitespace<'r, 's>(context: Context<'r, 's>) -> bool {
                    return true;
                }
                ContextElement::Context(_) => {}
+                ContextElement::ListItem(_) => {}
            }
        } else {
            break;
--- a/src/parser/combinator.rs
+++ b/src/parser/combinator.rs
@ -46,6 +46,7 @@ where
                ContextElement::ExitMatcherNode(_) => None,
                ContextElement::Context(_) => None,
                ContextElement::StartOfParagraph => None,
+                ContextElement::ListItem(_) => None,
            })
            .collect();
        if elements.is_empty() {
@ -93,6 +94,7 @@ where
                            }) => {
                                ret.push(token);
                            }
+                            ContextElement::ListItem(_) => {}
                        };
                    }
                    ret.reverse();
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@ -7,8 +7,11 @@ mod list;
 mod paragraph;
 mod parser_context;
 mod parser_with_context;
+mod plain_list;
 mod text;
 mod token;
 mod util;
 pub use document::document;
 type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>;
+pub use parser_context::ContextTree;
+pub use plain_list::item;
--- a/src/parser/paragraph.rs
+++ b/src/parser/paragraph.rs
@ -56,7 +56,7 @@ fn context_paragraph_end<'r, 's>(
    paragraph_end(input)
 }

-fn paragraph_end(input: &str) -> Res<&str, &str> {
+pub fn paragraph_end(input: &str) -> Res<&str, &str> {
    alt((
        recognize(tuple((
            map(line_break, TextElement::LineBreak),
--- a/src/parser/parser_context.rs
+++ b/src/parser/parser_context.rs
@ -87,6 +87,7 @@ impl<'r, 's> ContextTree<'r, 's> {
                ContextElement::PreviousElementNode(_) => {}
                ContextElement::StartOfParagraph => {}
                ContextElement::Context(_) => {}
+                ContextElement::ListItem(_) => {}
            };
        }
        // TODO: Make this a specific error instead of just a generic MyError
@ -99,6 +100,7 @@ pub enum ContextElement<'r, 's> {
    ExitMatcherNode(ExitMatcherNode<'r>),
    PreviousElementNode(PreviousElementNode<'s>),
    Context(&'r str),
+    ListItem(usize),
    StartOfParagraph,
 }

@ -115,6 +117,7 @@ pub struct PreviousElementNode<'r> {
 #[derive(Clone)]
 pub enum ChainBehavior<'r> {
    AndParent(Option<&'r Matcher>),
+    #[allow(dead_code)]
    IgnoreParent(Option<&'r Matcher>),
 }

--- a/src/parser/plain_list.rs
+++ b/src/parser/plain_list.rs
@ -0,0 +1,161 @@
+use nom::branch::alt;
+use nom::bytes::complete::tag;
+use nom::character::complete::anychar;
+use nom::character::complete::digit1;
+use nom::character::complete::line_ending;
+use nom::character::complete::one_of;
+use nom::character::complete::space0;
+use nom::combinator::eof;
+use nom::combinator::map;
+use nom::combinator::not;
+use nom::combinator::opt;
+use nom::combinator::peek;
+use nom::combinator::recognize;
+use nom::combinator::verify;
+use nom::multi::many1;
+use nom::sequence::tuple;
+
+use super::combinator::context_many_till;
+use super::error::CustomError;
+use super::error::MyError;
+use super::error::Res;
+use super::parser_context::ContextElement;
+use super::parser_with_context::parser_with_context;
+use super::text::blank_line;
+use super::text::line_break;
+use super::text::space;
+use super::text::text_element;
+use super::token::ListItem;
+use super::token::PlainList;
+use super::token::TextElement;
+use super::token::Token;
+use super::Context;
+
+/// Parser entry point for a whole plain list.
+///
+/// Not implemented yet: calling this panics via `todo!()`.
+#[allow(dead_code)]
+pub fn plain_list<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, PlainList<'s>> {
+    // Placeholder until list-level parsing is written.
+    todo!()
+}
+
+/// Parses a single plain-list item starting at `i`.
+///
+/// Leading horizontal whitespace is consumed first; its length is recorded on the
+/// context as `ContextElement::ListItem` so that `item_end` can compare the
+/// indentation of following lines against this item. After the bullet come an
+/// optional counter-set, check-box and tag (each preceded by whitespace), then
+/// mandatory whitespace, then text elements until `item_end` matches.
+///
+/// # Panics
+///
+/// Panics if the item body yields a `Token::Paragraph`.
+#[allow(dead_code)]
+pub fn item<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, ListItem<'s>> {
+    let (after_indent, leading_whitespace) = space0(i)?;
+    let item_context =
+        context.with_additional_node(ContextElement::ListItem(leading_whitespace.len()));
+
+    let (
+        remaining,
+        (bullet_text, counter_set_match, check_box_match, tag_match, _gap, (body_tokens, _end)),
+    ) = tuple((
+        bullet,
+        opt(tuple((space, counter_set))),
+        opt(tuple((space, check_box))),
+        opt(tuple((space, item_tag))),
+        space,
+        context_many_till(&item_context, text_element, item_end),
+    ))(after_indent)?;
+
+    // Unwrap the body tokens; anything other than a text element is a parser bug.
+    let mut contents = Vec::new();
+    for token in body_tokens {
+        match token {
+            Token::TextElement(element) => contents.push(element),
+            Token::Paragraph(_) => panic!("There should only be text elements in items."),
+        }
+    }
+
+    // Everything between the start of `i` and the start of `remaining` was consumed.
+    let consumed = remaining.as_ptr() as usize - i.as_ptr() as usize;
+    let source = &i[..consumed];
+
+    Ok((
+        remaining,
+        ListItem {
+            source,
+            leading_whitespace,
+            bullet: bullet_text,
+            counter_set: counter_set_match.map(|(_, matched)| matched),
+            check_box: check_box_match.map(|(_, matched)| matched),
+            item_tag: tag_match.map(|(_, matched)| matched),
+            contents,
+        },
+    ))
+}
+
+/// Recognizes a list counter: a single lowercase ASCII letter, or one or more digits.
+fn counter(i: &str) -> Res<&str, &str> {
+    let single_letter = recognize(one_of("abcdefghijklmnopqrstuvwxyz"));
+    alt((single_letter, digit1))(i)
+}
+
+/// Recognizes an item bullet: `*`, `-`, `+`, or a counter followed by `.` or `)`.
+fn bullet(i: &str) -> Res<&str, &str> {
+    let ordered = recognize(tuple((counter, alt((tag("."), tag(")"))))));
+    alt((tag("*"), tag("-"), tag("+"), ordered))(i)
+}
+
+/// Recognizes a counter-set cookie of the form `[@COUNTER]`.
+fn counter_set(i: &str) -> Res<&str, &str> {
+    let bracketed_counter = tuple((tag("[@"), counter, tag("]")));
+    recognize(bracketed_counter)(i)
+}
+
+/// Recognizes a check-box: `[ ]`, `[X]` or `[-]`.
+fn check_box(i: &str) -> Res<&str, &str> {
+    // Each `tag` already yields the matched slice, so no extra wrapper is needed.
+    alt((tag("[ ]"), tag("[X]"), tag("[-]")))(i)
+}
+
+/// Recognizes an item tag: the tag text followed by ` ::`.
+///
+/// The space after `::` is required but deliberately NOT consumed: `item`
+/// expects mandatory whitespace right after the optional tag, so consuming it
+/// here made inputs such as `- tag :: description` fail to parse. The returned
+/// slice therefore ends at the `::`.
+fn item_tag<'s>(i: &'s str) -> Res<&'s str, &'s str> {
+    recognize(tuple((tag_text, tag(" ::"), peek(tag(" ")))))(i)
+}
+
+/// Recognizes one or more tag-text characters as a single slice.
+fn tag_text(i: &str) -> Res<&str, &str> {
+    let tag_characters = many1(tag_text_character);
+    recognize(tag_characters)(i)
+}
+
+/// Recognizes one character of tag text.
+///
+/// Fails without consuming anything when the input starts with the tag
+/// separator or a line ending.
+fn tag_text_character(i: &str) -> Res<&str, &str> {
+    let stop_marker = alt((tag_separator, line_ending));
+    not(stop_marker)(i)?;
+    recognize(anychar)(i)
+}
+
+/// Recognizes the literal ` :: ` that separates an item tag from the item body.
+fn tag_separator(i: &str) -> Res<&str, &str> {
+    let separator = tag(" :: ");
+    separator(i)
+}
+
+/// Exit matcher for the body of a list item.
+///
+/// Matches, in order of preference:
+/// 1. the end of the whole plain list (`plain_list_end`),
+/// 2. a line ending followed by a line indented no deeper than the current item,
+/// 3. a line ending followed by something that parses as another item.
+///
+/// Alternatives 2 and 3 only peek at what follows the line ending.
+pub fn item_end<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, &'s str> {
+    let item_matcher = parser_with_context!(item)(&context);
+    let line_indented_matcher = parser_with_context!(line_indented_lte)(&context);
+    alt((
+        // TODO: This should end the highest plain list
+        plain_list_end,
+        recognize(tuple((line_ending, peek(line_indented_matcher)))),
+        // TODO: Do we still need the item_matcher entry here? If we remove it, then child items should become part of the body of the parent item which would match the description on https://orgmode.org/worg/org-syntax.html
+        recognize(tuple((line_ending, peek(item_matcher)))),
+    ))(i)
+}
+
+/// Recognizes the leading whitespace and first character of a line whose
+/// indentation is less than or equal to that of the enclosing list item.
+///
+/// Fails with `NotInPlainListItem` when the context contains no `ListItem`.
+fn line_indented_lte<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, &'s str> {
+    let max_indent: usize = *get_context_item_indent(context).ok_or_else(|| {
+        nom::Err::Error(CustomError::MyError(MyError("NotInPlainListItem")))
+    })?;
+
+    recognize(verify(
+        tuple((space0::<&str, _>, anychar)),
+        |(indent, _first_char)| indent.len() <= max_indent,
+    ))(i)
+}
+
+/// Returns the indent stored in the first `ContextElement::ListItem` yielded by
+/// `context.iter()`, or `None` when the context holds no list item.
+fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize> {
+    for node in context.iter() {
+        if let ContextElement::ListItem(indent) = node.get_data() {
+            return Some(indent);
+        }
+    }
+    None
+}
+
+/// Recognizes the end of a plain list: a line break followed by at least two
+/// blank lines, or the end of the input.
+pub fn plain_list_end(input: &str) -> Res<&str, &str> {
+    let break_then_blank_lines = recognize(tuple((
+        map(line_break, TextElement::LineBreak),
+        blank_line,
+        many1(blank_line),
+    )));
+    alt((break_then_blank_lines, eof))(input)
+}
--- a/src/parser/text.rs
+++ b/src/parser/text.rs
@ -24,7 +24,7 @@ pub fn line_break(input: &str) -> Res<&str, LineBreak> {
    map(line_ending, |s: &str| LineBreak { source: s })(input)
 }

-fn space(input: &str) -> Res<&str, Space> {
+pub fn space(input: &str) -> Res<&str, Space> {
    map(space1, |s: &str| Space { source: s })(input)
 }

--- a/src/parser/token.rs
+++ b/src/parser/token.rs
@ -95,3 +95,42 @@ impl<'a> Source<'a> for Paragraph<'a> {
        self.source
    }
 }
+
+/// Token for a plain list. Currently it only carries the source text.
+#[derive(Debug)]
+pub struct PlainList<'a> {
+    /// Slice of the input covered by this list.
+    pub source: &'a str,
+}
+
+impl<'a> Source<'a> for PlainList<'a> {
+    /// Returns the stored source slice.
+    fn get_source(&'a self) -> &'a str {
+        self.source
+    }
+}
+
+/// Token for a single plain-list item, as produced by the `item` parser.
+#[derive(Debug)]
+pub struct ListItem<'a> {
+    /// Everything the item parser consumed, starting at the leading whitespace.
+    pub source: &'a str,
+    /// Horizontal whitespace preceding the bullet.
+    pub leading_whitespace: &'a str,
+    /// The bullet text (`*`, `-`, `+`, or a counter followed by `.` or `)`).
+    pub bullet: &'a str,
+    /// The `[@COUNTER]` cookie, when present.
+    pub counter_set: Option<&'a str>,
+    /// The check-box (`[ ]`, `[X]` or `[-]`), when present.
+    pub check_box: Option<&'a str>,
+    /// The item tag as matched by the tag parser, when present.
+    pub item_tag: Option<&'a str>,
+    /// Text elements making up the item body.
+    pub contents: Vec<TextElement<'a>>,
+}
+
+impl<'a> Source<'a> for ListItem<'a> {
+    /// Returns the stored source slice.
+    fn get_source(&'a self) -> &'a str {
+        self.source
+    }
+}
+
+/// Token wrapping the source text of a list counter.
+///
+/// NOTE(review): nothing in the visible parser code constructs this type yet -
+/// confirm whether it is intended for the counter part of a bullet.
+#[derive(Debug)]
+pub struct ListCounter<'a> {
+    /// Slice of the input covered by this counter.
+    pub source: &'a str,
+}
+
+impl<'a> Source<'a> for ListCounter<'a> {
+    /// Returns the stored source slice.
+    fn get_source(&'a self) -> &'a str {
+        self.source
+    }
+}
--- a/src/parser/util.rs
+++ b/src/parser/util.rs
@ -9,6 +9,7 @@ pub fn in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: &'x str) -
            ContextElement::Context(name) if *name == section_name => return true,
            ContextElement::Context(_) => {}
            ContextElement::StartOfParagraph => {} // TODO: If we specialize this to bold then this would be a good spot to stop scanning
+            ContextElement::ListItem(_) => {}
        }
    }
    false