Merge branch 'plainlist'

This commit is contained in:
Tom Alexander 2023-03-21 13:44:30 -04:00
commit 6b5db46205
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
15 changed files with 276 additions and 2 deletions

1
org_mode_samples/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*.tree.txt

View File

@ -0,0 +1,11 @@
(defun org-dump-ast (outpath)
  "Write the pretty-printed org-element parse tree of the current buffer to OUTPATH.

The tree comes from `org-element-parse-buffer' and is rendered with
`pp-to-string'."
  ;; The tree must be computed before entering `with-temp-file': inside that
  ;; form the current buffer is the temporary output buffer, not the org buffer.
  (let ((parsed-tree (pp-to-string (org-element-parse-buffer))))
    ;; Non-pretty-printed alternative kept for reference:
    ;; (parsed-tree (format "%s" (org-element-parse-buffer)))
    (with-temp-file outpath
      (insert parsed-tree))))

View File

@ -0,0 +1,17 @@
#!/usr/bin/env bash
#
# Dump the org-mode parse tree of INPUT_FILE into OUTPUT_FILE using a batch
# Emacs that loads common.el from this script's directory.
#
# Usage: <this script> INPUT_FILE OUTPUT_FILE
set -euo pipefail
IFS=$'\n\t'
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Fail with a usage message instead of an "unbound variable" error from set -u.
if [ "$#" -ne 2 ]; then
    echo "Usage: ${BASH_SOURCE[0]} INPUT_FILE OUTPUT_FILE" >&2
    exit 1
fi

INPUT_FILE="$1"
OUTPUT_FILE="$2"

# Hand the output path to Emacs through the environment instead of splicing it
# into the elisp source: a path containing a double quote or a backslash would
# otherwise break (or inject code into) the --eval expression.
export ORG_DUMP_AST_OUTPUT_FILE="$OUTPUT_FILE"

# Quoted heredoc delimiter: the elisp is passed to Emacs verbatim, with no
# shell expansion.
INIT_SCRIPT=$(cat <<'EOF'
(progn
  (org-mode)
  (org-dump-ast (getenv "ORG_DUMP_AST_OUTPUT_FILE"))
  )
EOF
)

exec emacs -q --no-site-file --no-splash --batch --load "$DIR/common.el" --insert "$INPUT_FILE" --eval "$INIT_SCRIPT"

View File

@ -0,0 +1,22 @@
# Regenerates the *.tree.txt dumps from the matching .org files in this
# directory by running ../dump_org_ast.bash on each of them.

# Run recipes with bash, in strict mode (-e, -u, pipefail).
SHELL := bash
# Execute all lines of a recipe in one shell invocation.
.ONESHELL:
.SHELLFLAGS := -eu -o pipefail -c
# Delete a target whose recipe failed so a partial dump is never left behind.
.DELETE_ON_ERROR:
MAKEFLAGS += --warn-undefined-variables
MAKEFLAGS += --no-builtin-rules
# NOTE(review): OUT is not referenced by any rule visible in this file.
OUT=out
# Recipe lines below start with ">" instead of a tab; refuse to run on a Make
# that does not understand .RECIPEPREFIX.
ifeq ($(origin .RECIPEPREFIX), undefined)
$(error This Make does not support .RECIPEPREFIX. Please use GNU Make 4.0 or later)
endif
.RECIPEPREFIX = >
# Default target: build every listed dump.
.PHONY: all
all: paragraphs.tree.txt nested_paragraphs.tree.txt
# Remove every generated dump.
.PHONY: clean
clean:
> rm -rf *.tree.txt
# A dump is rebuilt when its .org source, the shared elisp, or the dump script
# changes. $< is the .org file (first prerequisite), $@ is the dump to write.
%.tree.txt: %.org ../common.el ../dump_org_ast.bash
> ../dump_org_ast.bash $< $@

View File

@ -0,0 +1,6 @@
lorem
1. foo
1. bar
baz

View File

@ -0,0 +1,7 @@
1. foo
2. bar
baz
3. lorem
ipsum

View File

@ -95,6 +95,7 @@ fn _preceded_by_whitespace<'r, 's>(context: Context<'r, 's>) -> bool {
return true;
}
ContextElement::Context(_) => {}
ContextElement::ListItem(_) => {}
}
} else {
break;

View File

@ -46,6 +46,7 @@ where
ContextElement::ExitMatcherNode(_) => None,
ContextElement::Context(_) => None,
ContextElement::StartOfParagraph => None,
ContextElement::ListItem(_) => None,
})
.collect();
if elements.is_empty() {
@ -93,6 +94,7 @@ where
}) => {
ret.push(token);
}
ContextElement::ListItem(_) => {}
};
}
ret.reverse();

View File

@ -7,8 +7,11 @@ mod list;
mod paragraph;
mod parser_context;
mod parser_with_context;
mod plain_list;
mod text;
mod token;
mod util;
pub use document::document;
type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>;
pub use parser_context::ContextTree;
pub use plain_list::item;

View File

@ -56,7 +56,7 @@ fn context_paragraph_end<'r, 's>(
paragraph_end(input)
}
fn paragraph_end(input: &str) -> Res<&str, &str> {
pub fn paragraph_end(input: &str) -> Res<&str, &str> {
alt((
recognize(tuple((
map(line_break, TextElement::LineBreak),

View File

@ -87,6 +87,7 @@ impl<'r, 's> ContextTree<'r, 's> {
ContextElement::PreviousElementNode(_) => {}
ContextElement::StartOfParagraph => {}
ContextElement::Context(_) => {}
ContextElement::ListItem(_) => {}
};
}
// TODO: Make this a specific error instead of just a generic MyError
@ -99,6 +100,7 @@ pub enum ContextElement<'r, 's> {
ExitMatcherNode(ExitMatcherNode<'r>),
PreviousElementNode(PreviousElementNode<'s>),
Context(&'r str),
ListItem(usize),
StartOfParagraph,
}
@ -115,6 +117,7 @@ pub struct PreviousElementNode<'r> {
#[derive(Clone)]
pub enum ChainBehavior<'r> {
AndParent(Option<&'r Matcher>),
#[allow(dead_code)]
IgnoreParent(Option<&'r Matcher>),
}

161
src/parser/plain_list.rs Normal file
View File

@ -0,0 +1,161 @@
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::anychar;
use nom::character::complete::digit1;
use nom::character::complete::line_ending;
use nom::character::complete::one_of;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many1;
use nom::sequence::tuple;
use super::combinator::context_many_till;
use super::error::CustomError;
use super::error::MyError;
use super::error::Res;
use super::parser_context::ContextElement;
use super::parser_with_context::parser_with_context;
use super::text::blank_line;
use super::text::line_break;
use super::text::space;
use super::text::text_element;
use super::token::ListItem;
use super::token::PlainList;
use super::token::TextElement;
use super::token::Token;
use super::Context;
/// Placeholder for the parser that will produce a [`PlainList`].
///
/// # Panics
///
/// Always panics: the body is still `todo!()`.
#[allow(dead_code)]
pub fn plain_list<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, PlainList<'s>> {
// todo
todo!()
}
/// Parse one plain-list item starting at `i`.
///
/// The accepted shape is, in order: optional leading spaces/tabs, a `bullet`,
/// then optionally a counter-set, a check box and an item tag (each preceded
/// by `space`), a mandatory `space`, and finally text elements collected by
/// `context_many_till` until `item_end` matches.
///
/// The byte length of the leading whitespace is pushed onto the context as
/// `ContextElement::ListItem` so that `item_end` can compare the indentation
/// of later lines against it.
///
/// Returns the unparsed remainder and a [`ListItem`] whose `source` is the
/// slice of `i` that was consumed.
///
/// # Panics
///
/// Panics if the item body yields a `Token::Paragraph`.
#[allow(dead_code)]
pub fn item<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, ListItem<'s>> {
    let (after_indent, leading_whitespace) = space0(i)?;
    let list_item_context =
        context.with_additional_node(ContextElement::ListItem(leading_whitespace.len()));

    let (
        remaining,
        (marker, counter_override, check_state, tag_part, _separator, (tokens, _exit)),
    ) = tuple((
        bullet,
        opt(tuple((space, counter_set))),
        opt(tuple((space, check_box))),
        opt(tuple((space, item_tag))),
        space,
        context_many_till(&list_item_context, text_element, item_end),
    ))(after_indent)?;

    // Item bodies are expected to hold bare text elements only.
    let contents = tokens
        .into_iter()
        .map(|token| match token {
            Token::TextElement(text_element) => text_element,
            Token::Paragraph(_) => panic!("There should only be text elements in items."),
        })
        .collect();

    // `remaining` points into `i`, so the pointer distance is the number of
    // bytes this item consumed.
    let consumed = remaining.as_ptr() as usize - i.as_ptr() as usize;

    Ok((
        remaining,
        ListItem {
            source: &i[..consumed],
            leading_whitespace,
            bullet: marker,
            counter_set: counter_override.map(|(_space, value)| value),
            check_box: check_state.map(|(_space, value)| value),
            item_tag: tag_part.map(|(_space, value)| value),
            contents,
        },
    ))
}
/// Match a list counter: a single lowercase ASCII letter, or one or more digits.
fn counter<'s>(i: &'s str) -> Res<&'s str, &'s str> {
    let single_letter = recognize(one_of("abcdefghijklmnopqrstuvwxyz"));
    alt((single_letter, digit1))(i)
}
/// Match a list bullet: `*`, `-`, `+`, or a `counter` followed by `.` or `)`.
fn bullet<'s>(i: &'s str) -> Res<&'s str, &'s str> {
    let counter_terminator = alt((tag("."), tag(")")));
    let ordered_bullet = recognize(tuple((counter, counter_terminator)));
    alt((tag("*"), tag("-"), tag("+"), ordered_bullet))(i)
}
/// Match a counter-set cookie such as `[@3]` or `[@b]` and return it whole,
/// brackets included.
fn counter_set<'s>(i: &'s str) -> Res<&'s str, &'s str> {
    let bracketed_counter = tuple((tag("[@"), counter, tag("]")));
    recognize(bracketed_counter)(i)
}
/// Match one of the three check-box forms: `[ ]`, `[X]` or `[-]`.
fn check_box<'s>(i: &'s str) -> Res<&'s str, &'s str> {
    // Each `tag` already yields exactly the matched slice.
    alt((tag("[ ]"), tag("[X]"), tag("[-]")))(i)
}
/// Match an item tag: the tag text followed by the ` :: ` separator, returned
/// as one slice that includes the separator.
fn item_tag<'s>(i: &'s str) -> Res<&'s str, &'s str> {
    let text_then_separator = tuple((tag_text, tag_separator));
    recognize(text_then_separator)(i)
}
/// Match one or more tag-text characters and return them as a single slice.
fn tag_text<'s>(i: &'s str) -> Res<&'s str, &'s str> {
    let one_or_more_characters = many1(tag_text_character);
    recognize(one_or_more_characters)(i)
}
fn tag_text_character<'s>(i: &'s str) -> Res<&'s str, &'s str> {
not(alt((tag_separator, line_ending)))(i)?;
recognize(anychar)(i)
}
/// Match the literal separator that follows an item tag: space, two colons, space.
fn tag_separator<'s>(i: &'s str) -> Res<&'s str, &'s str> {
    const SEPARATOR: &str = " :: ";
    tag(SEPARATOR)(i)
}
/// Recognize the point at which the current list item stops.
///
/// `item` passes this function as the last argument of `context_many_till`.
/// The alternatives are tried in order:
///
/// 1. `plain_list_end`;
/// 2. a line ending followed by a line that `line_indented_lte` accepts;
/// 3. a line ending followed by input that `item` can parse.
///
/// In cases 2 and 3 the lookahead is wrapped in `peek`, so only the line
/// ending itself is part of the returned slice.
pub fn item_end<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, &'s str> {
// Both parsers need the context; `parser_with_context!` presumably binds it
// so that they can be used as plain nom parsers below.
let item_matcher = parser_with_context!(item)(&context);
let line_indented_matcher = parser_with_context!(line_indented_lte)(&context);
alt((
// TODO: This should end the highest plain list
plain_list_end,
recognize(tuple((line_ending, peek(line_indented_matcher)))),
// TODO: Do we still need the item_matcher entry here? If we remove it, then child items should become part of the body of the parent item which would match the description on https://orgmode.org/worg/org-syntax.html
recognize(tuple((line_ending, peek(item_matcher)))),
))(i)
}
fn line_indented_lte<'r, 's>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, &'s str> {
let current_item_indent_level: &usize = get_context_item_indent(context).ok_or(
nom::Err::Error(CustomError::MyError(MyError("NotInPlainListItem"))),
)?;
let matched = recognize(verify(
tuple((space0::<&str, _>, anychar)),
|(_space0, _anychar)| _space0.len() <= *current_item_indent_level,
))(i)?;
Ok(matched)
}
/// Return the indentation stored in the first `ContextElement::ListItem`
/// encountered while iterating over `context`, or `None` if there is none.
fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize> {
    for node in context.iter() {
        if let ContextElement::ListItem(depth) = node.get_data() {
            return Some(depth);
        }
    }
    None
}
/// Match the end of a plain list.
///
/// Succeeds either on a line break followed by at least two `blank_line`
/// matches (the whole run is returned), or at end of input.
pub fn plain_list_end(input: &str) -> Res<&str, &str> {
    let blank_line_run = recognize(tuple((
        map(line_break, TextElement::LineBreak),
        blank_line,
        many1(blank_line),
    )));
    alt((blank_line_run, eof))(input)
}

View File

@ -24,7 +24,7 @@ pub fn line_break(input: &str) -> Res<&str, LineBreak> {
map(line_ending, |s: &str| LineBreak { source: s })(input)
}
fn space(input: &str) -> Res<&str, Space> {
pub fn space(input: &str) -> Res<&str, Space> {
map(space1, |s: &str| Space { source: s })(input)
}

View File

@ -95,3 +95,42 @@ impl<'a> Source<'a> for Paragraph<'a> {
self.source
}
}
/// Token for a whole plain list.
///
/// This is the output type of the `plain_list` parser, which is still a
/// `todo!()` stub.
#[derive(Debug)]
pub struct PlainList<'a> {
/// The source text this token covers.
pub source: &'a str,
}
impl<'a> Source<'a> for PlainList<'a> {
/// Returns the stored `source` slice.
fn get_source(&'a self) -> &'a str {
self.source
}
}
/// Token for a single plain-list item, as produced by the `item` parser.
#[derive(Debug)]
pub struct ListItem<'a> {
/// Everything the `item` parser consumed for this item.
pub source: &'a str,
/// The spaces/tabs that precede the bullet (may be empty).
pub leading_whitespace: &'a str,
/// The bullet text, e.g. `-`, `+`, `*`, `1.` or `a)`.
pub bullet: &'a str,
/// The counter-set cookie (e.g. `[@3]`), if present.
pub counter_set: Option<&'a str>,
/// The check box (`[ ]`, `[X]` or `[-]`), if present.
pub check_box: Option<&'a str>,
/// The item tag including its trailing ` :: ` separator, if present.
pub item_tag: Option<&'a str>,
/// The text elements that make up the body of the item.
pub contents: Vec<TextElement<'a>>,
}
impl<'a> Source<'a> for ListItem<'a> {
/// Returns the stored `source` slice.
fn get_source(&'a self) -> &'a str {
self.source
}
}
/// Token that wraps the source text of a list counter.
///
/// NOTE(review): nothing in the visible code constructs this type yet; the
/// `counter` parser returns a plain `&str`.
#[derive(Debug)]
pub struct ListCounter<'a> {
/// The source text this token covers.
pub source: &'a str,
}
impl<'a> Source<'a> for ListCounter<'a> {
/// Returns the stored `source` slice.
fn get_source(&'a self) -> &'a str {
self.source
}
}

View File

@ -9,6 +9,7 @@ pub fn in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: &'x str) -
ContextElement::Context(name) if *name == section_name => return true,
ContextElement::Context(_) => {}
ContextElement::StartOfParagraph => {} // TODO: If we specialize this to bold then this would be a good spot to stop scanning
ContextElement::ListItem(_) => {}
}
}
false