Merge branch 'footnote_definition'

This commit is contained in:
Tom Alexander 2023-04-10 13:34:08 -04:00
commit e5f08beaca
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
14 changed files with 365 additions and 57 deletions

View File

@ -0,0 +1,23 @@
# Builds one .tree.txt dump for every .org file in this directory.
# Run recipes with bash instead of the default shell.
SHELL := bash
# Pass all lines of a recipe to a single shell invocation.
.ONESHELL:
# -e: stop at the first failing command; -u: treat unset variables as errors;
# -o pipefail: a pipeline fails when any stage fails; -c: recipe is passed as a command string.
.SHELLFLAGS := -eu -o pipefail -c
# Delete a target file when the recipe that was building it fails.
.DELETE_ON_ERROR:
MAKEFLAGS += --warn-undefined-variables
MAKEFLAGS += --no-builtin-rules
# Sources are the org files found here; each maps to an output with the .tree.txt suffix.
SRCFILES := $(wildcard *.org)
OUTFILES := $(patsubst %.org,%.tree.txt,$(SRCFILES))
# Abort early on a Make that does not know .RECIPEPREFIX, since the recipes below depend on it.
ifeq ($(origin .RECIPEPREFIX), undefined)
$(error This Make does not support .RECIPEPREFIX. Please use GNU Make 4.0 or later)
endif
# Recipe lines in this file start with ">" rather than a tab.
.RECIPEPREFIX = >
.PHONY: all
all: $(OUTFILES)
.PHONY: clean
clean:
> rm -rf $(OUTFILES)
# An output is rebuilt when its org source or either shared script changes.
# The script receives the org source path and the output path, in that order.
%.tree.txt: %.org ../common.el ../dump_org_ast.bash
> ../dump_org_ast.bash $< $@

View File

@ -0,0 +1,8 @@
[fn:1] A footnote.
[fn:2] A multi-
line footnote.
not in the footnote.

View File

@ -2,7 +2,6 @@ use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::line_ending;
use nom::character::complete::space1;
use nom::combinator::eof;
use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::opt;
@ -20,7 +19,7 @@ use crate::parser::parser_context::ChainBehavior;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ContextTree;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::util::element_trailing_whitespace;
use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
use super::element::Element;
use super::error::Res;
@ -100,48 +99,28 @@ pub fn document(input: &str) -> Res<&str, Document> {
fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Section<'s>> {
// TODO: The zeroth section is specialized so it probably needs its own parser
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::Context("section"))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&section_end)),
}))
.with_additional_node(ContextElement::Context("section"));
}));
let element_matcher = parser_with_context!(element)(&parser_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
let trailing_matcher = parser_with_context!(element_trailing_whitespace)(&parser_context);
let (remaining, (children, _exit_contents)) = verify(
many_till(
tuple((
element_matcher,
opt(map(trailing_matcher, Element::TrailingWhitespace)),
)),
exit_matcher,
),
many_till(element_matcher, exit_matcher),
|(children, _exit_contents)| !children.is_empty(),
)(input)?;
let flattened_children: Vec<Element> = children
.into_iter()
.flat_map(|tpl| {
let mut flattened_children = Vec::with_capacity(2);
flattened_children.push(tpl.0);
if let Some(bar) = tpl.1 {
flattened_children.push(bar);
}
flattened_children.into_iter()
})
.collect();
let (remaining, _trailing_ws) = maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
let source = get_consumed(input, remaining);
Ok((
remaining,
Section {
source,
children: flattened_children,
},
))
Ok((remaining, Section { source, children }))
}
#[tracing::instrument(ret, level = "debug")]
fn section_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
let headline_matcher = parser_with_context!(headline)(context);
alt((recognize(headline_matcher), eof))(input)
recognize(headline_matcher)(input)
}
#[tracing::instrument(ret, level = "debug")]
@ -193,5 +172,5 @@ fn headline<'r, 's>(
#[tracing::instrument(ret, level = "debug")]
fn headline_end<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
alt((line_ending, eof))(input)
line_ending(input)
}

View File

@ -1,5 +1,7 @@
use super::error::Res;
use super::footnote_definition::footnote_definition;
use super::greater_block::greater_block;
use super::greater_element::FootnoteDefinition;
use super::greater_element::GreaterBlock;
use super::greater_element::PlainList;
use super::lesser_element::Paragraph;
@ -16,10 +18,7 @@ pub enum Element<'s> {
Paragraph(Paragraph<'s>),
PlainList(PlainList<'s>),
GreaterBlock(GreaterBlock<'s>),
/// The whitespace that follows an element.
///
/// This isn't a real org-mode element. Except for items in plain lists, trailing blank lines belong to the preceding element. It is a separate `Element` in this enum to make parsing easier.
TrailingWhitespace(&'s str),
FootnoteDefinition(FootnoteDefinition<'s>),
}
impl<'s> Source<'s> for Element<'s> {
@ -28,7 +27,7 @@ impl<'s> Source<'s> for Element<'s> {
Element::Paragraph(obj) => obj.source,
Element::PlainList(obj) => obj.source,
Element::GreaterBlock(obj) => obj.source,
Element::TrailingWhitespace(src) => src,
Element::FootnoteDefinition(obj) => obj.source,
}
}
}
@ -50,8 +49,10 @@ pub fn non_paragraph_element<'r, 's>(
) -> Res<&'s str, Element<'s>> {
let plain_list_matcher = parser_with_context!(plain_list)(context);
let greater_block_matcher = parser_with_context!(greater_block)(context);
let footnote_definition_matcher = parser_with_context!(footnote_definition)(context);
alt((
map(plain_list_matcher, Element::PlainList),
map(greater_block_matcher, Element::GreaterBlock),
map(footnote_definition_matcher, Element::FootnoteDefinition),
))(input)
}

View File

@ -0,0 +1,156 @@
use super::error::Res;
use super::util::WORD_CONSTITUENT_CHARACTERS;
use super::Context;
use crate::parser::element::element;
use crate::parser::greater_element::FootnoteDefinition;
use crate::parser::parser_context::ChainBehavior;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::util::blank_line;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::maybe_consume_trailing_whitespace;
use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
use crate::parser::util::start_of_line;
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::bytes::complete::take_while;
use nom::character::complete::digit1;
use nom::character::complete::space0;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::tuple;
/// Parse one footnote definition: a `[fn:LABEL]` marker at the start of a line followed
/// by the elements that form the footnote's contents.
///
/// Child elements are parsed in a context that enables trailing-whitespace consumption
/// and installs `footnote_definition_end` as an exit matcher that ignores parent exit
/// matchers. After the children, trailing whitespace is handled against the caller's
/// `context`, and everything consumed from `input` is recorded as the definition's
/// `source`.
///
/// Returns the unconsumed input and the parsed `FootnoteDefinition`, or the parse error
/// from whichever step failed.
#[tracing::instrument(ret, level = "debug")]
pub fn footnote_definition<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, FootnoteDefinition<'s>> {
    // The marker is matched directly at `input`, which must be the start of a line, so
    // a footnote definition cannot be indented.
    start_of_line(context, input)?;
    let (after_marker, (_open, label_text, _close, _padding)) =
        tuple((tag_no_case("[fn:"), label, tag("]"), space0))(input)?;

    let contents_context = context
        .with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
        .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
            exit_matcher: ChainBehavior::IgnoreParent(Some(&footnote_definition_end)),
        }));
    // TODO: The problem is we are not accounting for trailing whitespace like we do in
    // section. Maybe it would be easier if we passed down whether or not to parse
    // trailing whitespace into the element matcher similar to how tag takes in
    // parameters.
    let parse_child = parser_with_context!(element)(&contents_context);
    let at_exit = parser_with_context!(exit_matcher_parser)(&contents_context);
    let (after_children, (children, _exit_contents)) =
        many_till(parse_child, at_exit)(after_marker)?;

    // Uses the caller's context (not the contents context) to decide whether the
    // whitespace after this definition belongs to it.
    let (remaining, _trailing_ws) =
        maybe_consume_trailing_whitespace_if_not_exiting(context, after_children)?;

    let definition = FootnoteDefinition {
        source: get_consumed(input, remaining),
        label: label_text,
        children,
    };
    Ok((remaining, definition))
}
#[tracing::instrument(ret, level = "debug")]
fn label<'s>(input: &'s str) -> Res<&'s str, &'s str> {
alt((
digit1,
take_while(|c| WORD_CONSTITUENT_CHARACTERS.contains(c) || "-_".contains(c)),
))(input)
}
/// Exit matcher for the contents of a footnote definition.
///
/// Matches when either of these holds at `input`:
/// - another footnote definition parses here, after whatever
///   `maybe_consume_trailing_whitespace` chooses to skip; or
/// - the position is the start of a line and is followed by two or more blank lines.
///
/// On success the recognized text is returned; otherwise the error of the last
/// alternative is returned.
#[tracing::instrument(ret, level = "debug")]
fn footnote_definition_end<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, &'s str> {
    let line_start = parser_with_context!(start_of_line)(context);
    let next_definition = parser_with_context!(footnote_definition)(context);
    let skip_whitespace = parser_with_context!(maybe_consume_trailing_whitespace)(context);

    let another_definition_follows = recognize(tuple((skip_whitespace, next_definition)));
    let two_or_more_blank_lines = recognize(tuple((
        line_start,
        verify(many1(blank_line), |lines: &[&str]| lines.len() >= 2),
    )));

    alt((another_definition_follows, two_or_more_blank_lines))(input)
}
// Unit tests for the footnote definition parser.
#[cfg(test)]
mod tests {
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ContextTree;
use crate::parser::parser_with_context::parser_with_context;
use super::*;
// Parses two consecutive footnote definitions from a single input and checks that
// nothing is left over and which slice of the input each definition claims as `source`.
// NOTE(review): the name `two_paragraphs` does not describe what is tested here (two
// footnote definitions); presumably copied from another test module — consider renaming.
#[test]
fn two_paragraphs() {
let input = "[fn:1] A footnote.
[fn:2] A multi-
line footnote.";
// The context only carries the document root, which is used for look-behind.
let initial_context: ContextTree<'_, '_> = ContextTree::new();
let document_context =
initial_context.with_additional_node(ContextElement::DocumentRoot(input));
let footnote_definition_matcher =
parser_with_context!(footnote_definition)(&document_context);
let (remaining, first_footnote_definition) =
footnote_definition_matcher(input).expect("Parse first footnote_definition");
let (remaining, second_footnote_definition) =
footnote_definition_matcher(remaining).expect("Parse second footnote_definition.");
assert_eq!(remaining, "");
assert_eq!(
first_footnote_definition.source,
"[fn:1] A footnote.
"
);
assert_eq!(
second_footnote_definition.source,
"[fn:2] A multi-
line footnote."
);
}
// Checks that text following the footnote is left in `remaining` instead of being
// absorbed into the definition.
// NOTE(review): `footnote_definition_end` only matches before another footnote
// definition or at two or more blank lines — confirm the input literal below contains
// the blank lines this test relies on to end the footnote.
#[test]
fn multiline_break() {
let input = "[fn:2] A multi-
line footnote.
not in the footnote.";
let initial_context: ContextTree<'_, '_> = ContextTree::new();
let document_context =
initial_context.with_additional_node(ContextElement::DocumentRoot(input));
let footnote_definition_matcher =
parser_with_context!(footnote_definition)(&document_context);
let (remaining, first_footnote_definition) =
footnote_definition_matcher(input).expect("Parse first footnote_definition");
assert_eq!(remaining, "not in the footnote.");
assert_eq!(
first_footnote_definition.source,
"[fn:2] A multi-
line footnote.
"
);
}
}

View File

@ -10,6 +10,7 @@ use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
use crate::parser::util::start_of_line;
use nom::branch::alt;
use nom::bytes::complete::is_not;
@ -41,10 +42,11 @@ pub fn greater_block<'r, 's>(
let (remaining, parameters) = opt(tuple((space1, parameters)))(remaining)?;
let (remaining, _nl) = line_ending(remaining)?;
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(true))
.with_additional_node(ContextElement::GreaterBlock(name))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::IgnoreParent(Some(&greater_block_end)),
}))
.with_additional_node(ContextElement::GreaterBlock(name));
}));
let element_matcher = parser_with_context!(element)(&parser_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
@ -52,6 +54,12 @@ pub fn greater_block<'r, 's>(
let (remaining, (children, _exit_contents)) =
many_till(element_matcher, exit_matcher)(remaining)?;
let (remaining, _end) = greater_block_end(&parser_context, remaining)?;
// Not checking if parent exit matcher is causing exit because the greater_block_end matcher asserts we matched a full greater block
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
let parameters = match parameters {
Some((_ws, parameters)) => Some(parameters),
None => None,

View File

@ -21,3 +21,10 @@ pub struct GreaterBlock<'s> {
pub parameters: Option<&'s str>,
pub children: Vec<Element<'s>>,
}
/// A parsed footnote definition: a `[fn:LABEL]` marker together with the elements that
/// make up the footnote's contents.
#[derive(Debug)]
pub struct FootnoteDefinition<'s> {
/// The slice of the original input that this definition was parsed from.
pub source: &'s str,
/// The text between `[fn:` and the closing `]`.
pub label: &'s str,
/// The elements parsed as the contents of the footnote.
pub children: Vec<Element<'s>>,
}

View File

@ -31,6 +31,12 @@ impl<T> List<T> {
List { head: None }
}
/// Create a list whose head is `trunk`.
///
/// The node is shared, not copied: only the reference count of `trunk` is bumped.
pub fn branch_from(trunk: &Rc<Node<T>>) -> Self {
    let head = Some(Rc::clone(trunk));
    List { head }
}
pub fn push_front(&self, data: T) -> List<T> {
List {
head: Some(Rc::new(Node {

View File

@ -1,6 +1,7 @@
mod document;
mod element;
mod error;
mod footnote_definition;
mod greater_block;
mod greater_element;
mod lesser_element;

View File

@ -12,6 +12,7 @@ use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::maybe_consume_trailing_whitespace_if_not_exiting;
use crate::parser::util::start_of_line;
use super::element::non_paragraph_element;
@ -35,6 +36,11 @@ pub fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st
|(children, _exit_contents)| !children.is_empty(),
)(input)?;
// Not checking parent exit matcher because if there are any children matched then we have a valid paragraph.
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, Paragraph { source, children }))
@ -50,3 +56,27 @@ fn paragraph_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st
eof,
))(input)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parser_context::ContextElement;
    use crate::parser::parser_context::ContextTree;
    use crate::parser::parser_with_context::parser_with_context;

    /// Two paragraphs separated by a blank line: the first one claims the blank line
    /// after it as part of its source, the second one ends at end of input.
    #[test]
    fn two_paragraphs() {
        let input = "foo bar baz\n\nlorem ipsum";
        let root_context: ContextTree<'_, '_> = ContextTree::new();
        let document_context =
            root_context.with_additional_node(ContextElement::DocumentRoot(input));
        let parse_paragraph = parser_with_context!(paragraph)(&document_context);

        let (after_first, first) = parse_paragraph(input).expect("Parse first paragraph");
        let (after_second, second) =
            parse_paragraph(after_first).expect("Parse second paragraph.");

        assert_eq!(after_second, "");
        assert_eq!(first.source, "foo bar baz\n\n");
        assert_eq!(second.source, "lorem ipsum");
    }
}

View File

@ -1,8 +1,9 @@
use std::rc::Rc;
use nom::combinator::eof;
use nom::IResult;
use crate::parser::util::whitespace_eof;
use super::error::CustomError;
use super::error::MyError;
use super::error::Res;
@ -22,6 +23,12 @@ impl<'r, 's> ContextTree<'r, 's> {
ContextTree { tree: List::new() }
}
/// Build a context tree whose innermost node is `trunk`.
///
/// The underlying list is created with `List::branch_from`, so the nodes are shared
/// with the tree that `trunk` came from.
pub fn branch_from(trunk: &Rc<Node<ContextElement<'r, 's>>>) -> Self {
    let tree = List::branch_from(trunk);
    ContextTree { tree }
}
pub fn ptr_eq<'x, 'y>(&self, other: &ContextTree<'x, 'y>) -> bool {
self.tree.ptr_eq(&other.tree)
}
@ -60,7 +67,7 @@ impl<'r, 's> ContextTree<'r, 's> {
i: &'s str,
) -> IResult<&'s str, &'s str, CustomError<&'s str>> {
// Special check for EOF. We don't just make this a document-level exit matcher since the IgnoreParent ChainBehavior could cause early exit matchers to not run.
let at_end_of_file = eof(i);
let at_end_of_file = whitespace_eof(i);
if at_end_of_file.is_ok() {
return at_end_of_file;
}
@ -76,14 +83,16 @@ impl<'r, 's> ContextTree<'r, 's> {
ContextElement::ExitMatcherNode(exit_matcher) => {
match exit_matcher.exit_matcher {
ChainBehavior::AndParent(Some(matcher)) => {
let local_result = matcher(self, i);
let local_context = ContextTree::branch_from(current_node);
let local_result = matcher(&local_context, i);
if local_result.is_ok() {
return local_result;
}
}
ChainBehavior::AndParent(None) => {}
ChainBehavior::IgnoreParent(Some(matcher)) => {
let local_result = matcher(self, i);
let local_context = ContextTree::branch_from(current_node);
let local_result = matcher(&local_context, i);
if local_result.is_ok() {
return local_result;
}
@ -115,6 +124,26 @@ impl<'r, 's> ContextTree<'r, 's> {
}
None
}
/// Reports whether elements parsed under this context should consume the whitespace
/// that follows them.
///
/// The first `ConsumeTrailingWhitespace` node found while iterating the context decides;
/// when the context holds no such node the answer is `true`.
pub fn should_consume_trailing_whitespace(&self) -> bool {
    let explicit_setting = self._should_consume_trailing_whitespace();
    explicit_setting.unwrap_or(true)
}
/// Look up the trailing-whitespace setting stored in this context.
///
/// Returns the value of the first `ConsumeTrailingWhitespace` node met while iterating
/// the context, or `None` when no node carries the setting.
fn _should_consume_trailing_whitespace(&self) -> Option<bool> {
    for node in self.iter() {
        if let ContextElement::ConsumeTrailingWhitespace(should) = node.get_data() {
            return Some(*should);
        }
    }
    None
}
}
#[derive(Debug)]
@ -123,14 +152,19 @@ pub enum ContextElement<'r, 's> {
///
/// This is used for look-behind.
DocumentRoot(&'s str),
/// Stores a parser that indicates that children should exit upon matching an exit matcher.
ExitMatcherNode(ExitMatcherNode<'r>),
Context(&'r str),
/// Stores the indentation level of the current list item
/// Stores the indentation level of the current list item.
ListItem(usize),
/// Stores the name of the greater block
/// Stores the name of the greater block.
GreaterBlock(&'s str),
/// Indicates if elements should consume the whitespace after them.
ConsumeTrailingWhitespace(bool),
}
#[derive(Debug)]

View File

@ -4,6 +4,7 @@ use super::error::Res;
use super::greater_element::PlainList;
use super::greater_element::PlainListItem;
use super::parser_with_context::parser_with_context;
use super::util::maybe_consume_trailing_whitespace_if_not_exiting;
use super::util::non_whitespace_character;
use super::Context;
use crate::parser::element::element;
@ -27,6 +28,7 @@ use nom::multi::many_till;
use nom::sequence::tuple;
pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainList<'s>> {
// TODO: Are we handling 2 blank lines causing the end of all plain lists?
let (mut remaining, first_item) = plain_list_item(context, input)?;
let first_item_indentation = first_item.indentation;
let plain_list_item_matcher = parser_with_context!(plain_list_item)(context);
@ -48,6 +50,10 @@ pub fn plain_list<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s s
Ok(_) | Err(_) => break,
};
}
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, PlainList { source, children }))
}
@ -62,10 +68,11 @@ pub fn plain_list_item<'r, 's>(
// It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09)
let indent_level = leading_whitespace.len();
let parser_context = context
.with_additional_node(ContextElement::ConsumeTrailingWhitespace(false))
.with_additional_node(ContextElement::ListItem(indent_level))
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)),
}))
.with_additional_node(ContextElement::ListItem(indent_level));
}));
let element_matcher = parser_with_context!(element)(&parser_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);

View File

@ -1,19 +1,25 @@
use nom::branch::alt;
use nom::character::complete::line_ending;
use nom::character::complete::none_of;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::multi::many0;
use nom::sequence::tuple;
use crate::parser::parser_with_context::parser_with_context;
use super::error::CustomError;
use super::error::MyError;
use super::error::Res;
use super::parser_context::ContextElement;
use super::Context;
use nom::branch::alt;
use nom::character::complete::line_ending;
use nom::character::complete::multispace0;
use nom::character::complete::none_of;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::multi::many0;
use nom::sequence::tuple;
pub const WORD_CONSTITUENT_CHARACTERS: &str =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
/// Check if we are below a section of the given section type regardless of depth
pub fn in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: &'x str) -> bool {
@ -86,6 +92,35 @@ pub fn element_trailing_whitespace<'r, 's>(
alt((eof, recognize(many0(blank_line))))(input)
}
/// Consume the whitespace after an element, unless the context disables that or an
/// exit matcher already matches at `input`.
///
/// Returns `None` with `input` untouched when nothing is consumed; otherwise returns the
/// result of an optional `element_trailing_whitespace` parse.
#[tracing::instrument(ret, level = "debug")]
pub fn maybe_consume_trailing_whitespace_if_not_exiting<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, Option<&'s str>> {
    // Order matters: the exit matcher is only evaluated when consumption is enabled.
    let consumption_enabled = context.should_consume_trailing_whitespace();
    if !consumption_enabled || exit_matcher_parser(context, input).is_ok() {
        return Ok((input, None));
    }
    let whitespace_matcher = parser_with_context!(element_trailing_whitespace)(context);
    opt(whitespace_matcher)(input)
}
/// Consume the whitespace after an element when the context enables it.
///
/// Unlike `maybe_consume_trailing_whitespace_if_not_exiting`, exit matchers are not
/// consulted. Returns `None` with `input` untouched when consumption is disabled;
/// otherwise returns the result of an optional `element_trailing_whitespace` parse.
#[tracing::instrument(ret, level = "debug")]
pub fn maybe_consume_trailing_whitespace<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, Option<&'s str>> {
    if !context.should_consume_trailing_whitespace() {
        return Ok((input, None));
    }
    let whitespace_matcher = parser_with_context!(element_trailing_whitespace)(context);
    opt(whitespace_matcher)(input)
}
#[tracing::instrument(ret, level = "debug")]
pub fn trailing_whitespace(input: &str) -> Res<&str, &str> {
alt((eof, recognize(tuple((line_ending, many0(blank_line))))))(input)
@ -162,6 +197,11 @@ pub fn regurgitate<'s>(input: &'s str, remaining: &'s str) -> &'s str {
}
}
/// Match when only whitespace (possibly none) separates `input` from the end of input.
///
/// On success the matched whitespace is returned as the recognized text.
#[tracing::instrument(ret, level = "debug")]
pub fn whitespace_eof(input: &str) -> Res<&str, &str> {
    let whitespace_then_end = tuple((multispace0, eof));
    recognize(whitespace_then_end)(input)
}
#[cfg(test)]
mod tests {
use super::*;

View File

@ -28,3 +28,11 @@ foo bar
1. foo
2. bar
#+end_center
[fn:1] A footnote.
[fn:2] A multi-
line footnote.
not in the footnote.