Merge branch 'objects_and_elements'

2023-03-25 12:04:33 -04:00 · 2023-03-25 12:04:33 -04:00 · 6558f9b7c2
commit 6558f9b7c2
parent f060e7189b e3f6dd497a
23 changed files with 736 additions and 144 deletions
--- a/25
+++ b/25
@ -0,0 +1,25 @@
+# Run each recipe as one strict bash script: stop on the first error, on unset
+# variables, and on a failure anywhere in a pipeline.
+SHELL := bash
+.ONESHELL:
+.SHELLFLAGS := -eu -o pipefail -c
+.DELETE_ON_ERROR:
+MAKEFLAGS += --warn-undefined-variables
+MAKEFLAGS += --no-builtin-rules
+
+ifeq ($(origin .RECIPEPREFIX), undefined)
+  $(error This Make does not support .RECIPEPREFIX. Please use GNU Make 4.0 or later)
+endif
+# Recipe lines in this file start with ">" instead of a tab.
+.RECIPEPREFIX = >
+
+.PHONY: build
+build: target/debug/toy
+
+.PHONY: clean
+clean:
+> cargo clean
+
+# No prerequisites are listed for the binary, so as a plain file target it
+# would never be rebuilt once it exists. Declaring it phony makes every build
+# invoke cargo, which performs its own up-to-date check.
+.PHONY: target/debug/toy
+target/debug/toy:
+> cargo build
+
+# Start a detached, self-removing Jaeger all-in-one container with its ports published.
+.PHONY: jaeger
+jaeger:
+> docker run -d --rm -p 6831:6831/udp -p 6832:6832/udp -p 16686:16686 -p 14268:14268 jaegertracing/all-in-one:latest
--- a/language_rules.txt
+++ b/language_rules.txt
@ -1 +0,0 @@
-Two line breaks to end paragraph except in code blocks
--- a/org_mode_samples/exit_matcher_investigation/bold_with_asterisk_inside.org
+++ b/org_mode_samples/exit_matcher_investigation/bold_with_asterisk_inside.org
@ -0,0 +1 @@
+foo *bar baz * lorem* ipsum
--- a/org_mode_samples/paragraphs/Makefile
+++ b/org_mode_samples/paragraphs/Makefile
@ -0,0 +1,23 @@
+# Run each recipe as one strict bash script: stop on the first error, on unset
+# variables, and on a failure anywhere in a pipeline.
+SHELL := bash
+.ONESHELL:
+.SHELLFLAGS := -eu -o pipefail -c
+.DELETE_ON_ERROR:
+MAKEFLAGS += --warn-undefined-variables
+MAKEFLAGS += --no-builtin-rules
+# Every *.org file in this directory gets a matching *.tree.txt output.
+SRCFILES := $(wildcard *.org)
+OUTFILES := $(patsubst %.org,%.tree.txt,$(SRCFILES))
+
+ifeq ($(origin .RECIPEPREFIX), undefined)
+  $(error This Make does not support .RECIPEPREFIX. Please use GNU Make 4.0 or later)
+endif
+# Recipe lines in this file start with ">" instead of a tab.
+.RECIPEPREFIX = >
+
+.PHONY: all
+all: $(OUTFILES)
+
+.PHONY: clean
+clean:
+> rm -rf $(OUTFILES)
+
+# Regenerate an output when its .org source, ../common.el or the dump script changes.
+# The script receives the source path ($<) and the output path ($@).
+%.tree.txt: %.org ../common.el ../dump_org_ast.bash
+> ../dump_org_ast.bash $< $@
--- a/org_mode_samples/paragraphs/paragraph_with_backslash_line_breaks.org
+++ b/org_mode_samples/paragraphs/paragraph_with_backslash_line_breaks.org
@ -0,0 +1,7 @@
+This is a paragraph
+
+This is another paragraph
+This is a second line in that paragraph
+
+This is a third paragraph \\
+This is a second line in that paragraph
--- a/org_mode_samples/sections_and_headings/Makefile
+++ b/org_mode_samples/sections_and_headings/Makefile
@ -0,0 +1,23 @@
+# Run each recipe as one strict bash script: stop on the first error, on unset
+# variables, and on a failure anywhere in a pipeline.
+SHELL := bash
+.ONESHELL:
+.SHELLFLAGS := -eu -o pipefail -c
+.DELETE_ON_ERROR:
+MAKEFLAGS += --warn-undefined-variables
+MAKEFLAGS += --no-builtin-rules
+# Every *.org file in this directory gets a matching *.tree.txt output.
+SRCFILES := $(wildcard *.org)
+OUTFILES := $(patsubst %.org,%.tree.txt,$(SRCFILES))
+
+ifeq ($(origin .RECIPEPREFIX), undefined)
+  $(error This Make does not support .RECIPEPREFIX. Please use GNU Make 4.0 or later)
+endif
+# Recipe lines in this file start with ">" instead of a tab.
+.RECIPEPREFIX = >
+
+.PHONY: all
+all: $(OUTFILES)
+
+.PHONY: clean
+clean:
+> rm -rf $(OUTFILES)
+
+# Regenerate an output when its .org source, ../common.el or the dump script changes.
+# The script receives the source path ($<) and the output path ($@).
+%.tree.txt: %.org ../common.el ../dump_org_ast.bash
+> ../dump_org_ast.bash $< $@
--- a/org_mode_samples/sections_and_headings/immediate_heading.org
+++ b/org_mode_samples/sections_and_headings/immediate_heading.org
@ -0,0 +1 @@
+* Start a document with an immediate heading
--- a/org_mode_samples/sections_and_headings/sections_and_headings.org
+++ b/org_mode_samples/sections_and_headings/sections_and_headings.org
@ -0,0 +1,7 @@
+Before the first heading
+* The first heading
+body of the first section
+** Child heading
+body of child heading
+* second top-level heading
+body of second top-level heading
--- a/src/main.rs
+++ b/src/main.rs
@ -1,3 +1,4 @@
+#![feature(round_char_boundary)]
 use crate::parser::document;
 use tracing::Level;
 use tracing_subscriber::fmt::format::FmtSpan;
--- a/src/parser/bold.rs
+++ b/src/parser/bold.rs
@ -96,6 +96,9 @@ fn _preceded_by_whitespace<'r, 's>(context: Context<'r, 's>) -> bool {
                }
                ContextElement::Context(_) => {}
                ContextElement::ListItem(_) => {}
+                ContextElement::DocumentRoot(_) => {
+                    return true;
+                }
            }
        } else {
            break;
--- a/src/parser/combinator.rs
+++ b/src/parser/combinator.rs
@ -1,37 +1,26 @@
-use super::parser_context::ContextElement;
-use super::parser_context::PreviousElementNode;
-use super::token::Token;
-use super::Context;
-use nom::error::ErrorKind;
 use nom::error::ParseError;
 use nom::IResult;
 use nom::InputLength;

-pub fn context_many1<'r, 's, I, O, E, M>(
+use super::Context;
+
+pub fn context_many1<'r: 's, 's, I, O, E, M>(
    context: Context<'r, 's>,
    mut many_matcher: M,
-) -> impl FnMut(I) -> IResult<I, Vec<Token<'s>>, E> + 'r
+) -> impl FnMut(I) -> IResult<I, Vec<O>, E> + 'r
 where
    I: Clone + InputLength,
    E: ParseError<I>,
    M: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, O, E> + 'r,
-    O: Into<Token<'s>>,
 {
    move |mut i: I| {
        let mut err = None;
-        // TODO: Can I eliminate the clone? I think this is incrementing the reference count
-        let mut current_context = context.clone();
-        // Despite the clone, the Rc should still point to the same value.
-        assert!(current_context.ptr_eq(context));
+        let mut elements: Vec<O> = Vec::new();
        loop {
-            match many_matcher(&current_context, i.clone()) {
+            match many_matcher(&context, i.clone()) {
                Ok((remaining, many_elem)) => {
-                    current_context = current_context.with_additional_node(
-                        ContextElement::PreviousElementNode(PreviousElementNode {
-                            element: many_elem.into(),
-                        }),
-                    );
                    i = remaining;
+                    elements.push(many_elem);
                }
                the_error @ Err(_) => {
                    err = Some(the_error);
@ -39,93 +28,11 @@ where
                }
            }
        }
-        let mut elements: Vec<Token<'s>> = current_context
-            .into_iter_until(context)
-            .filter_map(|context_element| match context_element {
-                ContextElement::PreviousElementNode(elem) => Some(elem.element),
-                ContextElement::ExitMatcherNode(_) => None,
-                ContextElement::Context(_) => None,
-                ContextElement::StartOfParagraph => None,
-                ContextElement::ListItem(_) => None,
-            })
-            .collect();
        if elements.is_empty() {
            if let Some(err) = err {
                err?;
            }
        }
-        elements.reverse();
        Ok((i, elements))
    }
 }
-
-pub fn context_many_till<'r, 's, I, O, E, F, M, T>(
-    context: Context<'r, 's>,
-    mut many_matcher: M,
-    mut till_matcher: T,
-) -> impl FnMut(I) -> IResult<I, (Vec<Token<'s>>, F), E> + 'r
-where
-    I: Clone + InputLength,
-    E: ParseError<I>,
-    M: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, O, E> + 'r,
-    T: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, F, E> + 'r,
-    O: Into<Token<'s>>,
-{
-    move |mut i: I| {
-        // TODO: Can I eliminate the clone? I think this is incrementing the reference count
-        let mut current_context = context.clone();
-        // Despite the clone, the Rc should still point to the same value, otherwise we'll get stuck in an endless loop.
-        assert!(current_context.ptr_eq(context));
-        loop {
-            let len = i.input_len();
-            match till_matcher(&current_context, i.clone()) {
-                Ok((remaining, finish)) => {
-                    let mut ret = Vec::new();
-                    while !current_context.ptr_eq(context) {
-                        let (context_element, next_context) = current_context.pop_front();
-                        let context_element = context_element.expect("We only pop off context elements created in this function, so they are all Some()");
-                        current_context = next_context;
-                        match context_element {
-                            ContextElement::ExitMatcherNode(_) => {}
-                            ContextElement::StartOfParagraph => {}
-                            ContextElement::Context(_) => {}
-                            ContextElement::PreviousElementNode(PreviousElementNode {
-                                element: token,
-                            }) => {
-                                ret.push(token);
-                            }
-                            ContextElement::ListItem(_) => {}
-                        };
-                    }
-                    ret.reverse();
-                    return Ok((remaining, (ret, finish)));
-                }
-                Err(nom::Err::Error(_)) => {
-                    match many_matcher(&current_context, i.clone()) {
-                        Err(nom::Err::Error(err)) => {
-                            return Err(nom::Err::Error(E::append(i, ErrorKind::ManyTill, err)))
-                        }
-                        Err(e) => return Err(e),
-                        Ok((remaining, many_elem)) => {
-                            // infinite loop check: the parser must always consume
-                            if remaining.input_len() == len {
-                                return Err(nom::Err::Error(E::from_error_kind(
-                                    remaining,
-                                    ErrorKind::ManyTill,
-                                )));
-                            }
-
-                            current_context = current_context.with_additional_node(
-                                ContextElement::PreviousElementNode(PreviousElementNode {
-                                    element: many_elem.into(),
-                                }),
-                            );
-                            i = remaining;
-                        }
-                    }
-                }
-                Err(e) => return Err(e),
-            };
-        }
-    }
-}
--- a/src/parser/document.rs
+++ b/src/parser/document.rs
@ -1,26 +1,182 @@
-//! A single element of text.
-use super::combinator::context_many1;
+use nom::branch::alt;
+use nom::bytes::complete::tag;
+use nom::character::complete::line_ending;
+use nom::character::complete::space1;
+use nom::combinator::eof;
+use nom::combinator::map;
+use nom::combinator::not;
+use nom::combinator::opt;
+use nom::combinator::recognize;
+use nom::combinator::verify;
+use nom::multi::many0;
+use nom::multi::many1;
+use nom::multi::many1_count;
+use nom::sequence::tuple;
+
+use crate::parser::element::element;
+use crate::parser::error::CustomError;
+use crate::parser::error::MyError;
+use crate::parser::object::standard_set_object;
+use crate::parser::parser_context::ChainBehavior;
+use crate::parser::parser_context::ContextElement;
+use crate::parser::parser_context::ContextTree;
+use crate::parser::parser_context::ExitMatcherNode;
+
+use super::element::Element;
 use super::error::Res;
-use super::paragraph::paragraph;
-use super::parser_context::ContextTree;
-use super::token::Paragraph;
-use super::token::Token;
+use super::object::Object;
+use super::parser_with_context::parser_with_context;
+use super::source::Source;
+use super::util::get_consumed;
+use super::util::get_one_before;
+use super::util::trailing_whitespace;
 use super::Context;
-use nom::IResult;

-type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult<I, O, E>;
-
-// TODO: Implement FromStr for Document
-
-pub fn document(input: &str) -> Res<&str, Vec<Paragraph>> {
-    let initial_context: ContextTree<'_, '_> = ContextTree::new();
-    let (remaining, tokens) = context_many1(&initial_context, paragraph)(input)?;
-    let paragraphs = tokens
-        .into_iter()
-        .map(|token| match token {
-            Token::TextElement(_) => unreachable!(),
-            Token::Paragraph(paragraph) => paragraph,
-        })
-        .collect();
-    Ok((remaining, paragraphs))
+/// The result of parsing a whole input with [`document`].
+#[derive(Debug)]
+pub struct Document<'s> {
+    /// The part of the input that the parser consumed.
+    pub source: &'s str,
+    /// The section parsed before any heading, if one was present.
+    pub zeroth_section: Option<Section<'s>>,
+    /// The headings parsed after the optional zeroth section.
+    pub children: Vec<Heading<'s>>,
+}
+
+/// A heading together with the headings and sections nested beneath it.
+#[derive(Debug)]
+pub struct Heading<'s> {
+    /// The part of the input covered by the headline and all of its children.
+    pub source: &'s str,
+    /// Number of `*` characters that opened the headline.
+    pub stars: usize,
+    /// Nested headings (with more stars than this one) and sections, in input order.
+    pub children: Vec<DocumentElement<'s>>,
+}
+
+/// A run of one or more elements, ended by a headline or the end of input.
+#[derive(Debug)]
+pub struct Section<'s> {
+    /// The part of the input covered by the section's elements.
+    pub source: &'s str,
+    /// The elements parsed inside the section, in input order.
+    pub children: Vec<Element<'s>>,
+}
+
+/// A child of a [`Heading`]: either a nested heading or a section.
+#[derive(Debug)]
+pub enum DocumentElement<'s> {
+    Heading(Heading<'s>),
+    Section(Section<'s>),
+}
+
+impl<'s> Source<'s> for Document<'s> {
+    /// Returns the slice of the input that the document was parsed from.
+    fn get_source(&'s self) -> &'s str {
+        self.source
+    }
+}
+
+impl<'s> Source<'s> for DocumentElement<'s> {
+    /// Returns the source slice stored on whichever variant this is.
+    fn get_source(&'s self) -> &'s str {
+        match self {
+            DocumentElement::Heading(heading) => heading.source,
+            DocumentElement::Section(section) => section.source,
+        }
+    }
+}
+
+/// Parse an entire input string into a [`Document`].
+///
+/// The input is registered as the document root in the parser context. An
+/// optional leading section is parsed first, followed by zero or more headings.
+/// Returns the unparsed remainder together with the document.
+#[allow(dead_code)]
+pub fn document(input: &str) -> Res<&str, Document> {
+    let root_context: ContextTree<'_, '_> = ContextTree::new();
+    let document_context =
+        root_context.with_additional_node(ContextElement::DocumentRoot(input));
+
+    let (after_zeroth_section, zeroth_section) =
+        opt(parser_with_context!(section)(&document_context))(input)?;
+    let (remaining, children) =
+        many0(parser_with_context!(heading)(&document_context))(after_zeroth_section)?;
+
+    let parsed = Document {
+        source: get_consumed(input, remaining),
+        zeroth_section,
+        children,
+    };
+    Ok((remaining, parsed))
+}
+
+/// Parse a section: one or more elements up to the next headline or end of input.
+///
+/// Fails without consuming anything when an exit matcher already matches at `input`.
+fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Section<'s>> {
+    // TODO: The zeroth section is specialized so it probably needs its own parser
+    let exit_node = ContextElement::ExitMatcherNode(ExitMatcherNode {
+        exit_matcher: ChainBehavior::AndParent(Some(&section_end)),
+    });
+    let parser_context = context
+        .with_additional_node(exit_node)
+        .with_additional_node(ContextElement::Context("section"));
+
+    not(|i| parser_context.check_exit_matcher(i))(input)?;
+
+    let (remaining, children) = many1(parser_with_context!(element)(&parser_context))(input)?;
+    let parsed = Section {
+        source: get_consumed(input, remaining),
+        children,
+    };
+    Ok((remaining, parsed))
+}
+
+/// Exit matcher for sections: matches a headline or the end of the input.
+fn section_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
+    let at_headline = recognize(parser_with_context!(headline)(context));
+    alt((at_headline, eof))(input)
+}
+
+/// Parse a heading: a headline followed by any nested headings and sections.
+///
+/// A nested heading is only accepted as a child when it has more stars than
+/// this heading; otherwise it is left in the input for the caller.
+/// Fails when an exit matcher matches at `input` or when no headline starts there.
+fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Heading<'s>> {
+    not(|i| context.check_exit_matcher(i))(input)?;
+    // The title and surrounding whitespace are parsed but not kept: `Heading`
+    // has no field for them, so the bindings are deliberately ignored.
+    let (remaining, (star_count, _leading_space, _title, _trailing_whitespace)) =
+        headline(context, input)?;
+    let section_matcher = parser_with_context!(section)(context);
+    let heading_matcher = parser_with_context!(heading)(context);
+    let (remaining, children) = many0(alt((
+        map(
+            // Only headings deeper than the current level become children.
+            verify(heading_matcher, |h| h.stars > star_count),
+            DocumentElement::Heading,
+        ),
+        map(section_matcher, DocumentElement::Section),
+    )))(remaining)?;
+    let source = get_consumed(input, remaining);
+    Ok((
+        remaining,
+        Heading {
+            source,
+            stars: star_count,
+            children,
+        },
+    ))
+}
+
+/// Parse a single headline: stars at the start of a line, spaces, a title and trailing whitespace.
+///
+/// Returns `(star count, whitespace after the stars, title objects, trailing whitespace)`.
+fn headline<'r, 's>(
+    context: Context<'r, 's>,
+    input: &'s str,
+) -> Res<&'s str, (usize, &'s str, Vec<Object<'s>>, &'s str)> {
+    let exit_node = ContextElement::ExitMatcherNode(ExitMatcherNode {
+        exit_matcher: ChainBehavior::AndParent(Some(&headline_end)),
+    });
+    let parser_context = context.with_additional_node(exit_node);
+
+    let (remaining, (_start, star_count, leading_space, title, trailing)) = tuple((
+        parser_with_context!(start_of_line)(&parser_context),
+        many1_count(tag("*")),
+        space1,
+        many1(parser_with_context!(standard_set_object)(&parser_context)),
+        trailing_whitespace,
+    ))(input)?;
+
+    Ok((remaining, (star_count, leading_space, title, trailing)))
+}
+
+/// Exit matcher for headline titles: matches a line ending or the end of the input.
+///
+/// The context is unused; the parameter exists so the function fits the exit-matcher signature.
+fn headline_end<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
+    alt((line_ending, eof))(input)
+}
+
+/// Check that we are at the start of a line
+///
+/// Succeeds without consuming input when `input` begins at the very start of
+/// the document or directly after a `\n`; otherwise returns a recoverable error.
+///
+/// # Panics
+///
+/// Panics if the context contains no `DocumentRoot` node.
+fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
+    let document_root = context
+        .get_document_root()
+        .expect("start_of_line requires a DocumentRoot node in the parser context");
+    let preceding_character =
+        get_one_before(document_root, input).and_then(|slice| slice.chars().next());
+    match preceding_character {
+        // If None, we are at the start of the file which allows for headings
+        None | Some('\n') => Ok((input, ())),
+        // Not at start of line, cannot be a heading
+        Some(_) => Err(nom::Err::Error(CustomError::MyError(MyError(
+            "Not at start of line",
+        )))),
+    }
 }
--- a/src/parser/element.rs
+++ b/src/parser/element.rs
@ -0,0 +1,69 @@
+use nom::branch::alt;
+use nom::character::complete::line_ending;
+use nom::character::complete::space0;
+use nom::combinator::eof;
+use nom::combinator::map;
+use nom::combinator::not;
+use nom::combinator::recognize;
+use nom::multi::many0;
+use nom::multi::many1;
+use nom::sequence::tuple;
+
+use crate::parser::object::standard_set_object;
+use crate::parser::parser_context::ChainBehavior;
+use crate::parser::parser_context::ContextElement;
+use crate::parser::parser_context::ExitMatcherNode;
+use crate::parser::parser_with_context::parser_with_context;
+
+use super::error::Res;
+use super::greater_element::PlainList;
+use super::lesser_element::Paragraph;
+use super::source::Source;
+use super::util::blank_line;
+use super::util::get_consumed;
+use super::util::trailing_whitespace;
+use super::Context;
+
+/// An element that can appear inside a section.
+#[derive(Debug)]
+pub enum Element<'s> {
+    Paragraph(Paragraph<'s>),
+    PlainList(PlainList<'s>),
+}
+
+impl<'s> Source<'s> for Element<'s> {
+    /// Returns the source slice stored on whichever variant this is.
+    fn get_source(&'s self) -> &'s str {
+        match self {
+            Element::Paragraph(paragraph) => paragraph.source,
+            Element::PlainList(plain_list) => plain_list.source,
+        }
+    }
+}
+
+/// Parse one element at `input`; currently only paragraphs are attempted.
+///
+/// Fails without consuming anything when an exit matcher matches at `input`.
+pub fn element<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Element<'s>> {
+    not(|i| context.check_exit_matcher(i))(input)?;
+    map(parser_with_context!(paragraph)(context), Element::Paragraph)(input)
+}
+
+/// Parse a paragraph: one or more objects followed by trailing whitespace.
+///
+/// The trailing whitespace is consumed and counted as part of the paragraph's source.
+fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Paragraph<'s>> {
+    let exit_node = ContextElement::ExitMatcherNode(ExitMatcherNode {
+        exit_matcher: ChainBehavior::AndParent(Some(&paragraph_end)),
+    });
+    let parser_context = context.with_additional_node(exit_node);
+
+    let (after_objects, children) =
+        many1(parser_with_context!(standard_set_object)(&parser_context))(input)?;
+    let (remaining, _) = trailing_whitespace(after_objects)?;
+
+    let parsed = Paragraph {
+        source: get_consumed(input, remaining),
+        children,
+    };
+    Ok((remaining, parsed))
+}
+
+/// Exit matcher for paragraphs: matches a line ending followed by at least one
+/// blank line, or the end of the input.
+///
+/// The context is unused; the parameter exists so the function fits the exit-matcher signature.
+fn paragraph_end<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
+    // TODO: Other elements should also end paragraphs
+    alt((recognize(tuple((line_ending, many1(blank_line)))), eof))(input)
+}
--- a/src/parser/greater_element.rs
+++ b/src/parser/greater_element.rs
@ -0,0 +1,4 @@
+/// A plain list element; only its source slice is recorded.
+#[derive(Debug)]
+pub struct PlainList<'s> {
+    /// The part of the input this list was parsed from.
+    pub source: &'s str,
+}
--- a/src/parser/lesser_element.rs
+++ b/src/parser/lesser_element.rs
@ -0,0 +1,7 @@
+use super::object::Object;
+
+/// A paragraph element made up of objects.
+#[derive(Debug)]
+pub struct Paragraph<'s> {
+    /// The part of the input this paragraph was parsed from.
+    pub source: &'s str,
+    /// The objects contained in the paragraph, in input order.
+    pub children: Vec<Object<'s>>,
+}
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@ -1,17 +1,20 @@
-mod bold;
+// mod bold;
 mod combinator;
 mod document;
+mod element;
 mod error;
-mod link;
+mod greater_element;
+mod lesser_element;
+// mod link;
 mod list;
-mod paragraph;
+mod object;
+// mod paragraph;
 mod parser_context;
 mod parser_with_context;
-mod plain_list;
-mod text;
-mod token;
+// mod plain_list;
+mod source;
+// mod text;
+// mod token;
 mod util;
 pub use document::document;
 type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>;
-pub use parser_context::ContextTree;
-pub use plain_list::item;
--- a/src/parser/object.rs
+++ b/src/parser/object.rs
@ -0,0 +1,108 @@
+use nom::combinator::map;
+use nom::combinator::not;
+
+use crate::parser::error::CustomError;
+use crate::parser::error::MyError;
+
+use super::error::Res;
+use super::parser_with_context::parser_with_context;
+use super::source::Source;
+use super::Context;
+
+/// An object that can appear inside an element such as a paragraph or headline title.
+///
+/// Only `PlainText` is produced by `standard_set_object` in this file.
+#[derive(Debug)]
+pub enum Object<'s> {
+    TextMarkup(TextMarkup<'s>),
+    PlainText(PlainText<'s>),
+    RegularLink(RegularLink<'s>),
+}
+
+/// A text-markup object; only its source slice is recorded.
+#[derive(Debug)]
+pub struct TextMarkup<'s> {
+    /// The part of the input this object was parsed from.
+    pub source: &'s str,
+}
+
+/// A run of plain text, as produced by `plain_text`.
+#[derive(Debug)]
+pub struct PlainText<'s> {
+    /// The part of the input that makes up the text.
+    pub source: &'s str,
+}
+
+/// A regular-link object; only its source slice is recorded.
+#[derive(Debug)]
+pub struct RegularLink<'s> {
+    /// The part of the input this object was parsed from.
+    pub source: &'s str,
+}
+
+impl<'s> Source<'s> for Object<'s> {
+    /// Returns the source slice stored on whichever variant this is.
+    fn get_source(&'s self) -> &'s str {
+        match self {
+            Object::TextMarkup(markup) => markup.source,
+            Object::PlainText(text) => text.source,
+            Object::RegularLink(link) => link.source,
+        }
+    }
+}
+
+/// Parse one object at `input`; currently only plain text is attempted.
+///
+/// Fails without consuming anything when an exit matcher matches at `input`.
+pub fn standard_set_object<'r, 's>(
+    context: Context<'r, 's>,
+    input: &'s str,
+) -> Res<&'s str, Object<'s>> {
+    not(|i| context.check_exit_matcher(i))(input)?;
+    map(parser_with_context!(plain_text)(context), Object::PlainText)(input)
+}
+
+/// Consume plain text up to the first position where an exit matcher matches.
+///
+/// The exit matcher is tried at every character boundary, starting at offset 0.
+/// Everything before the first match becomes the `PlainText`; if no position
+/// matches, the whole input is plain text.
+///
+/// Returns an error when `input` is empty, or when the exit matcher already
+/// matches at the very start (which would otherwise be a zero-length match).
+fn plain_text<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainText<'s>> {
+    if input.is_empty() {
+        return Err(nom::Err::Error(CustomError::MyError(MyError(
+            "Zero input length to plain_text.",
+        ))));
+    }
+    for (offset, _char) in input.char_indices() {
+        let (text, remaining) = input.split_at(offset);
+        if let Err(exit_matched) = not(|i| context.check_exit_matcher(i))(remaining) {
+            if offset == 0 {
+                // If we're at the start of the input, then nothing is plain text, so fire an error for zero-length match.
+                return Err(exit_matched);
+            }
+            return Ok((remaining, PlainText { source: text }));
+        }
+    }
+    // We hit the end of the file, so all input must be plain text
+    Ok((&input[input.len()..], PlainText { source: input }))
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::parser::parser_context::ContextElement;
+    use crate::parser::parser_context::ContextTree;
+
+    use super::*;
+
+    /// With no exit matcher beyond the document root, the whole input is one plain-text object.
+    #[test]
+    fn plain_text_simple() {
+        let input = "foobarbaz";
+        let initial_context: ContextTree<'_, '_> = ContextTree::new();
+        let document_context =
+            initial_context.with_additional_node(ContextElement::DocumentRoot(input));
+        let plain_text_matcher = parser_with_context!(plain_text)(&document_context);
+        let (remaining, result) = map(plain_text_matcher, Object::PlainText)(input).unwrap();
+        assert_eq!(remaining, "");
+        assert_eq!(result.get_source(), input);
+    }
+}
--- a/src/parser/old_combinator.rs
+++ b/src/parser/old_combinator.rs
@ -0,0 +1,125 @@
+use super::parser_context::ContextElement;
+use super::parser_context::PreviousElementNode;
+use super::token::Token;
+use super::Context;
+use nom::error::ErrorKind;
+use nom::error::ParseError;
+use nom::IResult;
+use nom::InputLength;
+
+/// Context-aware repetition: apply `many_matcher` until it fails and collect the results.
+///
+/// Each matched element is converted into a `Token` and pushed onto the context
+/// as a `PreviousElementNode`, so later invocations of `many_matcher` receive a
+/// context that contains the earlier matches. The collected tokens are returned
+/// in match order. If nothing matched, the matcher's error is returned instead.
+///
+/// NOTE(review): unlike `context_many_till`, there is no check here that the
+/// matcher consumed input — confirm matchers passed in always make progress.
+pub fn context_many1<'r, 's, I, O, E, M>(
+    context: Context<'r, 's>,
+    mut many_matcher: M,
+) -> impl FnMut(I) -> IResult<I, Vec<Token<'s>>, E> + 'r
+where
+    I: Clone + InputLength,
+    E: ParseError<I>,
+    M: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, O, E> + 'r,
+    O: Into<Token<'s>>,
+{
+    move |mut i: I| {
+        let mut err = None;
+        // TODO: Can I eliminate the clone? I think this is incrementing the reference count
+        let mut current_context = context.clone();
+        // Despite the clone, the Rc should still point to the same value.
+        assert!(current_context.ptr_eq(context));
+        loop {
+            match many_matcher(&current_context, i.clone()) {
+                Ok((remaining, many_elem)) => {
+                    // Record the match on the context so the next iteration can see it.
+                    current_context = current_context.with_additional_node(
+                        ContextElement::PreviousElementNode(PreviousElementNode {
+                            element: many_elem.into(),
+                        }),
+                    );
+                    i = remaining;
+                }
+                the_error @ Err(_) => {
+                    // Any failure ends the repetition; the error is only reported if nothing matched.
+                    err = Some(the_error);
+                    break;
+                }
+            }
+        }
+        // Walk the nodes added since the original context and keep only the matched elements.
+        let mut elements: Vec<Token<'s>> = current_context
+            .into_iter_until(context)
+            .filter_map(|context_element| match context_element {
+                ContextElement::PreviousElementNode(elem) => Some(elem.element),
+                _ => None,
+            })
+            .collect();
+        if elements.is_empty() {
+            if let Some(err) = err {
+                err?;
+            }
+        }
+        // The elements were collected in the opposite of match order; reversing restores match order.
+        elements.reverse();
+        Ok((i, elements))
+    }
+}
+
+/// Context-aware `many_till`: apply `many_matcher` repeatedly until `till_matcher` succeeds.
+///
+/// At each position `till_matcher` is tried first. When it succeeds, the tokens
+/// matched so far (in match order) are returned together with its result. When
+/// it fails with a recoverable error, `many_matcher` is applied and its result
+/// is pushed onto the context as a `PreviousElementNode` before looping.
+///
+/// Errors:
+/// - a recoverable `many_matcher` error is wrapped with `ErrorKind::ManyTill`;
+/// - a `many_matcher` success that consumed no input is an `ErrorKind::ManyTill` error;
+/// - any other error from either matcher is returned unchanged.
+pub fn context_many_till<'r, 's, I, O, E, F, M, T>(
+    context: Context<'r, 's>,
+    mut many_matcher: M,
+    mut till_matcher: T,
+) -> impl FnMut(I) -> IResult<I, (Vec<Token<'s>>, F), E> + 'r
+where
+    I: Clone + InputLength,
+    E: ParseError<I>,
+    M: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, O, E> + 'r,
+    T: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, F, E> + 'r,
+    O: Into<Token<'s>>,
+{
+    move |mut i: I| {
+        // TODO: Can I eliminate the clone? I think this is incrementing the reference count
+        let mut current_context = context.clone();
+        // Despite the clone, the Rc should still point to the same value, otherwise we'll get stuck in an endless loop.
+        assert!(current_context.ptr_eq(context));
+        loop {
+            let len = i.input_len();
+            match till_matcher(&current_context, i.clone()) {
+                Ok((remaining, finish)) => {
+                    let mut ret = Vec::new();
+                    // Pop every node added by this function off the context, keeping the matched tokens.
+                    while !current_context.ptr_eq(context) {
+                        let (context_element, next_context) = current_context.pop_front();
+                        let context_element = context_element.expect("We only pop off context elements created in this function, so they are all Some()");
+                        current_context = next_context;
+                        match context_element {
+                            ContextElement::PreviousElementNode(PreviousElementNode {
+                                element: token,
+                            }) => {
+                                ret.push(token);
+                            }
+                            _ => {}
+                        };
+                    }
+                    // Tokens were popped newest-first; reverse to get match order.
+                    ret.reverse();
+                    return Ok((remaining, (ret, finish)));
+                }
+                Err(nom::Err::Error(_)) => {
+                    // The terminator did not match here, so try to consume one more element.
+                    match many_matcher(&current_context, i.clone()) {
+                        Err(nom::Err::Error(err)) => {
+                            return Err(nom::Err::Error(E::append(i, ErrorKind::ManyTill, err)))
+                        }
+                        Err(e) => return Err(e),
+                        Ok((remaining, many_elem)) => {
+                            // infinite loop check: the parser must always consume
+                            if remaining.input_len() == len {
+                                return Err(nom::Err::Error(E::from_error_kind(
+                                    remaining,
+                                    ErrorKind::ManyTill,
+                                )));
+                            }
+
+                            current_context = current_context.with_additional_node(
+                                ContextElement::PreviousElementNode(PreviousElementNode {
+                                    element: many_elem.into(),
+                                }),
+                            );
+                            i = remaining;
+                        }
+                    }
+                }
+                Err(e) => return Err(e),
+            };
+        }
+    }
+}
--- a/src/parser/old_document.rs
+++ b/src/parser/old_document.rs
@ -0,0 +1,29 @@
+//! A single element of text.
+use super::combinator::context_many1;
+use super::error::Res;
+use super::paragraph::paragraph;
+use super::parser_context::ContextElement;
+use super::parser_context::ContextTree;
+use super::token::Paragraph;
+use super::token::Token;
+use super::Context;
+use nom::IResult;
+
+/// Signature of a parser function that takes the parser context as its first argument.
+type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult<I, O, E>;
+
+// TODO: Implement FromStr for Document
+
+/// Parse the input as one or more paragraphs.
+///
+/// The input is registered as the document root in the parser context before parsing.
+pub fn document(input: &str) -> Res<&str, Vec<Paragraph>> {
+    let root_context: ContextTree<'_, '_> = ContextTree::new();
+    let document_context =
+        root_context.with_additional_node(ContextElement::DocumentRoot(input));
+    let (remaining, tokens) = context_many1(&document_context, paragraph)(input)?;
+
+    let mut paragraphs = Vec::new();
+    for token in tokens {
+        match token {
+            Token::Paragraph(paragraph) => paragraphs.push(paragraph),
+            Token::TextElement(_) => unreachable!(),
+        }
+    }
+    Ok((remaining, paragraphs))
+}
--- a/src/parser/parser_context.rs
+++ b/src/parser/parser_context.rs
@ -7,7 +7,6 @@ use super::error::MyError;
 use super::error::Res;
 use super::list::List;
 use super::list::Node;
-use super::token::Token;
 use super::Context;

 type Matcher = dyn for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str>;
@ -90,15 +89,27 @@ impl<'r, 's> ContextTree<'r, 's> {
        // TODO: Make this a specific error instead of just a generic MyError
        return Err(nom::Err::Error(CustomError::MyError(MyError("NoExit"))));
    }
+
+    /// Returns the string stored in the first `DocumentRoot` node found while
+    /// iterating over this context, or `None` if there is no such node.
+    pub fn get_document_root(&self) -> Option<&'s str> {
+        for node in self.iter() {
+            if let ContextElement::DocumentRoot(body) = node.get_data() {
+                return Some(body);
+            }
+        }
+        None
+    }
 }

 #[derive(Debug)]
 pub enum ContextElement<'r, 's> {
+    DocumentRoot(&'s str),
    ExitMatcherNode(ExitMatcherNode<'r>),
-    PreviousElementNode(PreviousElementNode<'s>),
    Context(&'r str),
    ListItem(usize),
-    StartOfParagraph,
 }

 #[derive(Debug)]
@ -106,11 +117,6 @@ pub struct ExitMatcherNode<'r> {
    pub exit_matcher: ChainBehavior<'r>,
 }

-#[derive(Debug)]
-pub struct PreviousElementNode<'r> {
-    pub element: Token<'r>,
-}
-
 #[derive(Clone)]
 pub enum ChainBehavior<'r> {
    AndParent(Option<&'r Matcher>),
--- a/src/parser/source.rs
+++ b/src/parser/source.rs
@ -0,0 +1,3 @@
+/// Implemented by parsed nodes that record which slice of the input they came from.
+pub trait Source<'s> {
+    /// Returns the slice of the original input this node was parsed from.
+    fn get_source(&'s self) -> &'s str;
+}
--- a/src/parser/util.rs
+++ b/src/parser/util.rs
@ -1,16 +1,92 @@
+use nom::branch::alt;
+use nom::character::complete::line_ending;
+use nom::character::complete::space0;
+use nom::combinator::eof;
+use nom::combinator::not;
+use nom::combinator::recognize;
+use nom::multi::many0;
+use nom::sequence::tuple;
+
+use super::error::Res;
 use super::parser_context::ContextElement;
 use super::Context;

+/// Check if we are below a section of the given section type regardless of depth
 pub fn in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: &'x str) -> bool {
    for thing in context.iter() {
        match thing.get_data() {
-            ContextElement::ExitMatcherNode(_) => {}
-            ContextElement::PreviousElementNode(_) => {}
            ContextElement::Context(name) if *name == section_name => return true,
-            ContextElement::Context(_) => {}
-            ContextElement::StartOfParagraph => {} // TODO: If we specialize this to bold then this would be a good spot to stop scanning
-            ContextElement::ListItem(_) => {}
+            _ => {}
        }
    }
    false
 }
+
+/// Checks if we are currently an immediate child of the given section type
+///
+/// Only the first `Context` node encountered while iterating decides the
+/// answer; all other node kinds are skipped. Returns `false` when there is no
+/// `Context` node at all.
+pub fn immediate_in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: &'x str) -> bool {
+    for node in context.iter() {
+        if let ContextElement::Context(name) = node.get_data() {
+            return *name == section_name;
+        }
+    }
+    false
+}
+
+/// Get one character from before the current position.
+///
+/// `current_position` must be a sub-slice of `document`. Returns the slice
+/// holding the single (possibly multi-byte) character that ends where
+/// `current_position` starts, or `None` when `current_position` starts at the
+/// beginning of `document`.
+///
+/// # Panics
+///
+/// Panics if `current_position` does not lie inside `document`.
+pub fn get_one_before<'s>(document: &'s str, current_position: &'s str) -> Option<&'s str> {
+    assert!(is_slice_of(document, current_position));
+    let offset = current_position.as_ptr() as usize - document.as_ptr() as usize;
+    let preceding = &document[..offset];
+    // `next_back` is `None` exactly when nothing precedes the current position.
+    let previous_character = preceding.chars().next_back()?;
+    Some(&preceding[offset - previous_character.len_utf8()..])
+}
+
+/// Check if the child string slice is a slice of the parent string slice.
+///
+/// The comparison is by address range, not by content: the child's bytes must
+/// lie within the memory occupied by the parent.
+fn is_slice_of(parent: &str, child: &str) -> bool {
+    let parent_range = parent.as_bytes().as_ptr_range();
+    let child_range = child.as_bytes().as_ptr_range();
+    parent_range.start <= child_range.start && child_range.end <= parent_range.end
+}
+
+/// Get a slice of the string that was consumed in a parser using the original input to the parser and the remaining input after the parser.
+///
+/// # Panics
+///
+/// Panics if `remaining` does not lie inside `input`.
+pub fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str {
+    assert!(is_slice_of(input, remaining));
+    let consumed_len = remaining.as_ptr() as usize - input.as_ptr() as usize;
+    &input[..consumed_len]
+}
+
+/// A line containing only whitespace and then a line break
+///
+/// The end of the input is accepted in place of the line break, but empty
+/// input never matches.
+///
+/// It is up to the caller to ensure this is called at the start of a line.
+pub fn blank_line(input: &str) -> Res<&str, &str> {
+    not(eof)(input)?;
+    let line_terminator = alt((line_ending, eof));
+    recognize(tuple((space0, line_terminator)))(input)
+}
+
+/// Matches the end of the input, or a line ending followed by any number of blank lines.
+pub fn trailing_whitespace(input: &str) -> Res<&str, &str> {
+    let line_break_then_blank_lines = recognize(tuple((line_ending, many0(blank_line))));
+    alt((eof, line_break_then_blank_lines))(input)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// The character before a position must be returned whole even when it is multi-byte.
+    #[test]
+    fn get_one_before_unicode() {
+        let input = "🧡💛💚💙💜";
+        let (green_heart_index, _) = input.char_indices().skip(2).next().unwrap();
+        let starting_with_green_heart = &input[green_heart_index..];
+        let yellow_heart = get_one_before(input, starting_with_green_heart).unwrap();
+        assert!(is_slice_of(input, yellow_heart));
+        assert_eq!(yellow_heart, "💛");
+    }
+}
--- a/toy_language.txt
+++ b/toy_language.txt
@ -11,3 +11,12 @@ text*


 *nesting *bold entrances* and* exits
+
+* Heading
+
+body of heading
+
+** Child heading
+** Immediate second child heading
+
+* Second top-level heading
				`@ -1 +0,0 @@`
				`Two line breaks to end paragraph except in code blocks`
				`@ -0,0 +1 @@`
				`* Start a document with an immediate heading`