From d98a11059c97839d5d38f6f5e21ef5bd2bdb0ed7 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 23 Mar 2023 17:51:49 -0400 Subject: [PATCH] Introduce the document structure. --- language_rules.txt | 1 - .../sections_and_headings/Makefile | 23 +++++++ .../immediate_heading.org | 1 + .../sections_and_headings.org | 7 ++ src/main.rs | 8 +-- src/parser/document.rs | 67 +++++++++++-------- src/parser/element.rs | 11 +++ src/parser/mod.rs | 2 +- src/parser/object.rs | 4 +- src/parser/old_document.rs | 29 ++++++++ src/parser/source.rs | 3 + 11 files changed, 120 insertions(+), 36 deletions(-) delete mode 100644 language_rules.txt create mode 100644 org_mode_samples/sections_and_headings/Makefile create mode 100644 org_mode_samples/sections_and_headings/immediate_heading.org create mode 100644 org_mode_samples/sections_and_headings/sections_and_headings.org create mode 100644 src/parser/old_document.rs create mode 100644 src/parser/source.rs diff --git a/language_rules.txt b/language_rules.txt deleted file mode 100644 index 11441967..00000000 --- a/language_rules.txt +++ /dev/null @@ -1 +0,0 @@ -Two line breaks to end paragraph except in code blocks diff --git a/org_mode_samples/sections_and_headings/Makefile b/org_mode_samples/sections_and_headings/Makefile new file mode 100644 index 00000000..c47a86c1 --- /dev/null +++ b/org_mode_samples/sections_and_headings/Makefile @@ -0,0 +1,23 @@ +SHELL := bash +.ONESHELL: +.SHELLFLAGS := -eu -o pipefail -c +.DELETE_ON_ERROR: +MAKEFLAGS += --warn-undefined-variables +MAKEFLAGS += --no-builtin-rules +SRCFILES := $(wildcard *.org) +OUTFILES := $(patsubst %.org,%.tree.txt,$(SRCFILES)) + +ifeq ($(origin .RECIPEPREFIX), undefined) + $(error This Make does not support .RECIPEPREFIX. Please use GNU Make 4.0 or later) +endif +.RECIPEPREFIX = > + +.PHONY: all +all: $(OUTFILES) + +.PHONY: clean +clean: +> rm -rf $(OUTFILES) + +%.tree.txt: %.org ../common.el ../dump_org_ast.bash +> ../dump_org_ast.bash $< $@ diff --git a/org_mode_samples/sections_and_headings/immediate_heading.org b/org_mode_samples/sections_and_headings/immediate_heading.org new file mode 100644 index 00000000..5a8e221b --- /dev/null +++ b/org_mode_samples/sections_and_headings/immediate_heading.org @@ -0,0 +1 @@ +* Start a document with an immediate heading diff --git a/org_mode_samples/sections_and_headings/sections_and_headings.org b/org_mode_samples/sections_and_headings/sections_and_headings.org new file mode 100644 index 00000000..1b49c09c --- /dev/null +++ b/org_mode_samples/sections_and_headings/sections_and_headings.org @@ -0,0 +1,7 @@ +Before the first heading +* The first heading +body of the first section +** Child heading +body of child heading +* second top-level heading +body of second top-level heading diff --git a/src/main.rs b/src/main.rs index a50a86c3..8fe09fa4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,4 @@ -use crate::parser::document; +// use crate::parser::document; use tracing::Level; use tracing_subscriber::fmt::format::FmtSpan; @@ -18,8 +18,8 @@ fn main() -> Result<(), Box> { .with_span_events(FmtSpan::ENTER | FmtSpan::EXIT) .finish(); tracing::subscriber::set_global_default(subscriber)?; - let parsed = document(TEST_DOC); - println!("{}\n\n\n", TEST_DOC); - println!("{:#?}", parsed); + // let parsed = document(TEST_DOC); + // println!("{}\n\n\n", TEST_DOC); + // println!("{:#?}", parsed); Ok(()) } diff --git a/src/parser/document.rs b/src/parser/document.rs index 1c8dd04a..20cd2d9c 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -1,29 +1,42 @@ -//! A single element of text. -use super::combinator::context_many1; -use super::error::Res; -use super::paragraph::paragraph; -use super::parser_context::ContextElement; -use super::parser_context::ContextTree; -use super::token::Paragraph; -use super::token::Token; -use super::Context; -use nom::IResult; +use super::element::Element; +use super::source::Source; -type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult; - -// TODO: Implement FromStr for Document - -pub fn document(input: &str) -> Res<&str, Vec> { - let initial_context: ContextTree<'_, '_> = ContextTree::new(); - let document_context = - initial_context.with_additional_node(ContextElement::DocumentRoot(input)); - let (remaining, tokens) = context_many1(&document_context, paragraph)(input)?; - let paragraphs = tokens - .into_iter() - .map(|token| match token { - Token::TextElement(_) => unreachable!(), - Token::Paragraph(paragraph) => paragraph, - }) - .collect(); - Ok((remaining, paragraphs)) +#[derive(Debug)] +pub struct Document<'s> { + pub source: &'s str, + pub zeroth_section: Option>, + pub children: Vec>, +} + +#[derive(Debug)] +pub struct Heading<'s> { + pub source: &'s str, + pub children: Vec>, +} + +#[derive(Debug)] +pub struct Section<'s> { + pub source: &'s str, + pub children: Vec>, +} + +#[derive(Debug)] +pub enum DocumentElement<'s> { + Heading(Heading<'s>), + Section(Section<'s>), +} + +impl<'s> Source<'s> for Document<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + +impl<'s> Source<'s> for DocumentElement<'s> { + fn get_source(&'s self) -> &'s str { + match self { + DocumentElement::Heading(obj) => obj.source, + DocumentElement::Section(obj) => obj.source, + } + } } diff --git a/src/parser/element.rs b/src/parser/element.rs index 4fcc587a..eb7ed8c5 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -1,7 +1,18 @@ use super::greater_element::PlainList; use super::lesser_element::Paragraph; +use super::source::Source; +#[derive(Debug)] pub enum Element<'s> { Paragraph(Paragraph<'s>), PlainList(PlainList<'s>), } + +impl<'s> Source<'s> for Element<'s> { + fn get_source(&'s self) -> &'s str { + match self { + Element::Paragraph(obj) => obj.source, + Element::PlainList(obj) => obj.source, + } + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e3d5420c..dceeb837 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -12,10 +12,10 @@ mod paragraph; mod parser_context; mod parser_with_context; mod plain_list; +mod source; mod text; mod token; mod util; -pub use document::document; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>; pub use parser_context::ContextTree; pub use plain_list::item; diff --git a/src/parser/object.rs b/src/parser/object.rs index d4d4b5e8..924b873b 100644 --- a/src/parser/object.rs +++ b/src/parser/object.rs @@ -1,6 +1,4 @@ -pub trait Source<'s> { - fn get_source(&'s self) -> &'s str; -} +use super::source::Source; #[derive(Debug)] pub enum Object<'s> { diff --git a/src/parser/old_document.rs b/src/parser/old_document.rs new file mode 100644 index 00000000..1c8dd04a --- /dev/null +++ b/src/parser/old_document.rs @@ -0,0 +1,29 @@ +//! A single element of text. +use super::combinator::context_many1; +use super::error::Res; +use super::paragraph::paragraph; +use super::parser_context::ContextElement; +use super::parser_context::ContextTree; +use super::token::Paragraph; +use super::token::Token; +use super::Context; +use nom::IResult; + +type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult; + +// TODO: Implement FromStr for Document + +pub fn document(input: &str) -> Res<&str, Vec> { + let initial_context: ContextTree<'_, '_> = ContextTree::new(); + let document_context = + initial_context.with_additional_node(ContextElement::DocumentRoot(input)); + let (remaining, tokens) = context_many1(&document_context, paragraph)(input)?; + let paragraphs = tokens + .into_iter() + .map(|token| match token { + Token::TextElement(_) => unreachable!(), + Token::Paragraph(paragraph) => paragraph, + }) + .collect(); + Ok((remaining, paragraphs)) +} diff --git a/src/parser/source.rs b/src/parser/source.rs new file mode 100644 index 00000000..c8e54176 --- /dev/null +++ b/src/parser/source.rs @@ -0,0 +1,3 @@ +pub trait Source<'s> { + fn get_source(&'s self) -> &'s str; +}