From 7402de6a7c986f26c3ccb3b49a1475807f3c33eb Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 23 Mar 2023 14:20:14 -0400 Subject: [PATCH 01/29] Add a test org document with an asterisk inside the bold. This is to prove that bold can contain an asterisk if it doesn't satisfy the exit matcher. --- .../exit_matcher_investigation/bold_with_asterisk_inside.org | 1 + 1 file changed, 1 insertion(+) create mode 100644 org_mode_samples/exit_matcher_investigation/bold_with_asterisk_inside.org diff --git a/org_mode_samples/exit_matcher_investigation/bold_with_asterisk_inside.org b/org_mode_samples/exit_matcher_investigation/bold_with_asterisk_inside.org new file mode 100644 index 00000000..d990b435 --- /dev/null +++ b/org_mode_samples/exit_matcher_investigation/bold_with_asterisk_inside.org @@ -0,0 +1 @@ +foo *bar baz * lorem* ipsum From 36210c2d7f9e5a23f392aaf6a3a98fe6b81ace0d Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 23 Mar 2023 16:40:39 -0400 Subject: [PATCH 02/29] Add a DocumentRoot context element storing the original full document. This might be used for look-behind instead of storing previous element nodes in the context tree. 
--- src/parser/bold.rs | 3 +++ src/parser/combinator.rs | 10 ++-------- src/parser/document.rs | 5 ++++- src/parser/parser_context.rs | 1 + src/parser/util.rs | 19 ++++++++++++++----- 5 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/parser/bold.rs b/src/parser/bold.rs index a8dc00aa..004e7f02 100644 --- a/src/parser/bold.rs +++ b/src/parser/bold.rs @@ -96,6 +96,9 @@ fn _preceded_by_whitespace<'r, 's>(context: Context<'r, 's>) -> bool { } ContextElement::Context(_) => {} ContextElement::ListItem(_) => {} + ContextElement::DocumentRoot(_) => { + return true; + } } } else { break; diff --git a/src/parser/combinator.rs b/src/parser/combinator.rs index 4b99f845..5cb31021 100644 --- a/src/parser/combinator.rs +++ b/src/parser/combinator.rs @@ -43,10 +43,7 @@ where .into_iter_until(context) .filter_map(|context_element| match context_element { ContextElement::PreviousElementNode(elem) => Some(elem.element), - ContextElement::ExitMatcherNode(_) => None, - ContextElement::Context(_) => None, - ContextElement::StartOfParagraph => None, - ContextElement::ListItem(_) => None, + _ => None, }) .collect(); if elements.is_empty() { @@ -86,15 +83,12 @@ where let context_element = context_element.expect("We only pop off context elements created in this function, so they are all Some()"); current_context = next_context; match context_element { - ContextElement::ExitMatcherNode(_) => {} - ContextElement::StartOfParagraph => {} - ContextElement::Context(_) => {} ContextElement::PreviousElementNode(PreviousElementNode { element: token, }) => { ret.push(token); } - ContextElement::ListItem(_) => {} + _ => {} }; } ret.reverse(); diff --git a/src/parser/document.rs b/src/parser/document.rs index d9571a8e..1c8dd04a 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -2,6 +2,7 @@ use super::combinator::context_many1; use super::error::Res; use super::paragraph::paragraph; +use super::parser_context::ContextElement; use super::parser_context::ContextTree; 
use super::token::Paragraph; use super::token::Token; @@ -14,7 +15,9 @@ type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult Res<&str, Vec> { let initial_context: ContextTree<'_, '_> = ContextTree::new(); - let (remaining, tokens) = context_many1(&initial_context, paragraph)(input)?; + let document_context = + initial_context.with_additional_node(ContextElement::DocumentRoot(input)); + let (remaining, tokens) = context_many1(&document_context, paragraph)(input)?; let paragraphs = tokens .into_iter() .map(|token| match token { diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index 44fc9e1a..1ffe709a 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -94,6 +94,7 @@ impl<'r, 's> ContextTree<'r, 's> { #[derive(Debug)] pub enum ContextElement<'r, 's> { + DocumentRoot(&'s str), ExitMatcherNode(ExitMatcherNode<'r>), PreviousElementNode(PreviousElementNode<'s>), Context(&'r str), diff --git a/src/parser/util.rs b/src/parser/util.rs index 3600c136..7cee2509 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -1,15 +1,24 @@ use super::parser_context::ContextElement; use super::Context; +/// Check if we are below a section of the given section type regardless of depth pub fn in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: &'x str) -> bool { for thing in context.iter() { match thing.get_data() { - ContextElement::ExitMatcherNode(_) => {} - ContextElement::PreviousElementNode(_) => {} ContextElement::Context(name) if *name == section_name => return true, - ContextElement::Context(_) => {} - ContextElement::StartOfParagraph => {} // TODO: If we specialize this to bold then this would be a good spot to stop scanning - ContextElement::ListItem(_) => {} + _ => {} + } + } + false +} + +/// Checks if we are currently an immediate child of the given section type +pub fn immediate_in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: &'x str) -> bool { + for thing in 
context.iter() { + match thing.get_data() { + ContextElement::Context(name) if *name == section_name => return true, + ContextElement::Context(name) if *name != section_name => return false, + _ => {} } } false From 87c4a955afc941b16c4328d6e5a7c076aae5379c Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 23 Mar 2023 16:49:52 -0400 Subject: [PATCH 03/29] Starting to define objects based on org-mode's definitions. --- src/parser/mod.rs | 1 + src/parser/object.rs | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 src/parser/object.rs diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 545ef9fc..8190bd8a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4,6 +4,7 @@ mod document; mod error; mod link; mod list; +mod object; mod paragraph; mod parser_context; mod parser_with_context; diff --git a/src/parser/object.rs b/src/parser/object.rs new file mode 100644 index 00000000..0c4b692b --- /dev/null +++ b/src/parser/object.rs @@ -0,0 +1,25 @@ +pub enum Object<'s> { + TextMarkup(TextMarkup<'s>), + PlainText(PlainText<'s>), +} + +pub struct TextMarkup<'s> { + pub source: &'s str, +} + +pub struct PlainText<'s> { + pub source: &'s str, +} + +pub trait Source<'s> { + fn get_source(&'s self) -> &'s str; +} + +impl<'s> Source<'s> for Object<'s> { + fn get_source(&'s self) -> &'s str { + match self { + Object::TextMarkup(obj) => obj.source, + Object::PlainText(obj) => obj.source, + } + } +} From fd45e4381ceeaaf9f12395b4e640133027e2ac48 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 23 Mar 2023 16:57:03 -0400 Subject: [PATCH 04/29] Add a test org-mode document that shows backslashes before a line break create an explicit line break. This line break appears in the output (for example a
<br> in HTML) as opposed to the line breaks in paragraphs syntactically that get reduced to a single space on the same line. --- org_mode_samples/paragraphs/Makefile | 23 +++++++++++++++++++ .../paragraph_with_backslash_line_breaks.org | 7 ++++++ 2 files changed, 30 insertions(+) create mode 100644 org_mode_samples/paragraphs/Makefile create mode 100644 org_mode_samples/paragraphs/paragraph_with_backslash_line_breaks.org diff --git a/org_mode_samples/paragraphs/Makefile b/org_mode_samples/paragraphs/Makefile new file mode 100644 index 00000000..c47a86c1 --- /dev/null +++ b/org_mode_samples/paragraphs/Makefile @@ -0,0 +1,23 @@ +SHELL := bash +.ONESHELL: +.SHELLFLAGS := -eu -o pipefail -c +.DELETE_ON_ERROR: +MAKEFLAGS += --warn-undefined-variables +MAKEFLAGS += --no-builtin-rules +SRCFILES := $(wildcard *.org) +OUTFILES := $(patsubst %.org,%.tree.txt,$(SRCFILES)) + +ifeq ($(origin .RECIPEPREFIX), undefined) + $(error This Make does not support .RECIPEPREFIX. Please use GNU Make 4.0 or later) +endif +.RECIPEPREFIX = > + +.PHONY: all +all: $(OUTFILES) + +.PHONY: clean +clean: +> rm -rf $(OUTFILES) + +%.tree.txt: %.org ../common.el ../dump_org_ast.bash +> ../dump_org_ast.bash $< $@ diff --git a/org_mode_samples/paragraphs/paragraph_with_backslash_line_breaks.org b/org_mode_samples/paragraphs/paragraph_with_backslash_line_breaks.org new file mode 100644 index 00000000..12ce1b7d --- /dev/null +++ b/org_mode_samples/paragraphs/paragraph_with_backslash_line_breaks.org @@ -0,0 +1,7 @@ +This is a paragraph + +This is another paragraph +This is a second line in that paragraph + +This is a third paragraph \\ +This is a second line in that paragraph From 66befc66a9a0a241d934d75f37b7e130f7bfc59a Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 23 Mar 2023 17:02:08 -0400 Subject: [PATCH 05/29] Define regular link. 
--- src/parser/object.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/parser/object.rs b/src/parser/object.rs index 0c4b692b..95f68c72 100644 --- a/src/parser/object.rs +++ b/src/parser/object.rs @@ -1,6 +1,11 @@ +pub trait Source<'s> { + fn get_source(&'s self) -> &'s str; +} + pub enum Object<'s> { TextMarkup(TextMarkup<'s>), PlainText(PlainText<'s>), + RegularLink(RegularLink<'s>), } pub struct TextMarkup<'s> { @@ -11,8 +16,8 @@ pub struct PlainText<'s> { pub source: &'s str, } -pub trait Source<'s> { - fn get_source(&'s self) -> &'s str; +pub struct RegularLink<'s> { + pub source: &'s str, } impl<'s> Source<'s> for Object<'s> { @@ -20,6 +25,7 @@ impl<'s> Source<'s> for Object<'s> { match self { Object::TextMarkup(obj) => obj.source, Object::PlainText(obj) => obj.source, + Object::RegularLink(obj) => obj.source, } } } From 35d60c10ba676c8d6881ce66efee29d913f1d462 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 23 Mar 2023 17:26:07 -0400 Subject: [PATCH 06/29] Define greater and lesser elements. 
--- src/parser/element.rs | 7 +++++++ src/parser/greater_element.rs | 4 ++++ src/parser/lesser_element.rs | 4 ++++ src/parser/mod.rs | 3 +++ src/parser/object.rs | 4 ++++ 5 files changed, 22 insertions(+) create mode 100644 src/parser/element.rs create mode 100644 src/parser/greater_element.rs create mode 100644 src/parser/lesser_element.rs diff --git a/src/parser/element.rs b/src/parser/element.rs new file mode 100644 index 00000000..4fcc587a --- /dev/null +++ b/src/parser/element.rs @@ -0,0 +1,7 @@ +use super::greater_element::PlainList; +use super::lesser_element::Paragraph; + +pub enum Element<'s> { + Paragraph(Paragraph<'s>), + PlainList(PlainList<'s>), +} diff --git a/src/parser/greater_element.rs b/src/parser/greater_element.rs new file mode 100644 index 00000000..38d2a7a4 --- /dev/null +++ b/src/parser/greater_element.rs @@ -0,0 +1,4 @@ +#[derive(Debug)] +pub struct PlainList<'s> { + pub source: &'s str, +} diff --git a/src/parser/lesser_element.rs b/src/parser/lesser_element.rs new file mode 100644 index 00000000..83fd38b0 --- /dev/null +++ b/src/parser/lesser_element.rs @@ -0,0 +1,4 @@ +#[derive(Debug)] +pub struct Paragraph<'s> { + pub source: &'s str, +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8190bd8a..e3d5420c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,7 +1,10 @@ mod bold; mod combinator; mod document; +mod element; mod error; +mod greater_element; +mod lesser_element; mod link; mod list; mod object; diff --git a/src/parser/object.rs b/src/parser/object.rs index 95f68c72..d4d4b5e8 100644 --- a/src/parser/object.rs +++ b/src/parser/object.rs @@ -2,20 +2,24 @@ pub trait Source<'s> { fn get_source(&'s self) -> &'s str; } +#[derive(Debug)] pub enum Object<'s> { TextMarkup(TextMarkup<'s>), PlainText(PlainText<'s>), RegularLink(RegularLink<'s>), } +#[derive(Debug)] pub struct TextMarkup<'s> { pub source: &'s str, } +#[derive(Debug)] pub struct PlainText<'s> { pub source: &'s str, } +#[derive(Debug)] pub struct 
RegularLink<'s> { pub source: &'s str, } From d98a11059c97839d5d38f6f5e21ef5bd2bdb0ed7 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 23 Mar 2023 17:51:49 -0400 Subject: [PATCH 07/29] Introduce the document structure. --- language_rules.txt | 1 - .../sections_and_headings/Makefile | 23 +++++++ .../immediate_heading.org | 1 + .../sections_and_headings.org | 7 ++ src/main.rs | 8 +-- src/parser/document.rs | 67 +++++++++++-------- src/parser/element.rs | 11 +++ src/parser/mod.rs | 2 +- src/parser/object.rs | 4 +- src/parser/old_document.rs | 29 ++++++++ src/parser/source.rs | 3 + 11 files changed, 120 insertions(+), 36 deletions(-) delete mode 100644 language_rules.txt create mode 100644 org_mode_samples/sections_and_headings/Makefile create mode 100644 org_mode_samples/sections_and_headings/immediate_heading.org create mode 100644 org_mode_samples/sections_and_headings/sections_and_headings.org create mode 100644 src/parser/old_document.rs create mode 100644 src/parser/source.rs diff --git a/language_rules.txt b/language_rules.txt deleted file mode 100644 index 11441967..00000000 --- a/language_rules.txt +++ /dev/null @@ -1 +0,0 @@ -Two line breaks to end paragraph except in code blocks diff --git a/org_mode_samples/sections_and_headings/Makefile b/org_mode_samples/sections_and_headings/Makefile new file mode 100644 index 00000000..c47a86c1 --- /dev/null +++ b/org_mode_samples/sections_and_headings/Makefile @@ -0,0 +1,23 @@ +SHELL := bash +.ONESHELL: +.SHELLFLAGS := -eu -o pipefail -c +.DELETE_ON_ERROR: +MAKEFLAGS += --warn-undefined-variables +MAKEFLAGS += --no-builtin-rules +SRCFILES := $(wildcard *.org) +OUTFILES := $(patsubst %.org,%.tree.txt,$(SRCFILES)) + +ifeq ($(origin .RECIPEPREFIX), undefined) + $(error This Make does not support .RECIPEPREFIX. 
Please use GNU Make 4.0 or later) +endif +.RECIPEPREFIX = > + +.PHONY: all +all: $(OUTFILES) + +.PHONY: clean +clean: +> rm -rf $(OUTFILES) + +%.tree.txt: %.org ../common.el ../dump_org_ast.bash +> ../dump_org_ast.bash $< $@ diff --git a/org_mode_samples/sections_and_headings/immediate_heading.org b/org_mode_samples/sections_and_headings/immediate_heading.org new file mode 100644 index 00000000..5a8e221b --- /dev/null +++ b/org_mode_samples/sections_and_headings/immediate_heading.org @@ -0,0 +1 @@ +* Start a document with an immediate heading diff --git a/org_mode_samples/sections_and_headings/sections_and_headings.org b/org_mode_samples/sections_and_headings/sections_and_headings.org new file mode 100644 index 00000000..1b49c09c --- /dev/null +++ b/org_mode_samples/sections_and_headings/sections_and_headings.org @@ -0,0 +1,7 @@ +Before the first heading +* The first heading +body of the first section +** Child heading +body of child heading +* second top-level heading +body of second top-level heading diff --git a/src/main.rs b/src/main.rs index a50a86c3..8fe09fa4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,4 @@ -use crate::parser::document; +// use crate::parser::document; use tracing::Level; use tracing_subscriber::fmt::format::FmtSpan; @@ -18,8 +18,8 @@ fn main() -> Result<(), Box> { .with_span_events(FmtSpan::ENTER | FmtSpan::EXIT) .finish(); tracing::subscriber::set_global_default(subscriber)?; - let parsed = document(TEST_DOC); - println!("{}\n\n\n", TEST_DOC); - println!("{:#?}", parsed); + // let parsed = document(TEST_DOC); + // println!("{}\n\n\n", TEST_DOC); + // println!("{:#?}", parsed); Ok(()) } diff --git a/src/parser/document.rs b/src/parser/document.rs index 1c8dd04a..20cd2d9c 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -1,29 +1,42 @@ -//! A single element of text. 
-use super::combinator::context_many1; -use super::error::Res; -use super::paragraph::paragraph; -use super::parser_context::ContextElement; -use super::parser_context::ContextTree; -use super::token::Paragraph; -use super::token::Token; -use super::Context; -use nom::IResult; +use super::element::Element; +use super::source::Source; -type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult; - -// TODO: Implement FromStr for Document - -pub fn document(input: &str) -> Res<&str, Vec> { - let initial_context: ContextTree<'_, '_> = ContextTree::new(); - let document_context = - initial_context.with_additional_node(ContextElement::DocumentRoot(input)); - let (remaining, tokens) = context_many1(&document_context, paragraph)(input)?; - let paragraphs = tokens - .into_iter() - .map(|token| match token { - Token::TextElement(_) => unreachable!(), - Token::Paragraph(paragraph) => paragraph, - }) - .collect(); - Ok((remaining, paragraphs)) +#[derive(Debug)] +pub struct Document<'s> { + pub source: &'s str, + pub zeroth_section: Option>, + pub children: Vec>, +} + +#[derive(Debug)] +pub struct Heading<'s> { + pub source: &'s str, + pub children: Vec>, +} + +#[derive(Debug)] +pub struct Section<'s> { + pub source: &'s str, + pub children: Vec>, +} + +#[derive(Debug)] +pub enum DocumentElement<'s> { + Heading(Heading<'s>), + Section(Section<'s>), +} + +impl<'s> Source<'s> for Document<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + +impl<'s> Source<'s> for DocumentElement<'s> { + fn get_source(&'s self) -> &'s str { + match self { + DocumentElement::Heading(obj) => obj.source, + DocumentElement::Section(obj) => obj.source, + } + } } diff --git a/src/parser/element.rs b/src/parser/element.rs index 4fcc587a..eb7ed8c5 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -1,7 +1,18 @@ use super::greater_element::PlainList; use super::lesser_element::Paragraph; +use super::source::Source; +#[derive(Debug)] pub enum Element<'s> { 
Paragraph(Paragraph<'s>), PlainList(PlainList<'s>), } + +impl<'s> Source<'s> for Element<'s> { + fn get_source(&'s self) -> &'s str { + match self { + Element::Paragraph(obj) => obj.source, + Element::PlainList(obj) => obj.source, + } + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e3d5420c..dceeb837 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -12,10 +12,10 @@ mod paragraph; mod parser_context; mod parser_with_context; mod plain_list; +mod source; mod text; mod token; mod util; -pub use document::document; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>; pub use parser_context::ContextTree; pub use plain_list::item; diff --git a/src/parser/object.rs b/src/parser/object.rs index d4d4b5e8..924b873b 100644 --- a/src/parser/object.rs +++ b/src/parser/object.rs @@ -1,6 +1,4 @@ -pub trait Source<'s> { - fn get_source(&'s self) -> &'s str; -} +use super::source::Source; #[derive(Debug)] pub enum Object<'s> { diff --git a/src/parser/old_document.rs b/src/parser/old_document.rs new file mode 100644 index 00000000..1c8dd04a --- /dev/null +++ b/src/parser/old_document.rs @@ -0,0 +1,29 @@ +//! A single element of text. 
+use super::combinator::context_many1; +use super::error::Res; +use super::paragraph::paragraph; +use super::parser_context::ContextElement; +use super::parser_context::ContextTree; +use super::token::Paragraph; +use super::token::Token; +use super::Context; +use nom::IResult; + +type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult; + +// TODO: Implement FromStr for Document + +pub fn document(input: &str) -> Res<&str, Vec> { + let initial_context: ContextTree<'_, '_> = ContextTree::new(); + let document_context = + initial_context.with_additional_node(ContextElement::DocumentRoot(input)); + let (remaining, tokens) = context_many1(&document_context, paragraph)(input)?; + let paragraphs = tokens + .into_iter() + .map(|token| match token { + Token::TextElement(_) => unreachable!(), + Token::Paragraph(paragraph) => paragraph, + }) + .collect(); + Ok((remaining, paragraphs)) +} diff --git a/src/parser/source.rs b/src/parser/source.rs new file mode 100644 index 00000000..c8e54176 --- /dev/null +++ b/src/parser/source.rs @@ -0,0 +1,3 @@ +pub trait Source<'s> { + fn get_source(&'s self) -> &'s str; +} From 6042c4451426f4dfbf9702db77db60b141b0fcda Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 23 Mar 2023 17:59:39 -0400 Subject: [PATCH 08/29] Starting to define document parser. 
--- src/parser/document.rs | 12 ++++++++++++ src/parser/mod.rs | 2 -- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index 20cd2d9c..a9da8307 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -1,4 +1,8 @@ +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ContextTree; + use super::element::Element; +use super::error::Res; use super::source::Source; #[derive(Debug)] @@ -40,3 +44,11 @@ impl<'s> Source<'s> for DocumentElement<'s> { } } } + +pub fn document(input: &str) -> Res<&str, Document> { + let initial_context: ContextTree<'_, '_> = ContextTree::new(); + let document_context = + initial_context.with_additional_node(ContextElement::DocumentRoot(input)); + + todo!() +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index dceeb837..14f714f5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -17,5 +17,3 @@ mod text; mod token; mod util; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>; -pub use parser_context::ContextTree; -pub use plain_list::item; From ee60cf40dd85b37489b3f3a0ddc0da8183475622 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 23 Mar 2023 18:01:33 -0400 Subject: [PATCH 09/29] Disable building the old combinator. 
--- src/parser/mod.rs | 12 ++++++------ src/parser/{combinator.rs => old_combinator.rs} | 0 2 files changed, 6 insertions(+), 6 deletions(-) rename src/parser/{combinator.rs => old_combinator.rs} (100%) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 14f714f5..e8cf9c49 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,19 +1,19 @@ -mod bold; -mod combinator; +// mod bold; +// mod combinator; mod document; mod element; mod error; mod greater_element; mod lesser_element; -mod link; +// mod link; mod list; mod object; -mod paragraph; +// mod paragraph; mod parser_context; mod parser_with_context; -mod plain_list; +// mod plain_list; mod source; -mod text; +// mod text; mod token; mod util; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>; diff --git a/src/parser/combinator.rs b/src/parser/old_combinator.rs similarity index 100% rename from src/parser/combinator.rs rename to src/parser/old_combinator.rs From d3c804942fcc5be529ff15330d0d812d7191599e Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 23 Mar 2023 18:25:00 -0400 Subject: [PATCH 10/29] Simplified context_many1 based on the assumption that we will not use previous element context elements. 
--- src/parser/combinator.rs | 38 ++++++++++++++++++++++++++++++++++++++ src/parser/mod.rs | 2 +- 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 src/parser/combinator.rs diff --git a/src/parser/combinator.rs b/src/parser/combinator.rs new file mode 100644 index 00000000..ceab6c81 --- /dev/null +++ b/src/parser/combinator.rs @@ -0,0 +1,38 @@ +use nom::error::ParseError; +use nom::IResult; +use nom::InputLength; + +use super::Context; + +pub fn context_many1<'r: 's, 's, I, O, E, M>( + context: Context<'r, 's>, + mut many_matcher: M, +) -> impl FnMut(I) -> IResult, E> + 'r +where + I: Clone + InputLength, + E: ParseError, + M: for<'x> Fn(Context<'x, 's>, I) -> IResult + 'r, +{ + move |mut i: I| { + let mut err = None; + let mut elements: Vec = Vec::new(); + loop { + match many_matcher(&context, i.clone()) { + Ok((remaining, many_elem)) => { + i = remaining; + elements.push(many_elem); + } + the_error @ Err(_) => { + err = Some(the_error); + break; + } + } + } + if elements.is_empty() { + if let Some(err) = err { + err?; + } + } + Ok((i, elements)) + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e8cf9c49..30e2b3f7 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,5 +1,5 @@ // mod bold; -// mod combinator; +mod combinator; mod document; mod element; mod error; From 5c8a064ecad5259df5638bc87011b34a416360b2 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 23 Mar 2023 19:35:32 -0400 Subject: [PATCH 11/29] Start writing the parser for headings. 
--- src/main.rs | 1 + src/parser/document.rs | 86 ++++++++++++++++++++++++++++++++++++ src/parser/mod.rs | 2 +- src/parser/parser_context.rs | 20 ++++++--- 4 files changed, 101 insertions(+), 8 deletions(-) diff --git a/src/main.rs b/src/main.rs index 8fe09fa4..8e4901cf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,4 @@ +#![feature(round_char_boundary)] // use crate::parser::document; use tracing::Level; use tracing_subscriber::fmt::format::FmtSpan; diff --git a/src/parser/document.rs b/src/parser/document.rs index a9da8307..0d65f6d9 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -1,9 +1,22 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::combinator::eof; +use nom::combinator::recognize; +use nom::multi::many1_count; +use nom::sequence::tuple; + +use crate::parser::error::CustomError; +use crate::parser::error::MyError; +use crate::parser::parser_context::ChainBehavior; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ContextTree; +use crate::parser::parser_context::ExitMatcherNode; use super::element::Element; use super::error::Res; +use super::parser_with_context::parser_with_context; use super::source::Source; +use super::Context; #[derive(Debug)] pub struct Document<'s> { @@ -52,3 +65,76 @@ pub fn document(input: &str) -> Res<&str, Document> { todo!() } + +fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Section<'s>> { + // TODO: The zeroth section is specialized so it probably needs its own parser + let parser_context = context + .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + exit_matcher: ChainBehavior::AndParent(Some(&section_end)), + })) + .with_additional_node(ContextElement::Context("section")); + todo!() +} + +fn section_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + let heading_matcher = parser_with_context!(heading)(context); + alt((recognize(heading_matcher), eof))(input) +} + +fn 
heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Heading<'s>> { + let document_root = context.get_document_root().unwrap(); + let preceding_character = get_one_before(document_root, input) + .map(|slice| slice.chars().next()) + .flatten(); + match preceding_character { + Some('\n') => {} + Some(_) => { + // Not at start of line, cannot be a heading + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Heading not at start of line", + )))); + } + // If None, we are at the start of the file which allows for headings + None => {} + }; + + tuple(( + many1_count(tag("*")), + // standard set of objects + ))(input)?; + + todo!() +} + +fn get_one_before<'s>(document: &'s str, current_position: &'s str) -> Option<&'s str> { + assert!(is_slice_of(document, current_position)); + if document.as_ptr() as usize == current_position.as_ptr() as usize { + return None; + } + let offset = current_position.as_ptr() as usize - document.as_ptr() as usize; + let previous_character_offset = document.floor_char_boundary(offset - 1); + Some(&document[previous_character_offset..offset]) +} + +fn is_slice_of(parent: &str, child: &str) -> bool { + let parent_start = parent.as_ptr() as usize; + let parent_end = parent_start + parent.len(); + let child_start = child.as_ptr() as usize; + let child_end = child_start + child.len(); + child_start >= parent_start && child_end <= parent_end +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn get_one_before_unicode() { + let input = "🧡💛💚💙💜"; + let (green_heart_index, _) = input.char_indices().skip(2).next().unwrap(); + let starting_with_green_heart = &input[green_heart_index..]; + let yellow_heart = get_one_before(input, starting_with_green_heart).unwrap(); + assert!(is_slice_of(input, yellow_heart)); + assert_eq!(yellow_heart, "💛"); + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 30e2b3f7..5d189e28 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -14,6 +14,6 @@ 
mod parser_with_context; // mod plain_list; mod source; // mod text; -mod token; +// mod token; mod util; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>; diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index 1ffe709a..cdade556 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -7,7 +7,6 @@ use super::error::MyError; use super::error::Res; use super::list::List; use super::list::Node; -use super::token::Token; use super::Context; type Matcher = dyn for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str>; @@ -90,13 +89,25 @@ impl<'r, 's> ContextTree<'r, 's> { // TODO: Make this a specific error instead of just a generic MyError return Err(nom::Err::Error(CustomError::MyError(MyError("NoExit")))); } + + pub fn get_document_root(&self) -> Option<&'s str> { + for current_node in self.iter() { + let context_element = current_node.get_data(); + match context_element { + ContextElement::DocumentRoot(body) => { + return Some(body); + } + _ => {} + } + } + None + } } #[derive(Debug)] pub enum ContextElement<'r, 's> { DocumentRoot(&'s str), ExitMatcherNode(ExitMatcherNode<'r>), - PreviousElementNode(PreviousElementNode<'s>), Context(&'r str), ListItem(usize), StartOfParagraph, @@ -107,11 +118,6 @@ pub struct ExitMatcherNode<'r> { pub exit_matcher: ChainBehavior<'r>, } -#[derive(Debug)] -pub struct PreviousElementNode<'r> { - pub element: Token<'r>, -} - #[derive(Clone)] pub enum ChainBehavior<'r> { AndParent(Option<&'r Matcher>), From 02d04b59dbb488758d24c1932136e85d1e58f383 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 23 Mar 2023 19:53:20 -0400 Subject: [PATCH 12/29] Ran into a lifetime issue. 
--- src/parser/document.rs | 25 ++++++++++++++++++++++--- src/parser/object.rs | 9 +++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index 0d65f6d9..5fdec0d7 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -1,12 +1,16 @@ use nom::branch::alt; use nom::bytes::complete::tag; +use nom::character::complete::line_ending; +use nom::character::complete::space1; use nom::combinator::eof; use nom::combinator::recognize; use nom::multi::many1_count; use nom::sequence::tuple; +use crate::parser::combinator::context_many1; use crate::parser::error::CustomError; use crate::parser::error::MyError; +use crate::parser::object::standard_set_object; use crate::parser::parser_context::ChainBehavior; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ContextTree; @@ -82,13 +86,17 @@ fn section_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, } fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Heading<'s>> { + todo!() +} + +fn headline<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { let document_root = context.get_document_root().unwrap(); let preceding_character = get_one_before(document_root, input) .map(|slice| slice.chars().next()) .flatten(); match preceding_character { Some('\n') => {} - Some(_) => { + Some(_) => { // Not at start of line, cannot be a heading return Err(nom::Err::Error(CustomError::MyError(MyError( "Heading not at start of line", @@ -98,14 +106,25 @@ fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Hea None => {} }; - tuple(( + let parser_context = + context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + exit_matcher: ChainBehavior::AndParent(Some(&headline_end)), + })); + let title_matcher = parser_with_context!(context_many1)(&parser_context); + + let foo = tuple(( many1_count(tag("*")), - // standard set of 
objects + space1, + title_matcher(standard_set_object), ))(input)?; todo!() } +fn headline_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + line_ending(input) +} + fn get_one_before<'s>(document: &'s str, current_position: &'s str) -> Option<&'s str> { assert!(is_slice_of(document, current_position)); if document.as_ptr() as usize == current_position.as_ptr() as usize { diff --git a/src/parser/object.rs b/src/parser/object.rs index 924b873b..407e891e 100644 --- a/src/parser/object.rs +++ b/src/parser/object.rs @@ -1,4 +1,6 @@ +use super::error::Res; use super::source::Source; +use super::Context; #[derive(Debug)] pub enum Object<'s> { @@ -31,3 +33,10 @@ impl<'s> Source<'s> for Object<'s> { } } } + +pub fn standard_set_object<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, Object<'s>> { + todo!() +} From 4f10f2abecf312638393eecfb278d950cfdd1e17 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 23 Mar 2023 20:00:35 -0400 Subject: [PATCH 13/29] Solved the lifetime issue by using the standard many1 combinator. 
--- src/parser/document.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index 5fdec0d7..a84a0d15 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -4,10 +4,10 @@ use nom::character::complete::line_ending; use nom::character::complete::space1; use nom::combinator::eof; use nom::combinator::recognize; +use nom::multi::many1; use nom::multi::many1_count; use nom::sequence::tuple; -use crate::parser::combinator::context_many1; use crate::parser::error::CustomError; use crate::parser::error::MyError; use crate::parser::object::standard_set_object; @@ -110,12 +110,13 @@ fn headline<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &' context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { exit_matcher: ChainBehavior::AndParent(Some(&headline_end)), })); - let title_matcher = parser_with_context!(context_many1)(&parser_context); + let standard_set_object_matcher = parser_with_context!(standard_set_object)(&parser_context); let foo = tuple(( many1_count(tag("*")), space1, - title_matcher(standard_set_object), + many1(standard_set_object_matcher), + alt((line_ending, eof)), ))(input)?; todo!() From 3502a31b2875f119503af77dfcbc3fcd9cdc8658 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 23 Mar 2023 20:03:45 -0400 Subject: [PATCH 14/29] Simple version of the headline parser done. 
--- src/parser/document.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index a84a0d15..e1feb20f 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -18,6 +18,7 @@ use crate::parser::parser_context::ExitMatcherNode; use super::element::Element; use super::error::Res; +use super::object::Object; use super::parser_with_context::parser_with_context; use super::source::Source; use super::Context; @@ -89,7 +90,10 @@ fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Hea todo!() } -fn headline<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { +fn headline<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, (usize, &'s str, Vec<Object<'s>>, &'s str)> { let document_root = context.get_document_root().unwrap(); let preceding_character = get_one_before(document_root, input) .map(|slice| slice.chars().next()) @@ -112,14 +116,13 @@ fn headline<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &' })); let standard_set_object_matcher = parser_with_context!(standard_set_object)(&parser_context); - let foo = tuple(( + let ret = tuple(( many1_count(tag("*")), space1, many1(standard_set_object_matcher), alt((line_ending, eof)), - ))(input)?; - - todo!() + ))(input); + ret } fn headline_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { From ceb7788cfa0db46e9c9c89bb85a856c74b3fe9cf Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Thu, 23 Mar 2023 20:12:42 -0400 Subject: [PATCH 15/29] Check the exit matcher in more places. 
--- src/parser/document.rs | 5 +++++ src/parser/object.rs | 3 +++ 2 files changed, 8 insertions(+) diff --git a/src/parser/document.rs b/src/parser/document.rs index e1feb20f..ebfb4814 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -3,6 +3,7 @@ use nom::bytes::complete::tag; use nom::character::complete::line_ending; use nom::character::complete::space1; use nom::combinator::eof; +use nom::combinator::not; use nom::combinator::recognize; use nom::multi::many1; use nom::multi::many1_count; @@ -63,6 +64,7 @@ impl<'s> Source<'s> for DocumentElement<'s> { } } +#[allow(dead_code)] pub fn document(input: &str) -> Res<&str, Document> { let initial_context: ContextTree<'_, '_> = ContextTree::new(); let document_context = @@ -78,6 +80,7 @@ fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Sec exit_matcher: ChainBehavior::AndParent(Some(&section_end)), })) .with_additional_node(ContextElement::Context("section")); + not(|i| parser_context.check_exit_matcher(i))(input)?; todo!() } @@ -87,6 +90,8 @@ fn section_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, } fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Heading<'s>> { + not(|i| context.check_exit_matcher(i))(input)?; + let (remaining, (star_count, _ws, title, _ws2)) = headline(context, input)?; todo!() } diff --git a/src/parser/object.rs b/src/parser/object.rs index 407e891e..417bb78d 100644 --- a/src/parser/object.rs +++ b/src/parser/object.rs @@ -1,3 +1,5 @@ +use nom::combinator::not; + use super::error::Res; use super::source::Source; use super::Context; @@ -38,5 +40,6 @@ pub fn standard_set_object<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, Object<'s>> { + not(|i| context.check_exit_matcher(i))(input)?; todo!() } From 754c1922df05323e4908243ae91460f05256e4eb Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 24 Mar 2023 16:37:34 -0400 Subject: [PATCH 16/29] Create a start_of_line parser. 
--- src/parser/document.rs | 46 +++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index ebfb4814..bec0cab0 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -99,6 +99,29 @@ fn headline<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, (usize, &'s str, Vec<Object<'s>>, &'s str)> { + let parser_context = + context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + exit_matcher: ChainBehavior::AndParent(Some(&headline_end)), + })); + let standard_set_object_matcher = parser_with_context!(standard_set_object)(&parser_context); + let start_of_line_matcher = parser_with_context!(start_of_line)(&parser_context); + + let (remaining, (_sol, star_count, ws, title, ws2)) = tuple(( + start_of_line_matcher, + many1_count(tag("*")), + space1, + many1(standard_set_object_matcher), + alt((line_ending, eof)), + ))(input)?; + Ok((remaining, (star_count, ws, title, ws2))) +} + +fn headline_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + line_ending(input) +} + +/// Check that we are at the start of a line +fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { let document_root = context.get_document_root().unwrap(); let preceding_character = get_one_before(document_root, input) .map(|slice| slice.chars().next()) @@ -108,32 +131,16 @@ fn headline<'r, 's>( Some(_) => { // Not at start of line, cannot be a heading return Err(nom::Err::Error(CustomError::MyError(MyError( - "Heading not at start of line", + "Not at start of line", )))); } // If None, we are at the start of the file which allows for headings None => {} }; - - let parser_context = - context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { - exit_matcher: ChainBehavior::AndParent(Some(&headline_end)), - })); - let standard_set_object_matcher = 
parser_with_context!(standard_set_object)(&parser_context); - - let ret = tuple(( - many1_count(tag("*")), - space1, - many1(standard_set_object_matcher), - alt((line_ending, eof)), - ))(input); - ret -} - -fn headline_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { - line_ending(input) + Ok((input, ())) } +/// Get one character from before the current position. fn get_one_before<'s>(document: &'s str, current_position: &'s str) -> Option<&'s str> { assert!(is_slice_of(document, current_position)); if document.as_ptr() as usize == current_position.as_ptr() as usize { @@ -144,6 +151,7 @@ fn get_one_before<'s>(document: &'s str, current_position: &'s str) -> Option<&' Some(&document[previous_character_offset..offset]) } +/// Check if the child string slice is a slice of the parent string slice. fn is_slice_of(parent: &str, child: &str) -> bool { let parent_start = parent.as_ptr() as usize; let parent_end = parent_start + parent.len(); From dc9f3eb2e6c6fb289e08dcbe9c980e4eb7d9e267 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 24 Mar 2023 17:00:27 -0400 Subject: [PATCH 17/29] Implement the section parser. 
--- src/parser/document.rs | 16 +++++++++++++++- src/parser/element.rs | 9 +++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index bec0cab0..32c408ff 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -9,6 +9,7 @@ use nom::multi::many1; use nom::multi::many1_count; use nom::sequence::tuple; +use crate::parser::element::element; use crate::parser::error::CustomError; use crate::parser::error::MyError; use crate::parser::object::standard_set_object; @@ -81,7 +82,10 @@ fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Sec })) .with_additional_node(ContextElement::Context("section")); not(|i| parser_context.check_exit_matcher(i))(input)?; - todo!() + let element_matcher = parser_with_context!(element)(&parser_context); + let (remaining, children) = many1(element_matcher)(input)?; + let source = get_consumed(input, remaining); + Ok((remaining, Section { source, children })) } fn section_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { @@ -160,6 +164,16 @@ fn is_slice_of(parent: &str, child: &str) -> bool { child_start >= parent_start && child_end <= parent_end } +/// Get a slice of the string that was consumed in a parser using the original input to the parser and the remaining input after the parser. 
+fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str { + assert!(is_slice_of(input, remaining)); + let source = { + let offset = remaining.as_ptr() as usize - input.as_ptr() as usize; + &input[..offset] + }; + source +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/parser/element.rs b/src/parser/element.rs index eb7ed8c5..ecfa993e 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -1,6 +1,10 @@ +use nom::combinator::not; + +use super::error::Res; use super::greater_element::PlainList; use super::lesser_element::Paragraph; use super::source::Source; +use super::Context; #[derive(Debug)] pub enum Element<'s> { @@ -16,3 +20,8 @@ impl<'s> Source<'s> for Element<'s> { } } } + +pub fn element<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Element<'s>> { + not(|i| context.check_exit_matcher(i))(input)?; + todo!() +} From 8013f127df8c189bdfbd936e9ec8a299d7a5f861 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 24 Mar 2023 17:19:46 -0400 Subject: [PATCH 18/29] Implement heading parser. 
--- src/parser/document.rs | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index 32c408ff..7127f3cb 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -3,8 +3,11 @@ use nom::bytes::complete::tag; use nom::character::complete::line_ending; use nom::character::complete::space1; use nom::combinator::eof; +use nom::combinator::map; use nom::combinator::not; use nom::combinator::recognize; +use nom::combinator::verify; +use nom::multi::many0; use nom::multi::many1; use nom::multi::many1_count; use nom::sequence::tuple; @@ -35,6 +38,7 @@ pub struct Document<'s> { #[derive(Debug)] pub struct Heading<'s> { pub source: &'s str, + pub stars: usize, pub children: Vec<DocumentElement<'s>>, } @@ -96,7 +100,25 @@ fn section_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Heading<'s>> { not(|i| context.check_exit_matcher(i))(input)?; let (remaining, (star_count, _ws, title, _ws2)) = headline(context, input)?; - todo!() + let section_matcher = parser_with_context!(section)(context); + // TODO: This needs to only match headings below the current level + let heading_matcher = parser_with_context!(heading)(context); + let (remaining, children) = many0(alt(( + map( + verify(heading_matcher, |h| h.stars > star_count), + DocumentElement::Heading, + ), + map(section_matcher, DocumentElement::Section), + )))(remaining)?; + let source = get_consumed(input, remaining); + Ok(( + remaining, + Heading { + source: source, + stars: star_count, + children, + }, + )) } fn headline<'r, 's>( From 7ab3df69385389ed0e02c2bf70658c0a4b38126c Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 24 Mar 2023 17:30:33 -0400 Subject: [PATCH 19/29] Use headline instead of heading for section_end. This should be more performant. 
--- src/parser/document.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index 7127f3cb..eeed9c53 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -93,8 +93,8 @@ fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Sec } fn section_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { - let heading_matcher = parser_with_context!(heading)(context); - alt((recognize(heading_matcher), eof))(input) + let headline_matcher = parser_with_context!(headline)(context); + alt((recognize(headline_matcher), eof))(input) } fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Heading<'s>> { From f2d16d302a3a16a2d421e9ec880ee2b608af2338 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 24 Mar 2023 17:34:56 -0400 Subject: [PATCH 20/29] Implement document parser. --- src/parser/document.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index eeed9c53..a256f5ac 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -5,6 +5,7 @@ use nom::character::complete::space1; use nom::combinator::eof; use nom::combinator::map; use nom::combinator::not; +use nom::combinator::opt; use nom::combinator::recognize; use nom::combinator::verify; use nom::multi::many0; @@ -74,8 +75,19 @@ pub fn document(input: &str) -> Res<&str, Document> { let initial_context: ContextTree<'_, '_> = ContextTree::new(); let document_context = initial_context.with_additional_node(ContextElement::DocumentRoot(input)); - - todo!() + let section_matcher = parser_with_context!(section)(&document_context); + let heading_matcher = parser_with_context!(heading)(&document_context); + let (remaining, zeroth_section) = opt(section_matcher)(input)?; + let (remaining, children) = many0(heading_matcher)(remaining)?; + let source = get_consumed(input, remaining); + 
Ok(( + remaining, + Document { + source, + zeroth_section, + children, + }, + )) } fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Section<'s>> { @@ -114,7 +126,7 @@ fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Hea Ok(( remaining, Heading { - source: source, + source, stars: star_count, children, }, From a6cf1adf4b063cee900d6934243f096fa343ec36 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 25 Mar 2023 10:07:47 -0400 Subject: [PATCH 21/29] Add a top-level Makefile. This is primarily to automate launching jaeger in docker for easier viewing of traces. --- Makefile | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..b90d38b5 --- /dev/null +++ b/Makefile @@ -0,0 +1,25 @@ +SHELL := bash +.ONESHELL: +.SHELLFLAGS := -eu -o pipefail -c +.DELETE_ON_ERROR: +MAKEFLAGS += --warn-undefined-variables +MAKEFLAGS += --no-builtin-rules + +ifeq ($(origin .RECIPEPREFIX), undefined) + $(error This Make does not support .RECIPEPREFIX. Please use GNU Make 4.0 or later) +endif +.RECIPEPREFIX = > + +.PHONY: build +build: target/debug/toy + +.PHONY: clean +clean: +> cargo clean + +target/debug/toy: +> cargo build + +.PHONY: jaeger +jaeger: +> docker run -d --rm -p 6831:6831/udp -p 6832:6832/udp -p 16686:16686 -p 14268:14268 jaegertracing/all-in-one:latest From b88365e7eb439d1d115bf1811b38276fad203458 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 25 Mar 2023 10:45:43 -0400 Subject: [PATCH 22/29] Wrote plain text parser. 
--- src/parser/object.rs | 45 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/src/parser/object.rs b/src/parser/object.rs index 417bb78d..ee2c5d36 100644 --- a/src/parser/object.rs +++ b/src/parser/object.rs @@ -1,6 +1,11 @@ +use nom::combinator::map; use nom::combinator::not; +use crate::parser::error::CustomError; +use crate::parser::error::MyError; + use super::error::Res; +use super::parser_with_context::parser_with_context; use super::source::Source; use super::Context; @@ -41,5 +46,43 @@ pub fn standard_set_object<'r, 's>( input: &'s str, ) -> Res<&'s str, Object<'s>> { not(|i| context.check_exit_matcher(i))(input)?; - todo!() + + let plain_text_matcher = parser_with_context!(plain_text)(context); + + map(plain_text_matcher, Object::PlainText)(input) +} + +fn plain_text<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainText<'s>> { + if input.len() == 0 { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Zero input length to plain_text.", + )))); + } + // not(|i| context.check_exit_matcher(i))(input)?; + let mut current_input = input.char_indices(); + loop { + match current_input.next() { + Some((offset, _char)) => { + let remaining = &input[offset..]; + let exit_matcher_status = context.check_exit_matcher(remaining); + if exit_matcher_status.is_err() { + if offset == 0 { + // If we're at the start of the input, then nothing is plain text, so fire an error for zero-length match. + exit_matcher_status?; + } else { + return Ok(( + &input[offset..], + PlainText { + source: &input[..offset], + }, + )); + } + } + } + None => { + // We hit the end of the file, so all input must be plain text + return Ok((&input[input.len()..], PlainText { source: input })); + } + }; + } } From d582c8603a4ed534dcd5eb5b2bd73be60dfedb95 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 25 Mar 2023 11:22:59 -0400 Subject: [PATCH 23/29] Implement a basic paragraph parser. 
--- src/parser/document.rs | 4 +-- src/parser/element.rs | 47 +++++++++++++++++++++++++++++++++++- src/parser/lesser_element.rs | 3 +++ 3 files changed, 51 insertions(+), 3 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index a256f5ac..52fa5f38 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -155,7 +155,7 @@ fn headline<'r, 's>( } fn headline_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { - line_ending(input) + alt((line_ending, eof))(input) } /// Check that we are at the start of a line @@ -199,7 +199,7 @@ fn is_slice_of(parent: &str, child: &str) -> bool { } /// Get a slice of the string that was consumed in a parser using the original input to the parser and the remaining input after the parser. -fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str { +pub fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str { assert!(is_slice_of(input, remaining)); let source = { let offset = remaining.as_ptr() as usize - input.as_ptr() as usize; diff --git a/src/parser/element.rs b/src/parser/element.rs index ecfa993e..aa42936e 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -1,4 +1,19 @@ +use nom::branch::alt; +use nom::character::complete::line_ending; +use nom::character::complete::space0; +use nom::combinator::eof; +use nom::combinator::map; use nom::combinator::not; +use nom::combinator::recognize; +use nom::multi::many1; +use nom::sequence::tuple; + +use crate::parser::document::get_consumed; +use crate::parser::object::standard_set_object; +use crate::parser::parser_context::ChainBehavior; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::parser_with_context::parser_with_context; use super::error::Res; use super::greater_element::PlainList; @@ -23,5 +38,35 @@ impl<'s> Source<'s> for Element<'s> { pub fn element<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s 
str, Element<'s>> { not(|i| context.check_exit_matcher(i))(input)?; - todo!() + + let paragraph_matcher = parser_with_context!(paragraph)(context); + + map(paragraph_matcher, Element::Paragraph)(input) +} + +fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Paragraph<'s>> { + let parser_context = + context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + exit_matcher: ChainBehavior::AndParent(Some(&paragraph_end)), + })); + let standard_set_object_matcher = parser_with_context!(standard_set_object)(&parser_context); + + let (remaining, children) = many1(standard_set_object_matcher)(input)?; + + let source = get_consumed(input, remaining); + + Ok((remaining, Paragraph { source, children })) +} + +fn paragraph_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + // TODO: Other elements should also end paragraphs + alt((recognize(tuple((line_ending, many1(blank_line)))), eof))(input) +} + +/// A line containing only whitespace and then a line break +/// +/// It is up to the caller to ensure this is called at the start of a line. +fn blank_line(input: &str) -> Res<&str, &str> { + not(eof)(input)?; + recognize(tuple((space0, alt((line_ending, eof)))))(input) } diff --git a/src/parser/lesser_element.rs b/src/parser/lesser_element.rs index 83fd38b0..5abc4b38 100644 --- a/src/parser/lesser_element.rs +++ b/src/parser/lesser_element.rs @@ -1,4 +1,7 @@ +use super::object::Object; + #[derive(Debug)] pub struct Paragraph<'s> { pub source: &'s str, + pub children: Vec<Object<'s>>, } From 3c26933e7faa5604d38ac59e165d191edf33d25b Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 25 Mar 2023 11:25:10 -0400 Subject: [PATCH 24/29] Move some functions into util. 
--- src/parser/document.rs | 47 ++---------------------------------------- src/parser/element.rs | 2 +- src/parser/util.rs | 45 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 46 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index 52fa5f38..352ddc81 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -27,6 +27,8 @@ use super::error::Res; use super::object::Object; use super::parser_with_context::parser_with_context; use super::source::Source; +use super::util::get_consumed; +use super::util::get_one_before; use super::Context; #[derive(Debug)] @@ -177,48 +179,3 @@ fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st }; Ok((input, ())) } - -/// Get one character from before the current position. -fn get_one_before<'s>(document: &'s str, current_position: &'s str) -> Option<&'s str> { - assert!(is_slice_of(document, current_position)); - if document.as_ptr() as usize == current_position.as_ptr() as usize { - return None; - } - let offset = current_position.as_ptr() as usize - document.as_ptr() as usize; - let previous_character_offset = document.floor_char_boundary(offset - 1); - Some(&document[previous_character_offset..offset]) -} - -/// Check if the child string slice is a slice of the parent string slice. -fn is_slice_of(parent: &str, child: &str) -> bool { - let parent_start = parent.as_ptr() as usize; - let parent_end = parent_start + parent.len(); - let child_start = child.as_ptr() as usize; - let child_end = child_start + child.len(); - child_start >= parent_start && child_end <= parent_end -} - -/// Get a slice of the string that was consumed in a parser using the original input to the parser and the remaining input after the parser. 
-pub fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str { - assert!(is_slice_of(input, remaining)); - let source = { - let offset = remaining.as_ptr() as usize - input.as_ptr() as usize; - &input[..offset] - }; - source -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn get_one_before_unicode() { - let input = "🧡💛💚💙💜"; - let (green_heart_index, _) = input.char_indices().skip(2).next().unwrap(); - let starting_with_green_heart = &input[green_heart_index..]; - let yellow_heart = get_one_before(input, starting_with_green_heart).unwrap(); - assert!(is_slice_of(input, yellow_heart)); - assert_eq!(yellow_heart, "💛"); - } -} diff --git a/src/parser/element.rs b/src/parser/element.rs index aa42936e..5a0fa108 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -8,7 +8,6 @@ use nom::combinator::recognize; use nom::multi::many1; use nom::sequence::tuple; -use crate::parser::document::get_consumed; use crate::parser::object::standard_set_object; use crate::parser::parser_context::ChainBehavior; use crate::parser::parser_context::ContextElement; @@ -19,6 +18,7 @@ use super::error::Res; use super::greater_element::PlainList; use super::lesser_element::Paragraph; use super::source::Source; +use super::util::get_consumed; use super::Context; #[derive(Debug)] diff --git a/src/parser/util.rs b/src/parser/util.rs index 7cee2509..cf4b3f79 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -23,3 +23,48 @@ pub fn immediate_in_section<'r, 's, 'x>(context: Context<'r, 's>, section_name: } false } + +/// Get one character from before the current position. 
+pub fn get_one_before<'s>(document: &'s str, current_position: &'s str) -> Option<&'s str> { + assert!(is_slice_of(document, current_position)); + if document.as_ptr() as usize == current_position.as_ptr() as usize { + return None; + } + let offset = current_position.as_ptr() as usize - document.as_ptr() as usize; + let previous_character_offset = document.floor_char_boundary(offset - 1); + Some(&document[previous_character_offset..offset]) +} + +/// Check if the child string slice is a slice of the parent string slice. +fn is_slice_of(parent: &str, child: &str) -> bool { + let parent_start = parent.as_ptr() as usize; + let parent_end = parent_start + parent.len(); + let child_start = child.as_ptr() as usize; + let child_end = child_start + child.len(); + child_start >= parent_start && child_end <= parent_end +} + +/// Get a slice of the string that was consumed in a parser using the original input to the parser and the remaining input after the parser. +pub fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str { + assert!(is_slice_of(input, remaining)); + let source = { + let offset = remaining.as_ptr() as usize - input.as_ptr() as usize; + &input[..offset] + }; + source +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn get_one_before_unicode() { + let input = "🧡💛💚💙💜"; + let (green_heart_index, _) = input.char_indices().skip(2).next().unwrap(); + let starting_with_green_heart = &input[green_heart_index..]; + let yellow_heart = get_one_before(input, starting_with_green_heart).unwrap(); + assert!(is_slice_of(input, yellow_heart)); + assert_eq!(yellow_heart, "💛"); + } +} From 2ab407bb83abd33c264abe990042b9567aecab42 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 25 Mar 2023 11:27:38 -0400 Subject: [PATCH 25/29] Invoke the document parser. 
--- src/main.rs | 8 ++++---- src/parser/mod.rs | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main.rs b/src/main.rs index 8e4901cf..6906a5b5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,5 @@ #![feature(round_char_boundary)] -// use crate::parser::document; +use crate::parser::document; use tracing::Level; use tracing_subscriber::fmt::format::FmtSpan; @@ -19,8 +19,8 @@ fn main() -> Result<(), Box> { .with_span_events(FmtSpan::ENTER | FmtSpan::EXIT) .finish(); tracing::subscriber::set_global_default(subscriber)?; - // let parsed = document(TEST_DOC); - // println!("{}\n\n\n", TEST_DOC); - // println!("{:#?}", parsed); + let parsed = document(TEST_DOC); + println!("{}\n\n\n", TEST_DOC); + println!("{:#?}", parsed); Ok(()) } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5d189e28..18cb3147 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -16,4 +16,5 @@ mod source; // mod text; // mod token; mod util; +pub use document::document; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>; From faf2bb401daac3340c45c5aff06693f038078655 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 25 Mar 2023 11:40:12 -0400 Subject: [PATCH 26/29] Fix plain text matcher exit conditions. --- src/parser/object.rs | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/parser/object.rs b/src/parser/object.rs index ee2c5d36..aaf20240 100644 --- a/src/parser/object.rs +++ b/src/parser/object.rs @@ -64,7 +64,7 @@ fn plain_text<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, match current_input.next() { Some((offset, _char)) => { let remaining = &input[offset..]; - let exit_matcher_status = context.check_exit_matcher(remaining); + let exit_matcher_status = not(|i| context.check_exit_matcher(i))(remaining); if exit_matcher_status.is_err() { if offset == 0 { // If we're at the start of the input, then nothing is plain text, so fire an error for zero-length match. 
@@ -86,3 +86,23 @@ fn plain_text<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, }; } } + +#[cfg(test)] +mod tests { + use crate::parser::parser_context::ContextElement; + use crate::parser::parser_context::ContextTree; + + use super::*; + + #[test] + fn plain_text_simple() { + let input = "foobarbaz"; + let initial_context: ContextTree<'_, '_> = ContextTree::new(); + let document_context = + initial_context.with_additional_node(ContextElement::DocumentRoot(input)); + let plain_text_matcher = parser_with_context!(plain_text)(&document_context); + let (remaining, result) = map(plain_text_matcher, Object::PlainText)(input).unwrap(); + assert_eq!(remaining, ""); + assert_eq!(result.get_source(), input); + } +} From b65c2f86b5a64117c8a6a7d0c041f5b001b70dae Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 25 Mar 2023 11:49:56 -0400 Subject: [PATCH 27/29] Consume trailing whitespace in paragraphs. --- src/parser/element.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/parser/element.rs b/src/parser/element.rs index 5a0fa108..88d44cc5 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -5,6 +5,7 @@ use nom::combinator::eof; use nom::combinator::map; use nom::combinator::not; use nom::combinator::recognize; +use nom::multi::many0; use nom::multi::many1; use nom::sequence::tuple; @@ -53,6 +54,9 @@ fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, P let (remaining, children) = many1(standard_set_object_matcher)(input)?; + let (remaining, _trailing_whitespace) = + alt((eof, recognize(tuple((line_ending, many0(blank_line))))))(remaining)?; + let source = get_consumed(input, remaining); Ok((remaining, Paragraph { source, children })) From 3a0a4c89532c0b425688aa5b090022cb9ef3fc66 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 25 Mar 2023 11:59:19 -0400 Subject: [PATCH 28/29] Consume the trailing whitespace after a headline. 
--- src/parser/document.rs | 3 ++- src/parser/element.rs | 13 +++---------- src/parser/util.rs | 22 ++++++++++++++++++++++ toy_language.txt | 4 ++++ 4 files changed, 31 insertions(+), 11 deletions(-) diff --git a/src/parser/document.rs b/src/parser/document.rs index 352ddc81..81f1a68c 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -29,6 +29,7 @@ use super::parser_with_context::parser_with_context; use super::source::Source; use super::util::get_consumed; use super::util::get_one_before; +use super::util::trailing_whitespace; use super::Context; #[derive(Debug)] @@ -151,7 +152,7 @@ fn headline<'r, 's>( many1_count(tag("*")), space1, many1(standard_set_object_matcher), - alt((line_ending, eof)), + trailing_whitespace, ))(input)?; Ok((remaining, (star_count, ws, title, ws2))) } diff --git a/src/parser/element.rs b/src/parser/element.rs index 88d44cc5..3ae3d1a3 100644 --- a/src/parser/element.rs +++ b/src/parser/element.rs @@ -19,7 +19,9 @@ use super::error::Res; use super::greater_element::PlainList; use super::lesser_element::Paragraph; use super::source::Source; +use super::util::blank_line; use super::util::get_consumed; +use super::util::trailing_whitespace; use super::Context; #[derive(Debug)] @@ -54,8 +56,7 @@ fn paragraph<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, P let (remaining, children) = many1(standard_set_object_matcher)(input)?; - let (remaining, _trailing_whitespace) = - alt((eof, recognize(tuple((line_ending, many0(blank_line))))))(remaining)?; + let (remaining, _trailing_whitespace) = trailing_whitespace(remaining)?; let source = get_consumed(input, remaining); @@ -66,11 +67,3 @@ fn paragraph_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s st // TODO: Other elements should also end paragraphs alt((recognize(tuple((line_ending, many1(blank_line)))), eof))(input) } - -/// A line containing only whitespace and then a line break -/// -/// It is up to the caller to ensure this is called at the 
start of a line. -fn blank_line(input: &str) -> Res<&str, &str> { - not(eof)(input)?; - recognize(tuple((space0, alt((line_ending, eof)))))(input) -} diff --git a/src/parser/util.rs b/src/parser/util.rs index cf4b3f79..9962dc78 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -1,3 +1,13 @@ +use nom::branch::alt; +use nom::character::complete::line_ending; +use nom::character::complete::space0; +use nom::combinator::eof; +use nom::combinator::not; +use nom::combinator::recognize; +use nom::multi::many0; +use nom::sequence::tuple; + +use super::error::Res; use super::parser_context::ContextElement; use super::Context; @@ -54,6 +64,18 @@ pub fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str { source } +/// A line containing only whitespace and then a line break +/// +/// It is up to the caller to ensure this is called at the start of a line. +pub fn blank_line(input: &str) -> Res<&str, &str> { + not(eof)(input)?; + recognize(tuple((space0, alt((line_ending, eof)))))(input) +} + +pub fn trailing_whitespace(input: &str) -> Res<&str, &str> { + alt((eof, recognize(tuple((line_ending, many0(blank_line))))))(input) +} + #[cfg(test)] mod tests { use super::*; diff --git a/toy_language.txt b/toy_language.txt index 2f8aa337..614291fd 100644 --- a/toy_language.txt +++ b/toy_language.txt @@ -11,3 +11,7 @@ text* *nesting *bold entrances* and* exits + +* Heading + +body of heading From e3f6dd497aa740b47e81f988c685de3912e7a1fa Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 25 Mar 2023 12:01:50 -0400 Subject: [PATCH 29/29] Test cases showing headings handling nesting properly. 
--- src/parser/parser_context.rs | 1 - toy_language.txt | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index cdade556..fd2b4058 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -110,7 +110,6 @@ pub enum ContextElement<'r, 's> { ExitMatcherNode(ExitMatcherNode<'r>), Context(&'r str), ListItem(usize), - StartOfParagraph, } #[derive(Debug)] diff --git a/toy_language.txt b/toy_language.txt index 614291fd..ac4a2d64 100644 --- a/toy_language.txt +++ b/toy_language.txt @@ -15,3 +15,8 @@ text* * Heading body of heading + +** Child heading +** Immediate second child heading + +* Second top-level heading