From e6752b9d83a8fbb2919a977e622e67240d4e0318 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sat, 25 Mar 2023 14:10:22 -0400 Subject: [PATCH] Building the plain list item context. --- Makefile | 12 +++-- src/parser/document.rs | 27 +---------- src/parser/parser_context.rs | 5 ++ src/parser/plain_list.rs | 64 +++++++++++++++++++++++++ src/parser/util.rs | 30 ++++++++++++ {src/parser => trash}/old_combinator.rs | 0 {src/parser => trash}/old_document.rs | 0 7 files changed, 110 insertions(+), 28 deletions(-) rename {src/parser => trash}/old_combinator.rs (100%) rename {src/parser => trash}/old_document.rs (100%) diff --git a/Makefile b/Makefile index b90d38b..cdc52da 100644 --- a/Makefile +++ b/Makefile @@ -11,14 +11,20 @@ endif .RECIPEPREFIX = > .PHONY: build -build: target/debug/toy +build: +> cargo build .PHONY: clean clean: > cargo clean -target/debug/toy: -> cargo build +.PHONY: test +test: +> cargo test + +.PHONY: run +run: +> cargo run .PHONY: jaeger jaeger: diff --git a/src/parser/document.rs b/src/parser/document.rs index 389bd29..0b80bdb 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -14,8 +14,6 @@ use nom::multi::many1_count; use nom::sequence::tuple; use crate::parser::element::element; -use crate::parser::error::CustomError; -use crate::parser::error::MyError; use crate::parser::object::standard_set_object; use crate::parser::parser_context::ChainBehavior; use crate::parser::parser_context::ContextElement; @@ -28,7 +26,7 @@ use super::object::Object; use super::parser_with_context::parser_with_context; use super::source::Source; use super::util::get_consumed; -use super::util::get_one_before; +use super::util::start_of_line; use super::util::trailing_whitespace; use super::Context; @@ -117,7 +115,6 @@ fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Hea not(|i| context.check_exit_matcher(i))(input)?; let (remaining, (star_count, _ws, title, _ws2)) = headline(context, input)?; let section_matcher = parser_with_context!(section)(context); - // TODO: This needs to only match headings below the current level let heading_matcher = parser_with_context!(heading)(context); let (remaining, children) = many0(alt(( map( @@ -159,26 +156,6 @@ fn headline<'r, 's>( Ok((remaining, (star_count, ws, title, ws2))) } -fn headline_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { +fn headline_end<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { alt((line_ending, eof))(input) } - -/// Check that we are at the start of a line -fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { - let document_root = context.get_document_root().unwrap(); - let preceding_character = get_one_before(document_root, input) - .map(|slice| slice.chars().next()) - .flatten(); - match preceding_character { - Some('\n') => {} - Some(_) => { - // Not at start of line, cannot be a heading - return Err(nom::Err::Error(CustomError::MyError(MyError( - "Not at start of line", - )))); - } - // If None, we are at the start of the file which allows for headings - None => {} - }; - Ok((input, ())) -} diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index fd2b405..e0cc0ef 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -106,9 +106,14 @@ impl<'r, 's> ContextTree<'r, 's> { #[derive(Debug)] pub enum ContextElement<'r, 's> { + /// Stores a reference to the entire org-mode document being parsed. + /// + /// This is used for look-behind. DocumentRoot(&'s str), ExitMatcherNode(ExitMatcherNode<'r>), Context(&'r str), + + /// Stores the indentation level of the current list item ListItem(usize), } diff --git a/src/parser/plain_list.rs b/src/parser/plain_list.rs index 4c2040d..13e5b69 100644 --- a/src/parser/plain_list.rs +++ b/src/parser/plain_list.rs @@ -1,5 +1,22 @@ +use nom::branch::alt; +use nom::character::complete::space0; +use nom::combinator::eof; +use nom::combinator::not; +use nom::combinator::recognize; +use nom::combinator::verify; +use nom::sequence::tuple; + +use crate::parser::parser_context::ChainBehavior; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::util::start_of_line; + +use super::error::CustomError; +use super::error::MyError; use super::error::Res; use super::lesser_element::Paragraph; +use super::parser_with_context::parser_with_context; +use super::util::non_whitespace_character; use super::Context; #[allow(dead_code)] @@ -7,5 +24,52 @@ pub fn plain_list_item<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, Paragraph<'s>> { + not(|i| context.check_exit_matcher(i))(input)?; + start_of_line(context, input)?; + let (remaining, leading_whitespace) = space0(input)?; + // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) + let indent_level = leading_whitespace.len(); + let list_item_context = context + .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + exit_matcher: ChainBehavior::AndParent(Some(&plain_list_item_end)), + })) + .with_additional_node(ContextElement::ListItem(indent_level)); todo!() } + +fn plain_list_item_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + let plain_list_item_matcher = parser_with_context!(plain_list_item)(context); + let line_indented_lte_matcher = parser_with_context!(line_indented_lte)(context); + alt(( + recognize(plain_list_item_matcher), + line_indented_lte_matcher, + eof, + ))(input) +} + +fn line_indented_lte<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + let current_item_indent_level: &usize = + get_context_item_indent(context).ok_or(nom::Err::Error(CustomError::MyError(MyError( + "Not inside a plain list item", + ))))?; + + start_of_line(context, input)?; + + let matched = recognize(verify( + tuple((space0::<&str, _>, non_whitespace_character)), + // It is fine that we get the indent level using the number of bytes rather than the number of characters because nom's space0 only matches space and tab (0x20 and 0x09) + |(_space0, _anychar)| _space0.len() <= *current_item_indent_level, + ))(input)?; + + Ok(matched) +} + +fn get_context_item_indent<'r, 's>(context: Context<'r, 's>) -> Option<&'r usize> { + for thing in context.iter() { + match thing.get_data() { + ContextElement::ListItem(depth) => return Some(depth), + _ => {} + }; + } + None +} diff --git a/src/parser/util.rs b/src/parser/util.rs index 9962dc7..8fe28ee 100644 --- a/src/parser/util.rs +++ b/src/parser/util.rs @@ -1,5 +1,6 @@ use nom::branch::alt; use nom::character::complete::line_ending; +use nom::character::complete::none_of; use nom::character::complete::space0; use nom::combinator::eof; use nom::combinator::not; @@ -7,6 +8,8 @@ use nom::combinator::recognize; use nom::multi::many0; use nom::sequence::tuple; +use super::error::CustomError; +use super::error::MyError; use super::error::Res; use super::parser_context::ContextElement; use super::Context; @@ -76,6 +79,33 @@ pub fn trailing_whitespace(input: &str) -> Res<&str, &str> { alt((eof, recognize(tuple((line_ending, many0(blank_line))))))(input) } +/// Check that we are at the start of a line +pub fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { + let document_root = context.get_document_root().unwrap(); + let preceding_character = get_one_before(document_root, input) + .map(|slice| slice.chars().next()) + .flatten(); + match preceding_character { + Some('\n') => {} + Some(_) => { + // Not at start of line, cannot be a heading + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Not at start of line", + )))); + } + // If None, we are at the start of the file which allows for headings + None => {} + }; + Ok((input, ())) +} + +/// Pull one non-whitespace character. +/// +/// This function only operates on spaces, tabs, carriage returns, and line feeds. It does not handle fancy unicode whitespace. +pub fn non_whitespace_character(input: &str) -> Res<&str, char> { + none_of(" \t\r\n")(input) +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/parser/old_combinator.rs b/trash/old_combinator.rs similarity index 100% rename from src/parser/old_combinator.rs rename to trash/old_combinator.rs diff --git a/src/parser/old_document.rs b/trash/old_document.rs similarity index 100% rename from src/parser/old_document.rs rename to trash/old_document.rs