From 5c8a064ecad5259df5638bc87011b34a416360b2 Mon Sep 17 00:00:00 2001
From: Tom Alexander
Date: Thu, 23 Mar 2023 19:35:32 -0400
Subject: [PATCH] Start writing the parser for headings.

---
 src/main.rs                  |  1 +
 src/parser/document.rs       | 86 ++++++++++++++++++++++++++++++++++++
 src/parser/mod.rs            |  2 +-
 src/parser/parser_context.rs | 20 ++++++---
 4 files changed, 101 insertions(+), 8 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index 8fe09fa..8e4901c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,4 @@
+#![feature(round_char_boundary)]
 // use crate::parser::document;
 use tracing::Level;
 use tracing_subscriber::fmt::format::FmtSpan;
diff --git a/src/parser/document.rs b/src/parser/document.rs
index a9da830..0d65f6d 100644
--- a/src/parser/document.rs
+++ b/src/parser/document.rs
@@ -1,9 +1,22 @@
+use nom::branch::alt;
+use nom::bytes::complete::tag;
+use nom::combinator::eof;
+use nom::combinator::recognize;
+use nom::multi::many1_count;
+use nom::sequence::tuple;
+
+use crate::parser::error::CustomError;
+use crate::parser::error::MyError;
+use crate::parser::parser_context::ChainBehavior;
 use crate::parser::parser_context::ContextElement;
 use crate::parser::parser_context::ContextTree;
+use crate::parser::parser_context::ExitMatcherNode;
 
 use super::element::Element;
 use super::error::Res;
+use super::parser_with_context::parser_with_context;
 use super::source::Source;
+use super::Context;
 
 #[derive(Debug)]
 pub struct Document<'s> {
@@ -52,3 +65,76 @@ pub fn document(input: &str) -> Res<&str, Document> {
 
     todo!()
 }
+
+fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Section<'s>> {
+    // TODO: The zeroth section is specialized so it probably needs its own parser
+    let parser_context = context
+        .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
+            exit_matcher: ChainBehavior::AndParent(Some(&section_end)),
+        }))
+        .with_additional_node(ContextElement::Context("section"));
+    todo!()
+}
+
+fn section_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
+    let heading_matcher = parser_with_context!(heading)(context);
+    alt((recognize(heading_matcher), eof))(input)
+}
+
+fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Heading<'s>> {
+    let document_root = context.get_document_root().unwrap();
+    let preceding_character = get_one_before(document_root, input)
+        .map(|slice| slice.chars().next())
+        .flatten();
+    match preceding_character {
+        Some('\n') => {}
+        Some(_) => {
+            // Not at start of line, cannot be a heading
+            return Err(nom::Err::Error(CustomError::MyError(MyError(
+                "Heading not at start of line",
+            ))));
+        }
+        // If None, we are at the start of the file which allows for headings
+        None => {}
+    };
+
+    tuple((
+        many1_count(tag("*")),
+        // standard set of objects
+    ))(input)?;
+
+    todo!()
+}
+
+fn get_one_before<'s>(document: &'s str, current_position: &'s str) -> Option<&'s str> {
+    assert!(is_slice_of(document, current_position));
+    if document.as_ptr() as usize == current_position.as_ptr() as usize {
+        return None;
+    }
+    let offset = current_position.as_ptr() as usize - document.as_ptr() as usize;
+    let previous_character_offset = document.floor_char_boundary(offset - 1);
+    Some(&document[previous_character_offset..offset])
+}
+
+fn is_slice_of(parent: &str, child: &str) -> bool {
+    let parent_start = parent.as_ptr() as usize;
+    let parent_end = parent_start + parent.len();
+    let child_start = child.as_ptr() as usize;
+    let child_end = child_start + child.len();
+    child_start >= parent_start && child_end <= parent_end
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn get_one_before_unicode() {
+        let input = "🧡💛💚💙💜";
+        let (green_heart_index, _) = input.char_indices().skip(2).next().unwrap();
+        let starting_with_green_heart = &input[green_heart_index..];
+        let yellow_heart = get_one_before(input, starting_with_green_heart).unwrap();
+        assert!(is_slice_of(input, yellow_heart));
+        assert_eq!(yellow_heart, "💛");
+    }
+}
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 30e2b3f..5d189e2 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -14,6 +14,6 @@ mod parser_with_context;
 // mod plain_list;
 mod source;
 // mod text;
-mod token;
+// mod token;
 mod util;
 type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>;
diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs
index 1ffe709..cdade55 100644
--- a/src/parser/parser_context.rs
+++ b/src/parser/parser_context.rs
@@ -7,7 +7,6 @@ use super::error::MyError;
 use super::error::Res;
 use super::list::List;
 use super::list::Node;
-use super::token::Token;
 use super::Context;
 
 type Matcher = dyn for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str>;
@@ -90,13 +89,25 @@ impl<'r, 's> ContextTree<'r, 's> {
         // TODO: Make this a specific error instead of just a generic MyError
         return Err(nom::Err::Error(CustomError::MyError(MyError("NoExit"))));
     }
+
+    pub fn get_document_root(&self) -> Option<&'s str> {
+        for current_node in self.iter() {
+            let context_element = current_node.get_data();
+            match context_element {
+                ContextElement::DocumentRoot(body) => {
+                    return Some(body);
+                }
+                _ => {}
+            }
+        }
+        None
+    }
 }
 
 #[derive(Debug)]
 pub enum ContextElement<'r, 's> {
     DocumentRoot(&'s str),
     ExitMatcherNode(ExitMatcherNode<'r>),
-    PreviousElementNode(PreviousElementNode<'s>),
     Context(&'r str),
     ListItem(usize),
     StartOfParagraph,
@@ -107,11 +118,6 @@ pub struct ExitMatcherNode<'r> {
     pub exit_matcher: ChainBehavior<'r>,
 }
 
-#[derive(Debug)]
-pub struct PreviousElementNode<'r> {
-    pub element: Token<'r>,
-}
-
 #[derive(Clone)]
 pub enum ChainBehavior<'r> {
     AndParent(Option<&'r Matcher>),