organic/src/parser/document.rs
2023-03-24 17:34:56 -04:00

225 lines
7.6 KiB
Rust

use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::line_ending;
use nom::character::complete::space1;
use nom::combinator::eof;
use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many0;
use nom::multi::many1;
use nom::multi::many1_count;
use nom::sequence::tuple;
use crate::parser::element::element;
use crate::parser::error::CustomError;
use crate::parser::error::MyError;
use crate::parser::object::standard_set_object;
use crate::parser::parser_context::ChainBehavior;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ContextTree;
use crate::parser::parser_context::ExitMatcherNode;
use super::element::Element;
use super::error::Res;
use super::object::Object;
use super::parser_with_context::parser_with_context;
use super::source::Source;
use super::Context;
#[derive(Debug)]
pub struct Document<'s> {
pub source: &'s str,
pub zeroth_section: Option<Section<'s>>,
pub children: Vec<Heading<'s>>,
}
#[derive(Debug)]
pub struct Heading<'s> {
pub source: &'s str,
pub stars: usize,
pub children: Vec<DocumentElement<'s>>,
}
#[derive(Debug)]
pub struct Section<'s> {
pub source: &'s str,
pub children: Vec<Element<'s>>,
}
#[derive(Debug)]
pub enum DocumentElement<'s> {
Heading(Heading<'s>),
Section(Section<'s>),
}
impl<'s> Source<'s> for Document<'s> {
fn get_source(&'s self) -> &'s str {
self.source
}
}
impl<'s> Source<'s> for DocumentElement<'s> {
fn get_source(&'s self) -> &'s str {
match self {
DocumentElement::Heading(obj) => obj.source,
DocumentElement::Section(obj) => obj.source,
}
}
}
#[allow(dead_code)]
pub fn document(input: &str) -> Res<&str, Document> {
let initial_context: ContextTree<'_, '_> = ContextTree::new();
let document_context =
initial_context.with_additional_node(ContextElement::DocumentRoot(input));
let section_matcher = parser_with_context!(section)(&document_context);
let heading_matcher = parser_with_context!(heading)(&document_context);
let (remaining, zeroth_section) = opt(section_matcher)(input)?;
let (remaining, children) = many0(heading_matcher)(remaining)?;
let source = get_consumed(input, remaining);
Ok((
remaining,
Document {
source,
zeroth_section,
children,
},
))
}
fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Section<'s>> {
// TODO: The zeroth section is specialized so it probably needs its own parser
let parser_context = context
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&section_end)),
}))
.with_additional_node(ContextElement::Context("section"));
not(|i| parser_context.check_exit_matcher(i))(input)?;
let element_matcher = parser_with_context!(element)(&parser_context);
let (remaining, children) = many1(element_matcher)(input)?;
let source = get_consumed(input, remaining);
Ok((remaining, Section { source, children }))
}
fn section_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
let headline_matcher = parser_with_context!(headline)(context);
alt((recognize(headline_matcher), eof))(input)
}
fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Heading<'s>> {
not(|i| context.check_exit_matcher(i))(input)?;
let (remaining, (star_count, _ws, title, _ws2)) = headline(context, input)?;
let section_matcher = parser_with_context!(section)(context);
// TODO: This needs to only match headings below the current level
let heading_matcher = parser_with_context!(heading)(context);
let (remaining, children) = many0(alt((
map(
verify(heading_matcher, |h| h.stars > star_count),
DocumentElement::Heading,
),
map(section_matcher, DocumentElement::Section),
)))(remaining)?;
let source = get_consumed(input, remaining);
Ok((
remaining,
Heading {
source,
stars: star_count,
children,
},
))
}
fn headline<'r, 's>(
context: Context<'r, 's>,
input: &'s str,
) -> Res<&'s str, (usize, &'s str, Vec<Object<'s>>, &'s str)> {
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
exit_matcher: ChainBehavior::AndParent(Some(&headline_end)),
}));
let standard_set_object_matcher = parser_with_context!(standard_set_object)(&parser_context);
let start_of_line_matcher = parser_with_context!(start_of_line)(&parser_context);
let (remaining, (_sol, star_count, ws, title, ws2)) = tuple((
start_of_line_matcher,
many1_count(tag("*")),
space1,
many1(standard_set_object_matcher),
alt((line_ending, eof)),
))(input)?;
Ok((remaining, (star_count, ws, title, ws2)))
}
fn headline_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
line_ending(input)
}
/// Check that we are at the start of a line
fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
let document_root = context.get_document_root().unwrap();
let preceding_character = get_one_before(document_root, input)
.map(|slice| slice.chars().next())
.flatten();
match preceding_character {
Some('\n') => {}
Some(_) => {
// Not at start of line, cannot be a heading
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Not at start of line",
))));
}
// If None, we are at the start of the file which allows for headings
None => {}
};
Ok((input, ()))
}
/// Get one character from before the current position.
fn get_one_before<'s>(document: &'s str, current_position: &'s str) -> Option<&'s str> {
assert!(is_slice_of(document, current_position));
if document.as_ptr() as usize == current_position.as_ptr() as usize {
return None;
}
let offset = current_position.as_ptr() as usize - document.as_ptr() as usize;
let previous_character_offset = document.floor_char_boundary(offset - 1);
Some(&document[previous_character_offset..offset])
}
/// Check if the child string slice is a slice of the parent string slice.
fn is_slice_of(parent: &str, child: &str) -> bool {
let parent_start = parent.as_ptr() as usize;
let parent_end = parent_start + parent.len();
let child_start = child.as_ptr() as usize;
let child_end = child_start + child.len();
child_start >= parent_start && child_end <= parent_end
}
/// Get a slice of the string that was consumed in a parser using the original input to the parser and the remaining input after the parser.
fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str {
assert!(is_slice_of(input, remaining));
let source = {
let offset = remaining.as_ptr() as usize - input.as_ptr() as usize;
&input[..offset]
};
source
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn get_one_before_unicode() {
let input = "🧡💛💚💙💜";
let (green_heart_index, _) = input.char_indices().skip(2).next().unwrap();
let starting_with_green_heart = &input[green_heart_index..];
let yellow_heart = get_one_before(input, starting_with_green_heart).unwrap();
assert!(is_slice_of(input, yellow_heart));
assert_eq!(yellow_heart, "💛");
}
}