organic/src/parser/document.rs

use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::line_ending;
use nom::character::complete::space1;
use nom::combinator::eof;
use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many0;
use nom::multi::many1;
use nom::multi::many1_count;
use nom::sequence::tuple;

use crate::parser::element::element;
use crate::parser::error::CustomError;
use crate::parser::error::MyError;
use crate::parser::object::standard_set_object;
use crate::parser::parser_context::ChainBehavior;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ContextTree;
use crate::parser::parser_context::ExitMatcherNode;

use super::element::Element;
use super::error::Res;
use super::object::Object;
use super::parser_with_context::parser_with_context;
use super::source::Source;
use super::Context;

/// Top-level value returned by the [`document`] parser.
///
/// NOTE(review): `document` is still a stub, so how these fields get populated
/// is not visible yet; the field notes below describe the declared shape only.
#[derive(Debug)]
pub struct Document<'s> {
    /// Slice of the original text this node refers to; returned by `Source::get_source`.
    pub source: &'s str,
    /// Optional section that is not attached to any heading.
    /// Presumably the content that precedes the first heading — TODO confirm.
    pub zeroth_section: Option<Section<'s>>,
    /// The headings held directly by the document.
    pub children: Vec<Heading<'s>>,
}

/// A heading together with everything that was parsed beneath it.
#[derive(Debug)]
pub struct Heading<'s> {
    /// Everything the `heading` parser consumed: the headline plus all parsed children.
    pub source: &'s str,
    /// Number of `*` characters that open the headline.
    pub stars: usize,
    /// Nested content: sections, and headings that have more stars than this one.
    pub children: Vec<DocumentElement<'s>>,
}

/// A run of one or more elements, as produced by the `section` parser.
#[derive(Debug)]
pub struct Section<'s> {
    /// Everything the `section` parser consumed for this section.
    pub source: &'s str,
    /// The elements that make up the section, in the order they were parsed.
    pub children: Vec<Element<'s>>,
}

/// One child of a [`Heading`]: either a nested heading or a section.
#[derive(Debug)]
pub enum DocumentElement<'s> {
    /// A nested heading.
    Heading(Heading<'s>),
    /// A section of elements.
    Section(Section<'s>),
}

impl<'s> Source<'s> for Document<'s> {
    /// Return the source text slice stored on this document.
    fn get_source(&'s self) -> &'s str {
        self.source
    }
}

impl<'s> Source<'s> for DocumentElement<'s> {
    /// Return the source text slice of whichever variant this element holds.
    fn get_source(&'s self) -> &'s str {
        match self {
            Self::Heading(heading) => heading.source,
            Self::Section(section) => section.source,
        }
    }
}

/// Entry point for parsing a whole document.
///
/// Not implemented yet: the function builds the root parsing context (an empty
/// context tree extended with a `DocumentRoot` node holding `input`) and then
/// hits `todo!()`.
///
/// # Panics
///
/// Always panics, because the body ends in `todo!()`.
// The function is not referenced yet, hence the lint exemption.
#[allow(dead_code)]
pub fn document(input: &str) -> Res<&str, Document> {
    let root_context: ContextTree<'_, '_> = ContextTree::new();
    // Underscore-prefixed because nothing consumes the context until the parser is written.
    let _document_context = root_context.with_additional_node(ContextElement::DocumentRoot(input));

    todo!()
}

/// Parse a section: one or more elements.
///
/// The elements are parsed under a context extended with two nodes: an exit
/// matcher built from [`section_end`] (chained with the parent's matcher via
/// `AndParent`) and a `"section"` context label.
///
/// Fails without consuming anything when the exit matcher already succeeds at
/// `input`, or when not even one element can be parsed.
fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Section<'s>> {
    // TODO: The zeroth section is specialized so it probably needs its own parser
    let section_context = context
        .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
            exit_matcher: ChainBehavior::AndParent(Some(&section_end)),
        }))
        .with_additional_node(ContextElement::Context("section"));

    // `not` inverts the check: if the exit matcher succeeds here, the section fails.
    not(|i| section_context.check_exit_matcher(i))(input)?;

    let (remaining, children) = many1(parser_with_context!(element)(&section_context))(input)?;
    Ok((
        remaining,
        Section {
            source: get_consumed(input, remaining),
            children,
        },
    ))
}

/// Exit matcher for sections: succeeds where a heading can be parsed or at end of input.
///
/// On a heading match the returned slice is the text that heading covers
/// (via `recognize`); at end of input it is whatever `eof` yields.
fn section_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    let next_heading = recognize(parser_with_context!(heading)(context));
    alt((next_heading, eof))(input)
}

/// Parse a heading: a headline followed by any number of children.
///
/// A child is either a nested heading with strictly more stars than this one,
/// or a section. Parsing of children stops at the first position where neither
/// alternative matches.
///
/// Fails without consuming anything when the context's exit matcher succeeds
/// at `input`, or when `input` does not start with a headline.
fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Heading<'s>> {
    // `not` inverts the check: if the exit matcher succeeds here, the heading fails.
    not(|i| context.check_exit_matcher(i))(input)?;

    // Only the star count is used below; the title objects are parsed but not stored yet.
    let (after_headline, (stars, _ws, _title, _ws2)) = headline(context, input)?;

    // `verify` rejects headings at the same or a shallower level, so only
    // deeper headings are accepted as children.
    let heading_matcher = parser_with_context!(heading)(context);
    let nested_heading = map(
        verify(heading_matcher, |h| h.stars > stars),
        DocumentElement::Heading,
    );
    let child_section = map(
        parser_with_context!(section)(context),
        DocumentElement::Section,
    );

    let (remaining, children) = many0(alt((nested_heading, child_section)))(after_headline)?;
    Ok((
        remaining,
        Heading {
            source: get_consumed(input, remaining),
            stars,
            children,
        },
    ))
}

/// Parse a single headline: stars, whitespace, title objects, then a line ending or end of input.
///
/// The headline must begin at the start of a line. On success the output tuple is
/// `(star_count, whitespace_after_stars, title_objects, line_ending_or_eof)`.
///
/// The title objects are parsed under a context extended with [`headline_end`]
/// as an exit matcher (chained with the parent's matcher via `AndParent`).
fn headline<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, (usize, &'s str, Vec<Object<'s>>, &'s str)> {
    let headline_context =
        context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
            exit_matcher: ChainBehavior::AndParent(Some(&headline_end)),
        }));

    // The parser stays in statement position so its temporaries are dropped
    // before `headline_context`, which they borrow.
    let (remaining, (_, star_count, leading_space, title, terminator)) = tuple((
        parser_with_context!(start_of_line)(&headline_context),
        many1_count(tag("*")),
        space1,
        many1(parser_with_context!(standard_set_object)(&headline_context)),
        alt((line_ending, eof)),
    ))(input)?;

    Ok((remaining, (star_count, leading_space, title, terminator)))
}

/// Exit matcher for headline titles: succeeds exactly where a line ending begins.
///
/// The context is not consulted; the parameter exists so the function has the
/// same shape as the other exit matchers in this file.
fn headline_end<'r, 's>(_context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    line_ending(input)
}

/// Check that we are at the start of a line
fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
    let document_root = context.get_document_root().unwrap();
    let preceding_character = get_one_before(document_root, input)
        .map(|slice| slice.chars().next())
        .flatten();
    match preceding_character {
        Some('\n') => {}
        Some(_) => {
            // Not at start of line, cannot be a heading
            return Err(nom::Err::Error(CustomError::MyError(MyError(
                "Not at start of line",
            ))));
        }
        // If None, we are at the start of the file which allows for headings
        None => {}
    };
    Ok((input, ()))
}

/// Get one character from before the current position.
fn get_one_before<'s>(document: &'s str, current_position: &'s str) -> Option<&'s str> {
    assert!(is_slice_of(document, current_position));
    if document.as_ptr() as usize == current_position.as_ptr() as usize {
        return None;
    }
    let offset = current_position.as_ptr() as usize - document.as_ptr() as usize;
    let previous_character_offset = document.floor_char_boundary(offset - 1);
    Some(&document[previous_character_offset..offset])
}

/// Report whether `child` lies entirely within the memory span of `parent`.
///
/// The comparison is purely on addresses: `child` must start at or after the
/// first byte of `parent` and end at or before the byte just past its last.
fn is_slice_of(parent: &str, child: &str) -> bool {
    let (parent_lo, child_lo) = (parent.as_ptr() as usize, child.as_ptr() as usize);
    let parent_hi = parent_lo + parent.len();
    let child_hi = child_lo + child.len();
    parent_lo <= child_lo && child_hi <= parent_hi
}

/// Return the prefix of `input` that a parser consumed.
///
/// `input` is what the parser was given and `remaining` is what it handed
/// back; the result is the part of `input` that comes before `remaining`.
///
/// # Panics
///
/// Panics if `remaining` is not a sub-slice of `input`.
fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str {
    assert!(is_slice_of(input, remaining));
    // Distance in bytes from the start of `input` to the start of `remaining`.
    let consumed_len = remaining.as_ptr() as usize - input.as_ptr() as usize;
    &input[..consumed_len]
}

#[cfg(test)]
mod tests {
    use super::*;

    /// `get_one_before` must step back over a whole multi-byte character,
    /// not a single byte.
    #[test]
    fn get_one_before_unicode() {
        let hearts = "🧡💛💚💙💜";
        // Byte offset at which the third heart (green) begins.
        let green_start = hearts
            .char_indices()
            .nth(2)
            .map(|(byte_index, _)| byte_index)
            .unwrap();
        let preceding = get_one_before(hearts, &hearts[green_start..]).unwrap();
        assert!(is_slice_of(hearts, preceding));
        assert_eq!(preceding, "💛");
    }
}
Start writing the parser for headings. 2023-03-23 23:35:32 +00:00			`use nom::branch::alt;`
			`use nom::bytes::complete::tag;`
Ran into a lifetime issue. 2023-03-23 23:53:20 +00:00			`use nom::character::complete::line_ending;`
			`use nom::character::complete::space1;`
Start writing the parser for headings. 2023-03-23 23:35:32 +00:00			`use nom::combinator::eof;`
Implement heading parser. 2023-03-24 21:19:46 +00:00			`use nom::combinator::map;`
Check the exit matcher in more places. 2023-03-24 00:12:42 +00:00			`use nom::combinator::not;`
Start writing the parser for headings. 2023-03-23 23:35:32 +00:00			`use nom::combinator::recognize;`
Implement heading parser. 2023-03-24 21:19:46 +00:00			`use nom::combinator::verify;`
			`use nom::multi::many0;`
Solved the lifetime issue by using the standard many1 combinator. 2023-03-24 00:00:35 +00:00			`use nom::multi::many1;`
Start writing the parser for headings. 2023-03-23 23:35:32 +00:00			`use nom::multi::many1_count;`
			`use nom::sequence::tuple;`

Implement the section parser. 2023-03-24 21:00:27 +00:00			`use crate::parser::element::element;`
Start writing the parser for headings. 2023-03-23 23:35:32 +00:00			`use crate::parser::error::CustomError;`
			`use crate::parser::error::MyError;`
Ran into a lifetime issue. 2023-03-23 23:53:20 +00:00			`use crate::parser::object::standard_set_object;`
Start writing the parser for headings. 2023-03-23 23:35:32 +00:00			`use crate::parser::parser_context::ChainBehavior;`
Starting to define document parser. 2023-03-23 21:59:39 +00:00			`use crate::parser::parser_context::ContextElement;`
			`use crate::parser::parser_context::ContextTree;`
Start writing the parser for headings. 2023-03-23 23:35:32 +00:00			`use crate::parser::parser_context::ExitMatcherNode;`
Starting to define document parser. 2023-03-23 21:59:39 +00:00
Introduce the document structure. 2023-03-23 21:51:49 +00:00			`use super::element::Element;`
Starting to define document parser. 2023-03-23 21:59:39 +00:00			`use super::error::Res;`
Simple version of the headline parser done. 2023-03-24 00:03:45 +00:00			`use super::object::Object;`
Start writing the parser for headings. 2023-03-23 23:35:32 +00:00			`use super::parser_with_context::parser_with_context;`
Introduce the document structure. 2023-03-23 21:51:49 +00:00			`use super::source::Source;`
Start writing the parser for headings. 2023-03-23 23:35:32 +00:00			`use super::Context;`
Adding lifetimes. 2022-10-15 00:17:48 +00:00
Introduce the document structure. 2023-03-23 21:51:49 +00:00			`#[derive(Debug)]`
			`pub struct Document<'s> {`
			`pub source: &'s str,`
			`pub zeroth_section: Option<Section<'s>>,`
			`pub children: Vec<Heading<'s>>,`
			`}`

			`#[derive(Debug)]`
			`pub struct Heading<'s> {`
			`pub source: &'s str,`
Implement heading parser. 2023-03-24 21:19:46 +00:00			`pub stars: usize,`
Introduce the document structure. 2023-03-23 21:51:49 +00:00			`pub children: Vec<DocumentElement<'s>>,`
			`}`

			`#[derive(Debug)]`
			`pub struct Section<'s> {`
			`pub source: &'s str,`
			`pub children: Vec<Element<'s>>,`
			`}`

			`#[derive(Debug)]`
			`pub enum DocumentElement<'s> {`
			`Heading(Heading<'s>),`
			`Section(Section<'s>),`
			`}`
Starting an unbound matcher type to allow unbound matchers as parameters to context_many_till. 2022-12-04 03:44:53 +00:00
Introduce the document structure. 2023-03-23 21:51:49 +00:00			`impl<'s> Source<'s> for Document<'s> {`
			`fn get_source(&'s self) -> &'s str {`
			`self.source`
			`}`
			`}`
Add comment. 2022-12-18 09:22:28 +00:00
Introduce the document structure. 2023-03-23 21:51:49 +00:00			`impl<'s> Source<'s> for DocumentElement<'s> {`
			`fn get_source(&'s self) -> &'s str {`
			`match self {`
			`DocumentElement::Heading(obj) => obj.source,`
			`DocumentElement::Section(obj) => obj.source,`
			`}`
			`}`
Move the document parser inside text_element_parser. This is to put the context-sensitive parsers together during this early development stage. 2022-11-27 00:14:19 +00:00			`}`
Starting to define document parser. 2023-03-23 21:59:39 +00:00
Check the exit matcher in more places. 2023-03-24 00:12:42 +00:00			`#[allow(dead_code)]`
Starting to define document parser. 2023-03-23 21:59:39 +00:00			`pub fn document(input: &str) -> Res<&str, Document> {`
			`let initial_context: ContextTree<'_, '_> = ContextTree::new();`
			`let document_context =`
			`initial_context.with_additional_node(ContextElement::DocumentRoot(input));`

			`todo!()`
			`}`
Start writing the parser for headings. 2023-03-23 23:35:32 +00:00
			`fn section<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Section<'s>> {`
			`// TODO: The zeroth section is specialized so it probably needs its own parser`
			`let parser_context = context`
			`.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {`
			`exit_matcher: ChainBehavior::AndParent(Some(&section_end)),`
			`}))`
			`.with_additional_node(ContextElement::Context("section"));`
Check the exit matcher in more places. 2023-03-24 00:12:42 +00:00			`not(\|i\| parser_context.check_exit_matcher(i))(input)?;`
Implement the section parser. 2023-03-24 21:00:27 +00:00			`let element_matcher = parser_with_context!(element)(&parser_context);`
			`let (remaining, children) = many1(element_matcher)(input)?;`
			`let source = get_consumed(input, remaining);`
			`Ok((remaining, Section { source, children }))`
Start writing the parser for headings. 2023-03-23 23:35:32 +00:00			`}`

			`fn section_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {`
			`let heading_matcher = parser_with_context!(heading)(context);`
			`alt((recognize(heading_matcher), eof))(input)`
			`}`

			`fn heading<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Heading<'s>> {`
Check the exit matcher in more places. 2023-03-24 00:12:42 +00:00			`not(\|i\| context.check_exit_matcher(i))(input)?;`
			`let (remaining, (star_count, _ws, title, _ws2)) = headline(context, input)?;`
Implement heading parser. 2023-03-24 21:19:46 +00:00			`let section_matcher = parser_with_context!(section)(context);`
			`// TODO: This needs to only match headings below the current level`
			`let heading_matcher = parser_with_context!(heading)(context);`
			`let (remaining, children) = many0(alt((`
			`map(`
			`verify(heading_matcher, \|h\| h.stars > star_count),`
			`DocumentElement::Heading,`
			`),`
			`map(section_matcher, DocumentElement::Section),`
			`)))(remaining)?;`
			`let source = get_consumed(input, remaining);`
			`Ok((`
			`remaining,`
			`Heading {`
			`source: source,`
			`stars: star_count,`
			`children,`
			`},`
			`))`
Ran into a lifetime issue. 2023-03-23 23:53:20 +00:00			`}`

Simple version of the headline parser done. 2023-03-24 00:03:45 +00:00			`fn headline<'r, 's>(`
			`context: Context<'r, 's>,`
			`input: &'s str,`
			`) -> Res<&'s str, (usize, &'s str, Vec<Object<'s>>, &'s str)> {`
Ran into a lifetime issue. 2023-03-23 23:53:20 +00:00			`let parser_context =`
			`context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {`
			`exit_matcher: ChainBehavior::AndParent(Some(&headline_end)),`
			`}));`
Solved the lifetime issue by using the standard many1 combinator. 2023-03-24 00:00:35 +00:00			`let standard_set_object_matcher = parser_with_context!(standard_set_object)(&parser_context);`
Create a start_of_line parser. 2023-03-24 20:37:34 +00:00			`let start_of_line_matcher = parser_with_context!(start_of_line)(&parser_context);`
Ran into a lifetime issue. 2023-03-23 23:53:20 +00:00
Create a start_of_line parser. 2023-03-24 20:37:34 +00:00			`let (remaining, (_sol, star_count, ws, title, ws2)) = tuple((`
			`start_of_line_matcher,`
Start writing the parser for headings. 2023-03-23 23:35:32 +00:00			`many1_count(tag("*")),`
Ran into a lifetime issue. 2023-03-23 23:53:20 +00:00			`space1,`
Solved the lifetime issue by using the standard many1 combinator. 2023-03-24 00:00:35 +00:00			`many1(standard_set_object_matcher),`
			`alt((line_ending, eof)),`
Create a start_of_line parser. 2023-03-24 20:37:34 +00:00			`))(input)?;`
			`Ok((remaining, (star_count, ws, title, ws2)))`
Start writing the parser for headings. 2023-03-23 23:35:32 +00:00			`}`

Ran into a lifetime issue. 2023-03-23 23:53:20 +00:00			`fn headline_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {`
			`line_ending(input)`
			`}`

Create a start_of_line parser. 2023-03-24 20:37:34 +00:00			`/// Check that we are at the start of a line`
			`fn start_of_line<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {`
			`let document_root = context.get_document_root().unwrap();`
			`let preceding_character = get_one_before(document_root, input)`
			`.map(\|slice\| slice.chars().next())`
			`.flatten();`
			`match preceding_character {`
			`Some('\n') => {}`
			`Some(_) => {`
			`// Not at start of line, cannot be a heading`
			`return Err(nom::Err::Error(CustomError::MyError(MyError(`
			`"Not at start of line",`
			`))));`
			`}`
			`// If None, we are at the start of the file which allows for headings`
			`None => {}`
			`};`
			`Ok((input, ()))`
			`}`

			`/// Get one character from before the current position.`
Start writing the parser for headings. 2023-03-23 23:35:32 +00:00			`fn get_one_before<'s>(document: &'s str, current_position: &'s str) -> Option<&'s str> {`
			`assert!(is_slice_of(document, current_position));`
			`if document.as_ptr() as usize == current_position.as_ptr() as usize {`
			`return None;`
			`}`
			`let offset = current_position.as_ptr() as usize - document.as_ptr() as usize;`
			`let previous_character_offset = document.floor_char_boundary(offset - 1);`
			`Some(&document[previous_character_offset..offset])`
			`}`

Create a start_of_line parser. 2023-03-24 20:37:34 +00:00			`/// Check if the child string slice is a slice of the parent string slice.`
Start writing the parser for headings. 2023-03-23 23:35:32 +00:00			`fn is_slice_of(parent: &str, child: &str) -> bool {`
			`let parent_start = parent.as_ptr() as usize;`
			`let parent_end = parent_start + parent.len();`
			`let child_start = child.as_ptr() as usize;`
			`let child_end = child_start + child.len();`
			`child_start >= parent_start && child_end <= parent_end`
			`}`

Implement the section parser. 2023-03-24 21:00:27 +00:00			`/// Get a slice of the string that was consumed in a parser using the original input to the parser and the remaining input after the parser.`
			`fn get_consumed<'s>(input: &'s str, remaining: &'s str) -> &'s str {`
			`assert!(is_slice_of(input, remaining));`
			`let source = {`
			`let offset = remaining.as_ptr() as usize - input.as_ptr() as usize;`
			`&input[..offset]`
			`};`
			`source`
			`}`

Start writing the parser for headings. 2023-03-23 23:35:32 +00:00			`#[cfg(test)]`
			`mod tests {`
			`use super::*;`

			`#[test]`
			`fn get_one_before_unicode() {`
			`let input = "🧡💛💚💙💜";`
			`let (green_heart_index, _) = input.char_indices().skip(2).next().unwrap();`
			`let starting_with_green_heart = &input[green_heart_index..];`
			`let yellow_heart = get_one_before(input, starting_with_green_heart).unwrap();`
			`assert!(is_slice_of(input, yellow_heart));`
			`assert_eq!(yellow_heart, "💛");`
			`}`
			`}`