organic/src/parser/text_element_parser.rs

//! A single element of text.
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::text::paragraph_end;

use super::nom_context::ChainBehavior;
use super::nom_context::ContextElement;
use super::nom_context::ContextTree;
use super::nom_context::ExitMatcherNode;
use super::nom_context::PreviousElementNode;
use super::text::bold_end;
use super::text::bold_start;
use super::text::line_break;
use super::text::link_end;
use super::text::link_start;
use super::text::space;
use super::text::span;
use super::text::symbol;
use super::text::Bold;
use super::text::Link;
use super::text::Res;
use super::text::TextElement;
use super::token::Token;
use super::Context;
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::bytes::complete::take;
use nom::combinator::cond;
use nom::combinator::eof;
use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::error::ErrorKind;
use nom::error::ParseError;
use nom::error::VerboseError;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::tuple;
use nom::IResult;
use nom::InputLength;

/// Signature of a context-aware matcher that has not yet been bound to a
/// particular context: it is called with the context and the input, and
/// returns a nom `IResult`.
type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult<I, O, E>;

/// Context-aware counterpart of nom's `many_till`.
///
/// Returns a parser that repeatedly tries `till_matcher`. Each time it fails
/// with a recoverable `nom::Err::Error`, `many_matcher` is applied instead;
/// its output is converted into a `Token` and added to the working context as
/// a `PreviousElementNode`, so every later matcher invocation is handed a
/// context that contains the elements matched so far.
///
/// On success the parser yields the remaining input together with the pair
/// `(tokens matched by many_matcher, output of till_matcher)`.
///
/// # Errors
///
/// * A recoverable error from `many_matcher` is returned with
///   `ErrorKind::ManyTill` appended.
/// * If `many_matcher` succeeds without consuming input, an
///   `ErrorKind::ManyTill` error is returned to avoid looping forever.
/// * Any non-`Error` failure from either matcher is propagated unchanged.
///
/// # Panics
///
/// Panics if the cloned context does not compare `ptr_eq` to `context`, or if
/// a node popped while unwinding carries no context element.
fn context_many_till<'r, 's, I, O, E, F, M, T>(
    context: Context<'r, 's>,
    mut many_matcher: M,
    mut till_matcher: T,
) -> impl FnMut(I) -> IResult<I, (Vec<Token<'s>>, F), E> + 'r
where
    I: Clone + InputLength,
    E: ParseError<I>,
    M: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, O, E> + 'r,
    T: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, F, E> + 'r,
    O: Into<Token<'s>>,
{
    move |mut i: I| {
        // TODO: Can I eliminate the clone?
        let mut current_context = context.clone();
        // Despite the clone, the Rc should still point to the same value, otherwise we'll get stuck in an endless loop.
        assert!(current_context.ptr_eq(context));
        loop {
            // Remember how much input is left so a non-consuming match can be detected below.
            let len = i.input_len();
            match till_matcher(&current_context, i.clone()) {
                Ok((remaining, finish)) => {
                    // The terminator matched: unwind every node added by this function
                    // (stopping once we are back at the caller's context) and collect
                    // the tokens stored in the PreviousElementNode entries.
                    let mut ret = Vec::new();
                    while !current_context.ptr_eq(context) {
                        let (context_element, next_context) = current_context.pop_front();
                        let context_element = context_element.expect("We only pop off context elements created in this function, so they are all Some()");
                        current_context = next_context;
                        match context_element {
                            ContextElement::ExitMatcherNode(_) => {}
                            ContextElement::StartOfParagraph => {}
                            ContextElement::Context(_) => {}
                            ContextElement::PreviousElementNode(PreviousElementNode {
                                element: token,
                            }) => {
                                ret.push(token);
                            }
                        };
                    }
                    // Presumably pop_front yields the most recently added node first, so the
                    // tokens are reversed to restore the order in which they were matched.
                    ret.reverse();
                    return Ok((remaining, (ret, finish)));
                }
                Err(nom::Err::Error(_)) => {
                    // The terminator did not match here; try to consume one more element.
                    match many_matcher(&current_context, i.clone()) {
                        Err(nom::Err::Error(err)) => {
                            return Err(nom::Err::Error(E::append(i, ErrorKind::ManyTill, err)))
                        }
                        Err(e) => return Err(e),
                        Ok((remaining, many_elem)) => {
                            // infinite loop check: the parser must always consume
                            if remaining.input_len() == len {
                                return Err(nom::Err::Error(E::from_error_kind(
                                    remaining,
                                    ErrorKind::ManyTill,
                                )));
                            }

                            // Record the matched element in the context so that subsequent
                            // matcher calls can inspect what came before them.
                            current_context = current_context.with_additional_node(
                                ContextElement::PreviousElementNode(PreviousElementNode {
                                    element: many_elem.into(),
                                }),
                            );
                            i = remaining;
                        }
                    }
                }
                // Failure / Incomplete from the terminator: propagate unchanged.
                Err(e) => return Err(e),
            };
        }
    }
}

/// Parses `input` as a sequence of one or more paragraphs.
///
/// A fresh `ContextTree` is created as the root context and `paragraph` is
/// applied repeatedly via `many1`. Each entry of the result is what
/// `paragraph` returns: the paragraph's text elements paired with the text
/// matched by its terminator.
pub fn document(input: &str) -> Res<&str, Vec<(Vec<TextElement>, &str)>> {
    let root_context: ContextTree<'_, '_> = ContextTree::new();
    let bind_paragraph = parser_with_context!(paragraph);
    // NOTE(review): the result is bound to a local before being returned, presumably so
    // the temporary parser borrowing `root_context` is dropped before the context
    // itself — confirm before inlining this into a tail expression.
    let paragraphs = many1(bind_paragraph(&root_context))(input);
    paragraphs
}

/// Context-aware wrapper around `paragraph_end`.
///
/// `context` is accepted so the function has the `(context, input)` shape used
/// by the other matchers in this file (it is installed as an exit matcher and
/// passed as a till-matcher in `paragraph`), but it is not consulted: the
/// result is exactly that of `paragraph_end(input)`.
pub fn context_paragraph_end<'s, 'r>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, &'s str> {
    paragraph_end(input)
}

/// Decides whether a bold section may begin at the current position.
///
/// Bold may start only when the position is preceded by whitespace (or the
/// start of the paragraph) and we are not already inside a "bold" section.
fn can_start_bold<'s, 'r>(context: Context<'r, 's>) -> bool {
    if !_preceded_by_whitespace(context) {
        return false;
    }
    !_in_section(context, "bold")
}

/// Reports whether any node in `context` is a `ContextElement::Context`
/// whose name equals `section_name`.
///
/// All other node kinds are ignored, and the scan covers every node yielded
/// by `context.iter()` until a match is found.
fn _in_section<'s, 'r, 'x>(context: Context<'r, 's>, section_name: &'x str) -> bool {
    context.iter().any(|node| match node.get_data() {
        ContextElement::Context(name) => *name == section_name,
        // TODO: If we specialize this to bold then StartOfParagraph would be a good spot to stop scanning
        ContextElement::StartOfParagraph
        | ContextElement::ExitMatcherNode(_)
        | ContextElement::PreviousElementNode(_) => false,
    })
}

/// Reports whether the current parse position is preceded by whitespace.
///
/// The context nodes are visited in the order produced by `context.iter()`
/// (presumably most recent first) and the answer is decided by the first
/// `PreviousElementNode` or `StartOfParagraph` encountered:
///
/// * previous element is `Space` or `LineBreak` -> `true`
/// * previous element is any other text element -> `false`
/// * `StartOfParagraph` -> `true` (nothing precedes the position)
///
/// Exit-matcher and section (`Context`) nodes carry no text and are skipped.
/// If the context is exhausted without a decision the result is `false`.
fn _preceded_by_whitespace<'s, 'r>(context: Context<'r, 's>) -> bool {
    for ctx in context.iter() {
        match ctx.get_data() {
            // Structural nodes say nothing about the preceding text; keep looking.
            ContextElement::ExitMatcherNode(_) | ContextElement::Context(_) => {}
            ContextElement::StartOfParagraph => return true,
            ContextElement::PreviousElementNode(previous_element_node) => {
                match &previous_element_node.element {
                    Token::TextElement(text_element) => {
                        // Exhaustive on purpose: a new TextElement variant must be
                        // classified here explicitly.
                        return match text_element {
                            TextElement::Space(_) | TextElement::LineBreak(_) => true,
                            TextElement::Span(_)
                            | TextElement::Symbol(_)
                            | TextElement::Bold(_)
                            | TextElement::Link(_) => false,
                        };
                    }
                }
            }
        }
    }
    false
}

/// Matches the opening marker of a bold section, but only where the context
/// allows bold to start (see `can_start_bold`).
///
/// On success returns the remaining input and the slice consumed by
/// `bold_start`. When bold may not start here, a recoverable parse error is
/// returned without consuming anything.
pub fn context_bold_start<'s, 'r>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, &'s str> {
    if !can_start_bold(context) {
        // TODO: Make this a custom error
        // `take(0)` always succeeds, so `not(..)` always fails and `?` returns that error.
        not(take(0usize))(input)?;
        unreachable!()
    }
    recognize(bold_start)(input)
}

/// Matches the closing marker of a bold section.
///
/// The marker counts only when it is followed by a space, tab or newline,
/// or when the context's exit matcher succeeds right after it. The lookahead
/// is not consumed: the returned remainder starts immediately after the
/// marker, and the returned match is the slice consumed by `bold_end`.
pub fn context_bold_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    let (rest, marker) = recognize(bold_end)(input)?;

    // Lookahead only: whitespace must follow the closing asterisk, or the enclosing
    // section must end here (as reported by the exit matcher).
    peek(alt((
        tag(" "),
        tag("\t"),
        tag("\n"),
        |i| context.check_exit_matcher(i),
    )))(rest)?;

    Ok((rest, marker))
}

/// Parses a single paragraph starting at `i`.
///
/// The paragraph is parsed in a child context that marks the start of the
/// paragraph and installs `context_paragraph_end` as an exit matcher (chained
/// with the parent's). Text elements are collected until
/// `context_paragraph_end` matches.
///
/// Returns the remaining input together with the paragraph's text elements
/// and the text matched by the paragraph terminator. Fails on empty input.
pub fn paragraph<'s, 'r>(
    context: Context<'r, 's>,
    i: &'s str,
) -> Res<&'s str, (Vec<TextElement<'s>>, &'s str)> {
    // many_till cannot match a zero-length string, so reject end-of-input up front.
    not(eof)(i)?;

    let paragraph_context = context
        .with_additional_node(ContextElement::StartOfParagraph)
        .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
            exit_matcher: ChainBehavior::AndParent(Some(&context_paragraph_end)),
        }));

    let (rest, (tokens, terminator)) =
        context_many_till(&paragraph_context, flat_text_element, context_paragraph_end)(i)?;

    // Unwrap each token back into the text element it carries.
    let elements = tokens
        .into_iter()
        .map(|token| match token {
            Token::TextElement(text_element) => text_element,
        })
        .collect();

    Ok((rest, (elements, terminator)))
}

/// Parses exactly one text element at `i`.
///
/// Fails immediately if the context's exit matcher matches at this position,
/// so an element never swallows the end of its enclosing section. Otherwise
/// the alternatives are tried in order and the first that matches wins:
/// bold, link, span, the literal symbols `*`, `[`, `]`, space, line break.
fn flat_text_element<'s, 'r>(
    context: Context<'r, 's>,
    i: &'s str,
) -> Res<&'s str, TextElement<'s>> {
    // Stop here if the enclosing section ends at this position.
    not(|i| context.check_exit_matcher(i))(i)?;

    // Bind the context-dependent parsers to the current context.
    let parse_bold = parser_with_context!(flat_bold)(&context);
    let parse_link = parser_with_context!(flat_link)(&context);

    alt((
        // Structured elements are attempted before their delimiter characters
        // are accepted as plain symbols further down.
        map(parse_bold, TextElement::Bold),
        map(parse_link, TextElement::Link),
        map(span, TextElement::Span),
        map(symbol("*"), TextElement::Symbol),
        map(symbol("["), TextElement::Symbol),
        map(symbol("]"), TextElement::Symbol),
        map(space, TextElement::Space),
        map(line_break, TextElement::LineBreak),
    ))(i)
}

/// Parses a bold section starting at `i`.
///
/// The opening marker is matched against the caller's context (so the
/// "may bold start here" check sees the surrounding text). The body is then
/// parsed in a child context that installs `context_bold_end` as an exit
/// matcher and tags the section as "bold". The resulting `Bold` holds the
/// whole recognized slice, from the opening marker through the closing one.
fn flat_bold<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Bold<'s>> {
    let opening = parser_with_context!(context_bold_start)(&context);

    let bold_context = context
        .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
            exit_matcher: ChainBehavior::AndParent(Some(&context_bold_end)),
        }))
        .with_additional_node(ContextElement::Context("bold"));

    // Only the consumed slice is kept; the parsed inner elements are discarded.
    let (rest, contents) = recognize(tuple((opening, |i| {
        context_many_till(&bold_context, flat_text_element, context_bold_end)(i)
    })))(i)?;

    Ok((rest, Bold { contents }))
}

/// Applies `link_end` to `input` and, via `recognize`, returns the slice of
/// input it consumed.
///
/// `context` is not consulted; it is accepted so the function has the
/// `(context, input)` shape needed to install it as an exit matcher in
/// `flat_link`.
fn recognize_link_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    recognize(link_end)(input)
}

/// Parses a link starting at `i`.
///
/// The body is parsed in a child context that installs `recognize_link_end`
/// as an exit matcher (chained with the parent's), so inner text elements
/// stop at the link terminator. The resulting `Link` holds the whole
/// recognized slice: `link_start`, the inner elements, and `link_end`.
fn flat_link<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Link<'s>> {
    let link_context =
        context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
            exit_matcher: ChainBehavior::AndParent(Some(&recognize_link_end)),
        }));
    let inner_element = parser_with_context!(flat_text_element)(&link_context);

    // Only the consumed slice is kept; the parsed inner elements are discarded.
    let (rest, contents) = recognize(tuple((
        link_start,
        many_till(inner_element, link_end),
    )))(i)?;

    Ok((rest, Link { contents }))
}