//! A single element of text. use crate::parser::parser_with_context::parser_with_context; use crate::parser::text::paragraph_end; use super::nom_context::ChainBehavior; use super::nom_context::ContextElement; use super::nom_context::ContextTree; use super::nom_context::ExitMatcherNode; use super::nom_context::PreviousElementNode; use super::text::bold_end; use super::text::bold_start; use super::text::line_break; use super::text::link_end; use super::text::link_start; use super::text::space; use super::text::span; use super::text::symbol; use super::text::Bold; use super::text::Link; use super::text::Res; use super::text::TextElement; use super::token::Token; use super::Context; use nom::branch::alt; use nom::bytes::complete::tag; use nom::bytes::complete::take; use nom::combinator::cond; use nom::combinator::eof; use nom::combinator::map; use nom::combinator::not; use nom::combinator::peek; use nom::combinator::recognize; use nom::error::ErrorKind; use nom::error::ParseError; use nom::error::VerboseError; use nom::multi::many1; use nom::multi::many_till; use nom::sequence::tuple; use nom::IResult; use nom::InputLength; type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult; fn context_many_till<'r, 's, I, O, E, F, M, T>( context: Context<'r, 's>, mut many_matcher: M, mut till_matcher: T, ) -> impl FnMut(I) -> IResult>, F), E> + 'r where I: Clone + InputLength, E: ParseError, M: for<'x> Fn(Context<'x, 's>, I) -> IResult + 'r, T: for<'x> Fn(Context<'x, 's>, I) -> IResult + 'r, O: Into>, { move |mut i: I| { // TODO: Can I eliminate the clone? let mut current_context = context.clone(); // Despite the clone, the Rc should still point to the same value, otherwise we'll get stuck in an endless loop. assert!(current_context.ptr_eq(context)); loop { let len = i.input_len(); match till_matcher(¤t_context, i.clone()) { Ok((remaining, finish)) => { let mut ret = Vec::new(); while !current_context.ptr_eq(context) { let (context_element, next_context) = current_context.pop_front(); let context_element = context_element.expect("We only pop off context elements created in this function, so they are all Some()"); current_context = next_context; match context_element { ContextElement::ExitMatcherNode(_) => {} ContextElement::StartOfParagraph => {} ContextElement::Context(_) => {} ContextElement::PreviousElementNode(PreviousElementNode { element: token, }) => { ret.push(token); } }; } ret.reverse(); return Ok((remaining, (ret, finish))); } Err(nom::Err::Error(_)) => { match many_matcher(¤t_context, i.clone()) { Err(nom::Err::Error(err)) => { return Err(nom::Err::Error(E::append(i, ErrorKind::ManyTill, err))) } Err(e) => return Err(e), Ok((remaining, many_elem)) => { // infinite loop check: the parser must always consume if remaining.input_len() == len { return Err(nom::Err::Error(E::from_error_kind( remaining, ErrorKind::ManyTill, ))); } current_context = current_context.with_additional_node( ContextElement::PreviousElementNode(PreviousElementNode { element: many_elem.into(), }), ); i = remaining; } } } Err(e) => return Err(e), }; } } } pub fn document(input: &str) -> Res<&str, Vec<(Vec, &str)>> { let initial_context: ContextTree<'_, '_> = ContextTree::new(); let paragraph_parser = parser_with_context!(paragraph); let ret = many1(paragraph_parser(&initial_context))(input); ret } pub fn context_paragraph_end<'s, 'r>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, &'s str> { paragraph_end(input) } fn can_start_bold<'s, 'r>(context: Context<'r, 's>) -> bool { _preceded_by_whitespace(context) && !_in_section(context, "bold") } fn _in_section<'s, 'r, 'x>(context: Context<'r, 's>, section_name: &'x str) -> bool { for thing in context.iter() { match thing.get_data() { ContextElement::ExitMatcherNode(_) => {} ContextElement::PreviousElementNode(_) => {} ContextElement::Context(name) if *name == section_name => return true, ContextElement::Context(_) => {} ContextElement::StartOfParagraph => {} // TODO: If we specialize this to bold then this would be a good spot to stop scanning } } false } fn _preceded_by_whitespace<'s, 'r>(context: Context<'r, 's>) -> bool { let mut context_iterator = context.iter().enumerate(); loop { if let Some((i, ctx)) = context_iterator.next() { match ctx.get_data() { ContextElement::ExitMatcherNode(_) => {} ContextElement::PreviousElementNode(previous_element_node) => { match &previous_element_node.element { Token::TextElement(text_element) => { match text_element { TextElement::Span(_) => return false, TextElement::Space(_) => return true, TextElement::LineBreak(_) => return true, TextElement::Symbol(_) => return false, TextElement::Bold(_) => return false, TextElement::Link(_) => return false, }; } }; } ContextElement::StartOfParagraph => { return true; } ContextElement::Context(_) => {} } } else { break; } } false } pub fn context_bold_start<'s, 'r>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, &'s str> { if can_start_bold(context) { recognize(bold_start)(input) } else { // TODO: Make this a custom error not(take(0usize))(input)?; unreachable!() } } pub fn context_bold_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { let (remaining, actual_match) = recognize(bold_end)(input)?; peek(alt(( // Must have whitespace after the end asterisk or it must be the end of that section (as checked by the exit matcher) tag(" "), tag("\t"), tag("\n"), |i| context.check_exit_matcher(i), )))(remaining)?; Ok((remaining, actual_match)) } pub fn paragraph<'s, 'r>( context: Context<'r, 's>, i: &'s str, ) -> Res<&'s str, (Vec>, &'s str)> { // Add a not(eof) check because many_till cannot match a zero-length string not(eof)(i)?; let paragraph_context = context .with_additional_node(ContextElement::StartOfParagraph) .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { exit_matcher: ChainBehavior::AndParent(Some(&context_paragraph_end)), })); let (remaining, (many, till)) = context_many_till(¶graph_context, flat_text_element, context_paragraph_end)(i)?; let many = many .into_iter() .filter_map(|token| match token { Token::TextElement(text_element) => Some(text_element), }) .collect(); Ok((remaining, (many, till))) } fn flat_text_element<'s, 'r>( context: Context<'r, 's>, i: &'s str, ) -> Res<&'s str, TextElement<'s>> { not(|i| context.check_exit_matcher(i))(i)?; let bold_matcher = parser_with_context!(flat_bold)(&context); let link_matcher = parser_with_context!(flat_link)(&context); alt(( map(bold_matcher, TextElement::Bold), map(link_matcher, TextElement::Link), map(span, TextElement::Span), map(symbol("*"), TextElement::Symbol), map(symbol("["), TextElement::Symbol), map(symbol("]"), TextElement::Symbol), map(space, TextElement::Space), map(line_break, TextElement::LineBreak), ))(i) } fn flat_bold<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Bold<'s>> { let bold_start = parser_with_context!(context_bold_start)(&context); let nom_context = context .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { exit_matcher: ChainBehavior::AndParent(Some(&context_bold_end)), })) .with_additional_node(ContextElement::Context("bold")); let (remaining, captured) = recognize(tuple((bold_start, |i| { context_many_till(&nom_context, flat_text_element, context_bold_end)(i) })))(i)?; let ret = Bold { contents: captured }; Ok((remaining, ret)) } fn recognize_link_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { recognize(link_end)(input) } fn flat_link<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Link<'s>> { let nom_context = context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { exit_matcher: ChainBehavior::AndParent(Some(&recognize_link_end)), })); // let nom_context = context.with_additional_exit_matcher(&recognize_link_end); let text_element_parser = parser_with_context!(flat_text_element)(&nom_context); let (remaining, captured) = recognize(tuple(( link_start, many_till(text_element_parser, link_end), )))(i)?; let ret = Link { contents: captured }; Ok((remaining, ret)) }