organic/src/parser/text_element_parser.rs

206 lines
7.4 KiB
Rust
Raw Normal View History

2022-07-16 03:26:49 +00:00
//! A single element of text.
2022-10-15 18:28:24 +00:00
use std::cell::RefCell;
use std::rc::Rc;
2022-07-17 01:32:23 +00:00
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::text::paragraph_end;
use super::nom_context::ChainBehavior;
use super::nom_context::ContextElement;
use super::nom_context::ContextTree;
use super::nom_context::FailMatcherNode;
2022-12-04 05:53:59 +00:00
use super::nom_context::PreviousElementNode;
2022-07-17 01:55:33 +00:00
use super::text::bold_end;
use super::text::bold_start;
2022-07-16 03:26:49 +00:00
use super::text::line_break;
use super::text::link_end;
use super::text::link_start;
2022-07-16 03:26:49 +00:00
use super::text::space;
use super::text::span;
use super::text::symbol;
2022-07-17 01:55:33 +00:00
use super::text::Bold;
use super::text::Link;
use super::text::Res;
2022-07-16 03:26:49 +00:00
use super::text::TextElement;
2022-12-04 04:53:52 +00:00
use super::token::Token;
use super::Context;
2022-07-16 03:26:49 +00:00
use nom::branch::alt;
2022-11-26 23:25:53 +00:00
use nom::combinator::eof;
2022-07-16 03:26:49 +00:00
use nom::combinator::map;
use nom::combinator::not;
2022-07-17 01:55:33 +00:00
use nom::combinator::recognize;
use nom::error::ErrorKind;
use nom::error::ParseError;
2022-07-16 03:26:49 +00:00
use nom::error::VerboseError;
use nom::multi::many1;
2022-07-17 01:32:23 +00:00
use nom::multi::many_till;
2022-07-17 01:55:33 +00:00
use nom::sequence::tuple;
2022-07-16 03:26:49 +00:00
use nom::IResult;
2022-11-27 00:46:59 +00:00
use nom::InputLength;
use nom::Parser;
2022-11-25 23:23:51 +00:00
use tracing::instrument;
use tracing::trace;
2022-10-15 00:17:48 +00:00
/// Shape of a parser function that still needs a parsing context supplied:
/// it takes a [`Context`] plus the input `I` and yields an `IResult<I, O, E>`.
///
/// This is an unsized `dyn Fn` type, so it can only be used behind a pointer
/// such as `&` or `Box`.
type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult<I, O, E>;
fn context_many_till<'r, 's, I, O, E, F, M, T>(
context: Context<'r, 's>,
2022-12-04 04:13:21 +00:00
mut many_matcher: M,
mut till_matcher: T,
) -> impl FnMut(I) -> IResult<I, (Vec<Token<'s>>, F), E> + 'r
2022-11-27 00:46:59 +00:00
where
I: Clone + InputLength,
E: ParseError<I>,
2022-12-11 04:51:04 +00:00
M: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, O, E> + 'r,
T: for<'x> Fn(Context<'x, 's>, I) -> IResult<I, F, E> + 'r,
O: Into<Token<'s>>,
2022-11-27 00:46:59 +00:00
{
move |mut i: I| {
let mut current_context = context.clone();
2022-12-11 05:08:39 +00:00
// Despite the clone, the Rc should still point to the same value, otherwise we'll get stuck in an endless loop.
assert!(current_context.ptr_eq(context));
2022-11-27 00:54:46 +00:00
loop {
let len = i.input_len();
match till_matcher(&current_context, i.clone()) {
2022-12-11 02:10:37 +00:00
Ok((remaining, finish)) => {
let mut ret = Vec::new();
2022-12-11 03:04:39 +00:00
while !current_context.ptr_eq(context) {
let (context_element, next_context) = current_context.pop_front();
let context_element = context_element.expect("We only pop off context elements created in this function, so they are all Some()");
current_context = next_context;
match context_element {
ContextElement::FailMatcherNode(_) => {}
ContextElement::PreviousElementNode(PreviousElementNode {
element: token,
}) => {
2022-12-11 03:24:12 +00:00
ret.push(token);
}
};
2022-12-11 03:04:39 +00:00
}
2022-12-11 04:57:39 +00:00
ret.reverse();
2022-12-11 02:10:37 +00:00
return Ok((remaining, (ret, finish)));
}
2022-11-27 00:54:46 +00:00
Err(nom::Err::Error(_)) => {
match many_matcher(&current_context, i.clone()) {
2022-11-27 00:54:46 +00:00
Err(nom::Err::Error(err)) => {
return Err(nom::Err::Error(E::append(i, ErrorKind::ManyTill, err)))
}
Err(e) => return Err(e),
Ok((remaining, many_elem)) => {
// infinite loop check: the parser must always consume
if remaining.input_len() == len {
return Err(nom::Err::Error(E::from_error_kind(
remaining,
ErrorKind::ManyTill,
)));
}
current_context = current_context.with_additional_node(
ContextElement::PreviousElementNode(PreviousElementNode {
element: many_elem.into(),
}),
);
2022-11-27 00:54:46 +00:00
i = remaining;
}
}
}
Err(e) => return Err(e),
2022-12-04 04:07:16 +00:00
};
2022-11-27 00:54:46 +00:00
}
}
2022-11-27 00:26:48 +00:00
}
/// Parses `input` as one or more paragraphs, starting from a fresh, empty
/// context tree.
///
/// Each entry of the returned vector is whatever [`paragraph`] produced: the
/// paragraph's text elements paired with the text matched as its terminator.
/// Fails if not even one paragraph can be parsed.
pub fn document(input: &str) -> Res<&str, Vec<(Vec<TextElement>, &str)>> {
    let root_context: ContextTree<'_, '_> = ContextTree::new();
    let paragraph_with_context = parser_with_context!(paragraph);
    // NOTE(review): the result is bound before being returned, exactly as in
    // the original; this drops the parser temporary at the end of the `let`
    // statement. Confirm that returning the call directly still satisfies the
    // borrow checker before simplifying.
    let parsed = many1(paragraph_with_context(root_context))(input);
    parsed
}
2022-12-04 04:13:21 +00:00
pub fn context_paragraph_end<'s, 'r>(
context: Context<'r, 's>,
2022-12-04 04:13:21 +00:00
input: &'s str,
) -> Res<&'s str, &'s str> {
2022-12-04 04:07:16 +00:00
paragraph_end(input)
}
pub fn paragraph<'s, 'r>(
context: Context<'r, 's>,
2022-11-27 00:26:48 +00:00
i: &'s str,
) -> Res<&'s str, (Vec<TextElement<'s>>, &'s str)> {
// Add a not(eof) check because many_till cannot match a zero-length string
not(eof)(i)?;
2022-12-04 02:11:39 +00:00
let paragraph_context =
context.with_additional_node(ContextElement::FailMatcherNode(FailMatcherNode {
fail_matcher: ChainBehavior::AndParent(Some(&paragraph_end)),
}));
2022-12-11 04:57:39 +00:00
let (remaining, (many, till)) =
context_many_till(&paragraph_context, flat_text_element, context_paragraph_end)(i)?;
let many = many
.into_iter()
.filter_map(|token| match token {
Token::TextElement(text_element) => Some(text_element),
})
.collect();
Ok((remaining, (many, till)))
}
fn flat_text_element<'s, 'r>(
context: Context<'r, 's>,
i: &'s str,
) -> Res<&'s str, TextElement<'s>> {
2022-11-27 05:21:34 +00:00
not(|i| context.check_fail_matcher(i))(i)?;
2022-12-04 02:35:30 +00:00
let bold_matcher = parser_with_context!(flat_bold)(context.clone());
let link_matcher = parser_with_context!(flat_link)(context.clone());
alt((
2022-11-25 23:55:56 +00:00
map(bold_matcher, TextElement::Bold),
map(link_matcher, TextElement::Link),
map(span, TextElement::Span),
map(symbol("*"), TextElement::Symbol),
map(symbol("["), TextElement::Symbol),
map(symbol("]"), TextElement::Symbol),
map(space, TextElement::Space),
map(line_break, TextElement::LineBreak),
))(i)
2022-10-15 00:17:48 +00:00
}
2022-10-15 18:16:52 +00:00
/// Runs `bold_end` on `input` and, on success, yields the slice of input it
/// consumed instead of its parsed value.
fn recognize_bold_end(input: &str) -> Res<&str, &str> {
    let mut consumed_by_bold_end = recognize(bold_end);
    consumed_by_bold_end(input)
}
fn flat_bold<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Bold<'s>> {
let nom_context =
2022-12-04 02:11:39 +00:00
context.with_additional_node(ContextElement::FailMatcherNode(FailMatcherNode {
fail_matcher: ChainBehavior::AndParent(Some(&recognize_bold_end)),
}));
// let nom_context = context.with_additional_fail_matcher(&recognize_bold_end);
let text_element_parser = parser_with_context!(flat_text_element)(nom_context);
let (remaining, captured) = recognize(tuple((
bold_start,
many_till(text_element_parser, bold_end),
)))(i)?;
2022-11-25 23:55:56 +00:00
let ret = Bold { contents: captured };
Ok((remaining, ret))
2022-10-15 18:28:24 +00:00
}
/// Runs `link_end` on `input` and, on success, yields the slice of input it
/// consumed instead of its parsed value.
fn recognize_link_end(input: &str) -> Res<&str, &str> {
    let mut consumed_by_link_end = recognize(link_end);
    consumed_by_link_end(input)
}
fn flat_link<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Link<'s>> {
let nom_context =
2022-12-04 02:11:39 +00:00
context.with_additional_node(ContextElement::FailMatcherNode(FailMatcherNode {
fail_matcher: ChainBehavior::AndParent(Some(&recognize_link_end)),
}));
// let nom_context = context.with_additional_fail_matcher(&recognize_link_end);
let text_element_parser = parser_with_context!(flat_text_element)(nom_context);
let (remaining, captured) = recognize(tuple((
link_start,
many_till(text_element_parser, link_end),
)))(i)?;
2022-11-25 23:55:56 +00:00
let ret = Link { contents: captured };
Ok((remaining, ret))
}