2022-07-16 03:26:49 +00:00
|
|
|
//! A single element of text.
|
2022-07-17 01:32:23 +00:00
|
|
|
use crate::parser::parser_with_context::parser_with_context;
|
|
|
|
use crate::parser::text::paragraph_end;
|
|
|
|
|
2022-12-16 05:53:29 +00:00
|
|
|
use super::combinator::context_many1;
|
2022-12-16 04:28:09 +00:00
|
|
|
use super::combinator::context_many_till;
|
2022-12-16 04:15:27 +00:00
|
|
|
use super::error::CustomError;
|
|
|
|
use super::error::MyError;
|
2022-12-04 03:18:37 +00:00
|
|
|
use super::nom_context::ChainBehavior;
|
|
|
|
use super::nom_context::ContextElement;
|
|
|
|
use super::nom_context::ContextTree;
|
2022-12-16 02:24:53 +00:00
|
|
|
use super::nom_context::ExitMatcherNode;
|
2022-12-04 05:53:59 +00:00
|
|
|
use super::nom_context::PreviousElementNode;
|
2022-07-17 01:55:33 +00:00
|
|
|
use super::text::bold_end;
|
|
|
|
use super::text::bold_start;
|
2022-07-16 03:26:49 +00:00
|
|
|
use super::text::line_break;
|
2022-11-25 23:40:38 +00:00
|
|
|
use super::text::link_end;
|
|
|
|
use super::text::link_start;
|
2022-07-16 03:26:49 +00:00
|
|
|
use super::text::space;
|
|
|
|
use super::text::span;
|
|
|
|
use super::text::symbol;
|
2022-07-17 01:55:33 +00:00
|
|
|
use super::text::Bold;
|
2022-11-25 23:40:38 +00:00
|
|
|
use super::text::Link;
|
2022-12-16 06:35:49 +00:00
|
|
|
use super::text::Paragraph;
|
2022-07-17 00:42:56 +00:00
|
|
|
use super::text::Res;
|
2022-07-16 03:26:49 +00:00
|
|
|
use super::text::TextElement;
|
2022-12-04 04:53:52 +00:00
|
|
|
use super::token::Token;
|
2022-12-04 02:13:42 +00:00
|
|
|
use super::Context;
|
2022-07-16 03:26:49 +00:00
|
|
|
use nom::branch::alt;
|
2022-12-11 07:24:19 +00:00
|
|
|
use nom::bytes::complete::tag;
|
2022-12-11 07:07:12 +00:00
|
|
|
use nom::bytes::complete::take;
|
|
|
|
use nom::combinator::cond;
|
2022-11-26 23:25:53 +00:00
|
|
|
use nom::combinator::eof;
|
2022-07-16 03:26:49 +00:00
|
|
|
use nom::combinator::map;
|
|
|
|
use nom::combinator::not;
|
2022-12-11 07:24:19 +00:00
|
|
|
use nom::combinator::peek;
|
2022-07-17 01:55:33 +00:00
|
|
|
use nom::combinator::recognize;
|
2022-11-26 23:22:41 +00:00
|
|
|
use nom::error::ErrorKind;
|
|
|
|
use nom::error::ParseError;
|
2022-12-16 03:38:28 +00:00
|
|
|
use nom::error::VerboseError;
|
2022-07-17 01:32:23 +00:00
|
|
|
use nom::multi::many_till;
|
2022-07-17 01:55:33 +00:00
|
|
|
use nom::sequence::tuple;
|
2022-07-16 03:26:49 +00:00
|
|
|
use nom::IResult;
|
2022-11-27 00:46:59 +00:00
|
|
|
use nom::InputLength;
|
2022-10-15 00:17:48 +00:00
|
|
|
|
2022-12-11 04:49:02 +00:00
|
|
|
/// Unsized (`dyn Fn`) type of a parser that receives the parsing [`Context`] together with an
/// input `I` and yields a nom `IResult<I, O, E>`.
type UnboundMatcher<'r, 's, I, O, E> = dyn Fn(Context<'r, 's>, I) -> IResult<I, O, E>;
|
2022-12-04 03:44:53 +00:00
|
|
|
|
2022-12-16 06:35:49 +00:00
|
|
|
pub fn document(input: &str) -> Res<&str, Vec<Paragraph>> {
|
2022-12-11 04:49:02 +00:00
|
|
|
let initial_context: ContextTree<'_, '_> = ContextTree::new();
|
2022-12-16 06:35:49 +00:00
|
|
|
let (remaining, tokens) = context_many1(&initial_context, paragraph)(input)?;
|
|
|
|
let paragraphs = tokens.into_iter().map(|token| {
|
|
|
|
match token {
|
|
|
|
Token::TextElement(_) => unreachable!(),
|
|
|
|
Token::Paragraph(paragraph) => paragraph,
|
|
|
|
}
|
|
|
|
}).collect();
|
|
|
|
Ok((remaining, paragraphs))
|
2022-11-27 00:14:19 +00:00
|
|
|
}
|
|
|
|
|
2022-12-04 04:13:21 +00:00
|
|
|
pub fn context_paragraph_end<'s, 'r>(
|
2022-12-11 04:49:02 +00:00
|
|
|
context: Context<'r, 's>,
|
2022-12-04 04:13:21 +00:00
|
|
|
input: &'s str,
|
|
|
|
) -> Res<&'s str, &'s str> {
|
2022-12-04 04:07:16 +00:00
|
|
|
paragraph_end(input)
|
|
|
|
}
|
|
|
|
|
2022-12-11 07:07:12 +00:00
|
|
|
fn can_start_bold<'s, 'r>(context: Context<'r, 's>) -> bool {
|
2022-12-16 02:57:21 +00:00
|
|
|
_preceded_by_whitespace(context) && !_in_section(context, "bold")
|
|
|
|
}
|
|
|
|
|
|
|
|
fn _in_section<'s, 'r, 'x>(context: Context<'r, 's>, section_name: &'x str) -> bool {
|
|
|
|
for thing in context.iter() {
|
|
|
|
match thing.get_data() {
|
|
|
|
ContextElement::ExitMatcherNode(_) => {}
|
|
|
|
ContextElement::PreviousElementNode(_) => {}
|
|
|
|
ContextElement::Context(name) if *name == section_name => return true,
|
|
|
|
ContextElement::Context(_) => {}
|
|
|
|
ContextElement::StartOfParagraph => {} // TODO: If we specialize this to bold then this would be a good spot to stop scanning
|
|
|
|
}
|
|
|
|
}
|
|
|
|
false
|
|
|
|
}
|
|
|
|
|
|
|
|
fn _preceded_by_whitespace<'s, 'r>(context: Context<'r, 's>) -> bool {
|
2022-12-11 07:07:12 +00:00
|
|
|
let mut context_iterator = context.iter().enumerate();
|
|
|
|
loop {
|
|
|
|
if let Some((i, ctx)) = context_iterator.next() {
|
|
|
|
match ctx.get_data() {
|
2022-12-16 02:24:53 +00:00
|
|
|
ContextElement::ExitMatcherNode(_) => {}
|
2022-12-11 07:07:12 +00:00
|
|
|
ContextElement::PreviousElementNode(previous_element_node) => {
|
|
|
|
match &previous_element_node.element {
|
|
|
|
Token::TextElement(text_element) => {
|
|
|
|
match text_element {
|
|
|
|
TextElement::Span(_) => return false,
|
|
|
|
TextElement::Space(_) => return true,
|
|
|
|
TextElement::LineBreak(_) => return true,
|
|
|
|
TextElement::Symbol(_) => return false,
|
|
|
|
TextElement::Bold(_) => return false,
|
|
|
|
TextElement::Link(_) => return false,
|
|
|
|
};
|
2022-12-16 01:32:00 +00:00
|
|
|
}
|
2022-12-16 06:35:49 +00:00
|
|
|
Token::Paragraph(_) => unreachable!(),
|
2022-12-11 07:07:12 +00:00
|
|
|
};
|
2022-12-16 01:32:00 +00:00
|
|
|
}
|
2022-12-11 07:07:12 +00:00
|
|
|
ContextElement::StartOfParagraph => {
|
|
|
|
return true;
|
2022-12-16 01:32:00 +00:00
|
|
|
}
|
2022-12-16 02:57:21 +00:00
|
|
|
ContextElement::Context(_) => {}
|
2022-12-11 07:07:12 +00:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
false
|
|
|
|
}
|
|
|
|
|
2022-12-11 05:36:59 +00:00
|
|
|
pub fn context_bold_start<'s, 'r>(
|
|
|
|
context: Context<'r, 's>,
|
|
|
|
input: &'s str,
|
|
|
|
) -> Res<&'s str, &'s str> {
|
2022-12-11 07:07:12 +00:00
|
|
|
if can_start_bold(context) {
|
|
|
|
recognize(bold_start)(input)
|
|
|
|
} else {
|
2022-12-16 04:15:27 +00:00
|
|
|
// TODO: Make this a specific error instead of just a generic MyError
|
2022-12-16 04:28:09 +00:00
|
|
|
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
|
|
|
"Cannot start bold",
|
|
|
|
))));
|
2022-12-11 07:07:12 +00:00
|
|
|
}
|
2022-12-11 05:36:59 +00:00
|
|
|
}
|
|
|
|
|
2022-12-11 05:33:40 +00:00
|
|
|
pub fn context_bold_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
2022-12-11 07:24:19 +00:00
|
|
|
let (remaining, actual_match) = recognize(bold_end)(input)?;
|
|
|
|
peek(alt((
|
2022-12-16 02:24:53 +00:00
|
|
|
// Must have whitespace after the end asterisk or it must be the end of that section (as checked by the exit matcher)
|
2022-12-11 07:24:19 +00:00
|
|
|
tag(" "),
|
|
|
|
tag("\t"),
|
|
|
|
tag("\n"),
|
2022-12-16 02:24:53 +00:00
|
|
|
|i| context.check_exit_matcher(i),
|
2022-12-11 07:24:19 +00:00
|
|
|
)))(remaining)?;
|
|
|
|
|
|
|
|
Ok((remaining, actual_match))
|
2022-12-11 05:33:40 +00:00
|
|
|
}
|
|
|
|
|
2022-12-16 06:35:49 +00:00
|
|
|
pub fn paragraph<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Paragraph<'s>> {
|
2022-11-27 00:14:19 +00:00
|
|
|
// Add a not(eof) check because many_till cannot match a zero-length string
|
|
|
|
not(eof)(i)?;
|
2022-12-11 05:21:30 +00:00
|
|
|
let paragraph_context = context
|
2022-12-16 02:24:53 +00:00
|
|
|
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
|
|
|
exit_matcher: ChainBehavior::AndParent(Some(&context_paragraph_end)),
|
2022-12-16 06:35:49 +00:00
|
|
|
}))
|
|
|
|
.with_additional_node(ContextElement::StartOfParagraph);
|
2022-12-11 04:57:39 +00:00
|
|
|
let (remaining, (many, till)) =
|
|
|
|
context_many_till(¶graph_context, flat_text_element, context_paragraph_end)(i)?;
|
|
|
|
let many = many
|
|
|
|
.into_iter()
|
|
|
|
.filter_map(|token| match token {
|
|
|
|
Token::TextElement(text_element) => Some(text_element),
|
2022-12-16 06:35:49 +00:00
|
|
|
Token::Paragraph(_) => panic!("There should only be text elements in paragraphs."),
|
2022-12-11 04:57:39 +00:00
|
|
|
})
|
|
|
|
.collect();
|
2022-12-16 06:35:49 +00:00
|
|
|
Ok((
|
|
|
|
remaining,
|
|
|
|
Paragraph {
|
|
|
|
contents: many,
|
|
|
|
paragraph_end: till,
|
|
|
|
},
|
|
|
|
))
|
2022-11-27 00:14:19 +00:00
|
|
|
}
|
|
|
|
|
2022-12-11 04:49:02 +00:00
|
|
|
fn flat_text_element<'s, 'r>(
|
|
|
|
context: Context<'r, 's>,
|
|
|
|
i: &'s str,
|
|
|
|
) -> Res<&'s str, TextElement<'s>> {
|
2022-12-16 02:24:53 +00:00
|
|
|
not(|i| context.check_exit_matcher(i))(i)?;
|
2022-10-15 18:04:24 +00:00
|
|
|
|
2022-12-11 05:39:35 +00:00
|
|
|
let bold_matcher = parser_with_context!(flat_bold)(&context);
|
|
|
|
let link_matcher = parser_with_context!(flat_link)(&context);
|
2022-11-25 18:55:09 +00:00
|
|
|
|
2022-10-15 18:04:24 +00:00
|
|
|
alt((
|
2022-11-25 23:55:56 +00:00
|
|
|
map(bold_matcher, TextElement::Bold),
|
|
|
|
map(link_matcher, TextElement::Link),
|
2022-10-15 18:04:24 +00:00
|
|
|
map(span, TextElement::Span),
|
|
|
|
map(symbol("*"), TextElement::Symbol),
|
|
|
|
map(symbol("["), TextElement::Symbol),
|
|
|
|
map(symbol("]"), TextElement::Symbol),
|
|
|
|
map(space, TextElement::Space),
|
|
|
|
map(line_break, TextElement::LineBreak),
|
|
|
|
))(i)
|
2022-10-15 00:17:48 +00:00
|
|
|
}
|
2022-10-15 18:16:52 +00:00
|
|
|
|
2022-12-11 04:49:02 +00:00
|
|
|
fn flat_bold<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Bold<'s>> {
|
2022-12-11 05:39:35 +00:00
|
|
|
let bold_start = parser_with_context!(context_bold_start)(&context);
|
2022-12-16 02:57:21 +00:00
|
|
|
let nom_context = context
|
|
|
|
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
2022-12-16 02:24:53 +00:00
|
|
|
exit_matcher: ChainBehavior::AndParent(Some(&context_bold_end)),
|
2022-12-16 02:57:21 +00:00
|
|
|
}))
|
|
|
|
.with_additional_node(ContextElement::Context("bold"));
|
2022-12-11 05:36:59 +00:00
|
|
|
let (remaining, captured) = recognize(tuple((bold_start, |i| {
|
|
|
|
context_many_till(&nom_context, flat_text_element, context_bold_end)(i)
|
|
|
|
})))(i)?;
|
2022-11-25 23:55:56 +00:00
|
|
|
let ret = Bold { contents: captured };
|
2022-11-25 18:55:09 +00:00
|
|
|
Ok((remaining, ret))
|
2022-10-15 18:28:24 +00:00
|
|
|
}
|
|
|
|
|
2022-12-16 01:32:00 +00:00
|
|
|
fn recognize_link_end<'s, 'r>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
|
2022-11-25 23:40:38 +00:00
|
|
|
recognize(link_end)(input)
|
|
|
|
}
|
|
|
|
|
2022-12-11 04:49:02 +00:00
|
|
|
fn flat_link<'s, 'r>(context: Context<'r, 's>, i: &'s str) -> Res<&'s str, Link<'s>> {
|
2022-12-16 06:39:57 +00:00
|
|
|
// TODO: Link has to be updated to contextual functions like bold was
|
2022-12-04 03:18:37 +00:00
|
|
|
let nom_context =
|
2022-12-16 02:24:53 +00:00
|
|
|
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
|
|
|
exit_matcher: ChainBehavior::AndParent(Some(&recognize_link_end)),
|
2022-12-04 02:11:39 +00:00
|
|
|
}));
|
2022-12-16 02:24:53 +00:00
|
|
|
// let nom_context = context.with_additional_exit_matcher(&recognize_link_end);
|
2022-12-11 05:39:35 +00:00
|
|
|
let text_element_parser = parser_with_context!(flat_text_element)(&nom_context);
|
2022-11-25 23:40:38 +00:00
|
|
|
let (remaining, captured) = recognize(tuple((
|
|
|
|
link_start,
|
|
|
|
many_till(text_element_parser, link_end),
|
|
|
|
)))(i)?;
|
2022-11-25 23:55:56 +00:00
|
|
|
let ret = Link { contents: captured };
|
2022-11-25 23:40:38 +00:00
|
|
|
Ok((remaining, ret))
|
|
|
|
}
|