organic/src/parser/latex_fragment.rs
2023-07-19 00:38:19 -04:00

231 lines
8.0 KiB
Rust

use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::alpha1;
use nom::character::complete::anychar;
use nom::character::complete::line_ending;
use nom::character::complete::none_of;
use nom::character::complete::one_of;
use nom::character::complete::space0;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many0;
use nom::multi::many_till;
use nom::sequence::tuple;
use super::Context;
use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::get_one_before;
use crate::parser::LatexFragment;
#[tracing::instrument(ret, level = "debug")]
pub fn latex_fragment<'r, 's>(
context: Context<'r, 's>,
input: &'s str,
) -> Res<&'s str, LatexFragment<'s>> {
let (remaining, _) = alt((
parser_with_context!(raw_latex_fragment)(context),
parser_with_context!(escaped_parenthesis_fragment)(context),
parser_with_context!(escaped_bracket_fragment)(context),
parser_with_context!(double_dollar_fragment)(context),
parser_with_context!(dollar_char_fragment)(context),
parser_with_context!(bordered_dollar_fragment)(context),
))(input)?;
let (remaining, _) = space0(remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, LatexFragment { source }))
}
#[tracing::instrument(ret, level = "debug")]
fn raw_latex_fragment<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
let (remaining, _) = tag("\\")(input)?;
let (remaining, _) = name(context, remaining)?;
let (remaining, _) = many0(parser_with_context!(brackets)(context))(remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, source))
}
#[tracing::instrument(ret, level = "debug")]
fn name<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
alpha1(input)
}
#[tracing::instrument(ret, level = "debug")]
fn brackets<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
let (remaining, body) = alt((
recognize(tuple((
tag("["),
many_till(
anychar,
peek(alt((
parser_with_context!(exit_matcher_parser)(context),
alt((recognize(one_of("{}[]")), line_ending)),
))),
),
tag("]"),
))),
recognize(tuple((
tag("{"),
many_till(
anychar,
peek(alt((
parser_with_context!(exit_matcher_parser)(context),
alt((recognize(one_of("{}")), line_ending)),
))),
),
tag("}"),
))),
))(input)?;
Ok((remaining, body))
}
#[tracing::instrument(ret, level = "debug")]
fn escaped_parenthesis_fragment<'r, 's>(
context: Context<'r, 's>,
input: &'s str,
) -> Res<&'s str, &'s str> {
let (remaining, _) = tag("\\(")(input)?;
let (remaining, _) = recognize(many_till(
anychar,
peek(alt((
parser_with_context!(exit_matcher_parser)(context),
tag("\\)"),
))),
))(remaining)?;
let (remaining, _) = tag("\\)")(remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, source))
}
#[tracing::instrument(ret, level = "debug")]
fn escaped_bracket_fragment<'r, 's>(
context: Context<'r, 's>,
input: &'s str,
) -> Res<&'s str, &'s str> {
let (remaining, _) = tag("\\[")(input)?;
let (remaining, _) = recognize(many_till(
anychar,
peek(alt((
parser_with_context!(exit_matcher_parser)(context),
tag("\\]"),
))),
))(remaining)?;
let (remaining, _) = tag("\\]")(remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, source))
}
#[tracing::instrument(ret, level = "debug")]
fn double_dollar_fragment<'r, 's>(
context: Context<'r, 's>,
input: &'s str,
) -> Res<&'s str, &'s str> {
// TODO: The documentation on the dollar sign versions is incomplete. Test to figure out what the real requirements are. For example, can this span more than 3 lines and can this contain a single $ since its terminated by $$?
let (remaining, _) = tag("$$")(input)?;
let (remaining, _) = recognize(many_till(
anychar,
peek(alt((
parser_with_context!(exit_matcher_parser)(context),
tag("$"),
))),
))(remaining)?;
let (remaining, _) = tag("$$")(remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, source))
}
#[tracing::instrument(ret, level = "debug")]
fn dollar_char_fragment<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
let (_, _) = pre(context, input)?;
let (remaining, _) = tag("$")(input)?;
let (remaining, _) = verify(none_of(".,?;\""), |c| !c.is_whitespace())(remaining)?;
let (remaining, _) = tag("$")(remaining)?;
let (_, _) = peek(parser_with_context!(post)(context))(remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, source))
}
#[tracing::instrument(ret, level = "debug")]
pub fn pre<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
let document_root = context.get_document_root().unwrap();
let preceding_character = get_one_before(document_root, input)
.map(|slice| slice.chars().next())
.flatten();
if let Some('$') = preceding_character {
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Not a valid pre character for dollar char fragment.",
))));
}
Ok((input, ()))
}
#[tracing::instrument(ret, level = "debug")]
pub fn post<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
// TODO: What about eof? Test to find out.
// TODO: Figure out which punctuation characters should be included.
let (remaining, _) = alt((recognize(one_of(" \t-.,;:!?'\"")), line_ending))(input)?;
Ok((remaining, ()))
}
#[tracing::instrument(ret, level = "debug")]
fn bordered_dollar_fragment<'r, 's>(
context: Context<'r, 's>,
input: &'s str,
) -> Res<&'s str, &'s str> {
let (_, _) = pre(context, input)?;
let (remaining, _) = tag("$")(input)?;
// TODO: I'm assuming I should be peeking at the borders but the documentation is not clear. Test to figure out.
let (_, _) = peek(parser_with_context!(open_border)(context))(remaining)?;
// TODO: As an optimization it would be nice to exit early upon hitting the 3rd line break
let (remaining, _) = verify(
recognize(many_till(
anychar,
peek(alt((
parser_with_context!(exit_matcher_parser)(context),
tag("$"),
))),
)),
|body: &str| body.lines().take(4).count() <= 3,
)(remaining)?;
let (_, _) = peek(parser_with_context!(close_border)(context))(remaining)?;
let (remaining, _) = tag("$")(remaining)?;
let (_, _) = peek(parser_with_context!(post)(context))(remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, source))
}
#[tracing::instrument(ret, level = "debug")]
pub fn open_border<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
recognize(verify(none_of(".,;$"), |c| !c.is_whitespace()))(input)
}
#[tracing::instrument(ret, level = "debug")]
pub fn close_border<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
let document_root = context.get_document_root().unwrap();
let preceding_character = get_one_before(document_root, input)
.map(|slice| slice.chars().next())
.flatten();
match preceding_character {
Some(c) if !c.is_whitespace() && !".,;$".contains(c) => Ok((input, ())),
_ => {
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Not a valid pre character for dollar char fragment.",
))));
}
}
}