
The paragraph's exit matcher, which detects elements, was causing the plain list parser to exit after the first item was parsed, which in turn caused a significant amount of re-parsing.
224 lines
7.6 KiB
Rust
224 lines
7.6 KiB
Rust
use nom::branch::alt;
|
|
use nom::bytes::complete::tag;
|
|
use nom::character::complete::anychar;
|
|
use nom::character::complete::one_of;
|
|
use nom::character::complete::space0;
|
|
use nom::combinator::map;
|
|
use nom::combinator::not;
|
|
use nom::combinator::opt;
|
|
use nom::combinator::peek;
|
|
use nom::combinator::recognize;
|
|
use nom::combinator::verify;
|
|
use nom::multi::many_till;
|
|
|
|
use super::org_source::OrgSource;
|
|
use super::Context;
|
|
use super::Object;
|
|
use crate::error::CustomError;
|
|
use crate::error::MyError;
|
|
use crate::error::Res;
|
|
use crate::parser::exiting::ExitClass;
|
|
use crate::parser::object_parser::standard_set_object;
|
|
use crate::parser::parser_context::ContextElement;
|
|
use crate::parser::parser_context::ExitMatcherNode;
|
|
use crate::parser::parser_context::SubscriptSuperscriptBrace;
|
|
use crate::parser::parser_with_context::parser_with_context;
|
|
use crate::parser::util::exit_matcher_parser;
|
|
use crate::parser::util::get_consumed;
|
|
use crate::parser::Subscript;
|
|
use crate::parser::Superscript;
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
pub fn subscript<'r, 's>(
|
|
context: Context<'r, 's>,
|
|
input: OrgSource<'s>,
|
|
) -> Res<OrgSource<'s>, Subscript<'s>> {
|
|
// We check for the underscore first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the document root unnecessarily.
|
|
let (remaining, _) = tag("_")(input)?;
|
|
pre(context, input)?;
|
|
let (remaining, _body) = script_body(context, remaining)?;
|
|
let (remaining, _) = space0(remaining)?;
|
|
let source = get_consumed(input, remaining);
|
|
Ok((
|
|
remaining,
|
|
Subscript {
|
|
source: source.into(),
|
|
},
|
|
))
|
|
}
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
pub fn superscript<'r, 's>(
|
|
context: Context<'r, 's>,
|
|
input: OrgSource<'s>,
|
|
) -> Res<OrgSource<'s>, Superscript<'s>> {
|
|
// We check for the circumflex first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the document root unnecessarily.
|
|
let (remaining, _) = tag("^")(input)?;
|
|
pre(context, input)?;
|
|
let (remaining, _body) = script_body(context, remaining)?;
|
|
let (remaining, _) = space0(remaining)?;
|
|
let source = get_consumed(input, remaining);
|
|
Ok((
|
|
remaining,
|
|
Superscript {
|
|
source: source.into(),
|
|
},
|
|
))
|
|
}
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
fn pre<'r, 's>(_context: Context<'r, 's>, input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
|
|
let preceding_character = input.get_preceding_character();
|
|
match preceding_character {
|
|
Some(c) if !c.is_whitespace() => {}
|
|
_ => {
|
|
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
|
"Must be preceded by a non-whitespace character.".into(),
|
|
))));
|
|
}
|
|
};
|
|
Ok((input, ()))
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
enum ScriptBody<'s> {
|
|
Braceless(&'s str),
|
|
WithBraces(Vec<Object<'s>>),
|
|
}
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
fn script_body<'r, 's>(
|
|
context: Context<'r, 's>,
|
|
input: OrgSource<'s>,
|
|
) -> Res<OrgSource<'s>, ScriptBody<'s>> {
|
|
alt((
|
|
map(parser_with_context!(script_asterisk)(context), |body| {
|
|
ScriptBody::Braceless(body.into())
|
|
}),
|
|
map(parser_with_context!(script_alphanum)(context), |body| {
|
|
ScriptBody::Braceless(body.into())
|
|
}),
|
|
map(parser_with_context!(script_with_braces)(context), |body| {
|
|
ScriptBody::WithBraces(body.into())
|
|
}),
|
|
))(input)
|
|
}
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
fn script_asterisk<'r, 's>(
|
|
_context: Context<'r, 's>,
|
|
input: OrgSource<'s>,
|
|
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
|
tag("*")(input)
|
|
}
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
fn script_alphanum<'r, 's>(
|
|
context: Context<'r, 's>,
|
|
input: OrgSource<'s>,
|
|
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
|
let (remaining, _sign) = opt(recognize(one_of("+-")))(input)?;
|
|
let (remaining, _script) = many_till(
|
|
parser_with_context!(script_alphanum_character)(context),
|
|
parser_with_context!(end_script_alphanum_character)(context),
|
|
)(remaining)?;
|
|
let source = get_consumed(input, remaining);
|
|
Ok((remaining, source))
|
|
}
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
fn script_alphanum_character<'r, 's>(
|
|
_context: Context<'r, 's>,
|
|
input: OrgSource<'s>,
|
|
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
|
recognize(verify(anychar, |c| {
|
|
c.is_alphanumeric() || r#",.\"#.contains(*c)
|
|
}))(input)
|
|
}
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
fn end_script_alphanum_character<'r, 's>(
|
|
context: Context<'r, 's>,
|
|
input: OrgSource<'s>,
|
|
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
|
let (remaining, final_char) = recognize(verify(anychar, |c| c.is_alphanumeric()))(input)?;
|
|
peek(not(parser_with_context!(script_alphanum_character)(
|
|
context,
|
|
)))(remaining)?;
|
|
Ok((remaining, final_char))
|
|
}
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
fn script_with_braces<'r, 's>(
|
|
context: Context<'r, 's>,
|
|
input: OrgSource<'s>,
|
|
) -> Res<OrgSource<'s>, Vec<Object<'s>>> {
|
|
let (remaining, _) = tag("{")(input)?;
|
|
let parser_context = context
|
|
.with_additional_node(ContextElement::SubscriptSuperscriptBrace(
|
|
SubscriptSuperscriptBrace {
|
|
position: remaining.into(),
|
|
depth: 0,
|
|
},
|
|
))
|
|
.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
|
|
class: ExitClass::Gamma,
|
|
exit_matcher: &script_with_braces_end,
|
|
}));
|
|
|
|
let (remaining, (children, _exit_contents)) = many_till(
|
|
parser_with_context!(standard_set_object)(&parser_context),
|
|
parser_with_context!(exit_matcher_parser)(&parser_context),
|
|
)(remaining)?;
|
|
|
|
let (remaining, _) = tag("}")(remaining)?;
|
|
Ok((remaining, children))
|
|
}
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
fn script_with_braces_end<'r, 's>(
|
|
context: Context<'r, 's>,
|
|
input: OrgSource<'s>,
|
|
) -> Res<OrgSource<'s>, OrgSource<'s>> {
|
|
let context_depth = get_bracket_depth(context)
|
|
.expect("This function should only be called from inside a subscript or superscript.");
|
|
let text_since_context_entry = get_consumed(context_depth.position, input);
|
|
let mut current_depth = context_depth.depth;
|
|
for c in Into::<&str>::into(text_since_context_entry).chars() {
|
|
match c {
|
|
'{' => {
|
|
current_depth += 1;
|
|
}
|
|
'}' if current_depth == 0 => {
|
|
panic!("Exceeded subscript or superscript brace depth.")
|
|
}
|
|
'}' if current_depth > 0 => {
|
|
current_depth -= 1;
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
if current_depth == 0 {
|
|
let close_bracket = tag::<&str, OrgSource<'_>, CustomError<OrgSource<'_>>>("}")(input);
|
|
if close_bracket.is_ok() {
|
|
return close_bracket;
|
|
}
|
|
}
|
|
return Err(nom::Err::Error(CustomError::MyError(MyError(
|
|
"Not a valid end for subscript or superscript.".into(),
|
|
))));
|
|
}
|
|
|
|
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
|
|
fn get_bracket_depth<'r, 's>(
|
|
context: Context<'r, 's>,
|
|
) -> Option<&'r SubscriptSuperscriptBrace<'s>> {
|
|
for node in context.iter() {
|
|
match node.get_data() {
|
|
ContextElement::SubscriptSuperscriptBrace(depth) => return Some(depth),
|
|
_ => {}
|
|
}
|
|
}
|
|
None
|
|
}
|