Unify two places checking if text was preceded by whitespace.

This commit is contained in:
Tom Alexander 2023-09-07 02:27:55 -04:00
parent 6b82b46e09
commit ba291c6776
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
3 changed files with 24 additions and 35 deletions

View File

@ -15,6 +15,7 @@ use super::org_source::BracketDepth;
use super::org_source::OrgSource; use super::org_source::OrgSource;
use super::util::exit_matcher_parser; use super::util::exit_matcher_parser;
use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting; use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting;
use super::util::preceded_by_whitespace;
use crate::context::parser_with_context; use crate::context::parser_with_context;
use crate::context::ContextElement; use crate::context::ContextElement;
use crate::context::ContextMatcher; use crate::context::ContextMatcher;
@ -36,7 +37,7 @@ pub fn subscript<'b, 'g, 'r, 's>(
) -> Res<OrgSource<'s>, Subscript<'s>> { ) -> Res<OrgSource<'s>, Subscript<'s>> {
// We check for the underscore first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the document root unnecessarily. // We check for the underscore first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the document root unnecessarily.
let (remaining, _) = tag("_")(input)?; let (remaining, _) = tag("_")(input)?;
pre(context, input)?; pre(input)?;
let (remaining, _body) = script_body(context, remaining)?; let (remaining, _body) = script_body(context, remaining)?;
let (remaining, _trailing_whitespace) = let (remaining, _trailing_whitespace) =
maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?;
@ -56,7 +57,7 @@ pub fn superscript<'b, 'g, 'r, 's>(
) -> Res<OrgSource<'s>, Superscript<'s>> { ) -> Res<OrgSource<'s>, Superscript<'s>> {
// We check for the circumflex first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the document root unnecessarily. // We check for the circumflex first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the document root unnecessarily.
let (remaining, _) = tag("^")(input)?; let (remaining, _) = tag("^")(input)?;
pre(context, input)?; pre(input)?;
let (remaining, _body) = script_body(context, remaining)?; let (remaining, _body) = script_body(context, remaining)?;
let (remaining, _trailing_whitespace) = let (remaining, _trailing_whitespace) =
maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?;
@ -70,19 +71,8 @@ pub fn superscript<'b, 'g, 'r, 's>(
} }
/// Guard shared by `subscript` and `superscript`: succeeds without consuming any input only
/// when the character immediately before `input` is not whitespace.
///
/// As of this commit the check is `not(preceded_by_whitespace(true))`. Passing `true` makes a
/// missing preceding character count as "preceded by whitespace", so the guard still fails
/// when there is no preceding character, as the removed hand-written `match` did for `None`.
///
/// NOTE(review): the shared helper also accepts U+200B (zero-width space) as whitespace, which
/// the removed `!c.is_whitespace()` test presumably did not, so a script marker directly after
/// a zero-width space now appears to be rejected. Confirm this is intended.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn pre<'b, 'g, 'r, 's>( fn pre<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
_context: RefContext<'b, 'g, 'r, 's>, not(preceded_by_whitespace(true))(input)?;
// The lines from here down to the closing `};` exist only on the old side of the diff: they
// are the hand-rolled preceding-character check that the helper call above replaces.
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, ()> {
let preceding_character = input.get_preceding_character();
match preceding_character {
Some(c) if !c.is_whitespace() => {}
_ => {
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Must be preceded by a non-whitespace character.".into(),
))));
}
};
Ok((input, ())) Ok((input, ()))
} }
@ -120,37 +110,27 @@ fn script_asterisk<'b, 'g, 'r, 's>(
/// Parse the bare (unbraced) body of a sub/superscript: an optional leading `+` or `-` sign
/// followed by a run of `script_alphanum_character`s that stops as soon as
/// `end_script_alphanum_character` matches.
///
/// Returns the remaining input together with the full span consumed from `input`, sign
/// included, as computed by `get_consumed`.
///
/// After this commit the two character parsers no longer take a context, so `_context` is
/// unused here and is kept only to preserve the parser signature.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn script_alphanum<'b, 'g, 'r, 's>( fn script_alphanum<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>, _context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>, input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> { ) -> Res<OrgSource<'s>, OrgSource<'s>> {
// The sign is optional; when absent `remaining` is still equal to `input`.
let (remaining, _sign) = opt(recognize(one_of("+-")))(input)?; let (remaining, _sign) = opt(recognize(one_of("+-")))(input)?;
let (remaining, _script) = many_till( let (remaining, _script) =
parser_with_context!(script_alphanum_character)(context), many_till(script_alphanum_character, end_script_alphanum_character)(remaining)?;
parser_with_context!(end_script_alphanum_character)(context),
)(remaining)?;
let source = get_consumed(input, remaining); let source = get_consumed(input, remaining);
Ok((remaining, source)) Ok((remaining, source))
} }
/// Match exactly one character that may appear inside a bare sub/superscript body: any
/// alphanumeric character, or one of `,`, `.` and `\`.
///
/// Returns the matched one-character slice of the input. This commit drops the unused context
/// parameter so the function can be passed directly to nom combinators.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn script_alphanum_character<'b, 'g, 'r, 's>( fn script_alphanum_character<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
_context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
recognize(verify(anychar, |c| { recognize(verify(anychar, |c| {
c.is_alphanumeric() || r#",.\"#.contains(*c) c.is_alphanumeric() || r#",.\"#.contains(*c)
}))(input) }))(input)
} }
/// Match the final character of a bare sub/superscript body.
///
/// The character must be alphanumeric (so `,`, `.` and `\` cannot end the body), and the input
/// right after it must not begin with another `script_alphanum_character`. That follow-up test
/// is a `peek`, so only the final character itself is consumed and returned.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn end_script_alphanum_character<'b, 'g, 'r, 's>( fn end_script_alphanum_character<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
let (remaining, final_char) = recognize(verify(anychar, |c| c.is_alphanumeric()))(input)?; let (remaining, final_char) = recognize(verify(anychar, |c| c.is_alphanumeric()))(input)?;
// Look ahead without consuming: the body ends here only if no further script character follows.
peek(not(parser_with_context!(script_alphanum_character)( peek(not(script_alphanum_character))(remaining)?;
context,
)))(remaining)?;
Ok((remaining, final_char)) Ok((remaining, final_char))
} }

View File

@ -325,7 +325,7 @@ fn _text_markup_end<'b, 'g, 'r, 's, 'c>(
input: OrgSource<'s>, input: OrgSource<'s>,
marker_symbol: &'c str, marker_symbol: &'c str,
) -> Res<OrgSource<'s>, OrgSource<'s>> { ) -> Res<OrgSource<'s>, OrgSource<'s>> {
not(preceded_by_whitespace)(input)?; not(preceded_by_whitespace(false))(input)?;
let (remaining, _marker) = terminated( let (remaining, _marker) = terminated(
tag(marker_symbol), tag(marker_symbol),
peek(parser_with_context!(post)(context)), peek(parser_with_context!(post)(context)),

View File

@ -135,16 +135,25 @@ pub fn start_of_line<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
} }
} }
/// Build a parser that succeeds, consuming nothing, when the character immediately before the
/// input is whitespace or a zero-width space (U+200B).
///
/// `allow_start_of_file` is the result used when there is no preceding character at all:
/// `true` treats that position as if it were preceded by whitespace, `false` makes the parser
/// fail there.
///
/// Returning a closure lets callers configure the flag once and hand the result straight to
/// nom combinators, e.g. `not(preceded_by_whitespace(true))`.
pub fn preceded_by_whitespace(
allow_start_of_file: bool,
) -> impl for<'s> Fn(OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
// `move` copies the flag into the closure so it can outlive this call.
move |input| _preceded_by_whitespace(allow_start_of_file, input)
}
/// Check that the character before `input` is whitespace or a zero-width space (U+200B); when there is no preceding character the outcome is decided by `allow_start_of_file`.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn preceded_by_whitespace<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> { fn _preceded_by_whitespace<'s>(
allow_start_of_file: bool,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, ()> {
let preceding_character = input.get_preceding_character(); let preceding_character = input.get_preceding_character();
if !preceding_character if !preceding_character
.map(|c| c.is_whitespace() || c == '\u{200B}') // 200B = Zero-width space .map(|c| c.is_whitespace() || c == '\u{200B}') // 200B = Zero-width space
.unwrap_or(false) .unwrap_or(allow_start_of_file)
{ {
return Err(nom::Err::Error(CustomError::MyError(MyError( return Err(nom::Err::Error(CustomError::MyError(MyError(
"Not preceded by whitespace.".into(), "Must be preceded by a non-whitespace character.".into(),
)))); ))));
} }
Ok((input, ())) Ok((input, ()))