From f717d5e7df0760885e1ac82eac58444b0400cd24 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 24 Jul 2023 15:41:14 -0400 Subject: [PATCH] Implement parser for braceless subscript/superscript. --- .../subscript_and_superscript/simple.org | 1 + src/parser/subscript_and_superscript.rs | 111 +++++++++++++++++- 2 files changed, 107 insertions(+), 5 deletions(-) diff --git a/org_mode_samples/subscript_and_superscript/simple.org b/org_mode_samples/subscript_and_superscript/simple.org index 74cc815..b24299f 100644 --- a/org_mode_samples/subscript_and_superscript/simple.org +++ b/org_mode_samples/subscript_and_superscript/simple.org @@ -4,3 +4,4 @@ baz^{hello *world*} lorem_{} ipsum^+,\.a5 dolar_,\.a5 +text before foo_7 text afterwards diff --git a/src/parser/subscript_and_superscript.rs b/src/parser/subscript_and_superscript.rs index 4cb85da..1af3970 100644 --- a/src/parser/subscript_and_superscript.rs +++ b/src/parser/subscript_and_superscript.rs @@ -1,13 +1,36 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::character::complete::anychar; +use nom::character::complete::one_of; +use nom::character::complete::space0; +use nom::combinator::map; +use nom::combinator::not; +use nom::combinator::opt; +use nom::combinator::peek; +use nom::combinator::recognize; +use nom::combinator::verify; +use nom::multi::many_till; + use super::Context; +use super::Object; +use crate::error::CustomError; +use crate::error::MyError; use crate::error::Res; -use crate::parser::util::not_yet_implemented; +use crate::parser::parser_with_context::parser_with_context; +use crate::parser::util::get_consumed; +use crate::parser::util::get_one_before; use crate::parser::Subscript; use crate::parser::Superscript; #[tracing::instrument(ret, level = "debug")] pub fn subscript<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Subscript<'s>> { - not_yet_implemented()?; - todo!() + // We check for the underscore first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the document root unnecessarily. + let (remaining, _) = tag("_")(input)?; + pre(context, input)?; + let (remaining, _body) = script_body(context, remaining)?; + let (remaining, _) = space0(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, Subscript { source })) } #[tracing::instrument(ret, level = "debug")] @@ -15,6 +38,84 @@ pub fn superscript<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, Superscript<'s>> { - not_yet_implemented()?; - todo!() + // We check for the circumflex first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the document root unnecessarily. + let (remaining, _) = tag("^")(input)?; + pre(context, input)?; + let (remaining, _body) = script_body(context, remaining)?; + let (remaining, _) = space0(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, Superscript { source })) +} + +#[tracing::instrument(ret, level = "debug")] +fn pre<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { + let document_root = context.get_document_root().unwrap(); + let preceding_character = get_one_before(document_root, input) + .map(|slice| slice.chars().next()) + .flatten(); + match preceding_character { + Some(c) if !c.is_whitespace() => {} + _ => { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Must be preceded by a non-whitespace character.", + )))); + } + }; + Ok((input, ())) +} + +#[derive(Debug)] +enum ScriptBody<'s> { + Braceless(&'s str), + WithBraces(Vec>), +} + +#[tracing::instrument(ret, level = "debug")] +fn script_body<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ScriptBody<'s>> { + alt(( + map(parser_with_context!(script_asterisk)(context), |body| { + ScriptBody::Braceless(body) + }), + map(parser_with_context!(script_alphanum)(context), |body| { + ScriptBody::Braceless(body) + }), + ))(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn script_asterisk<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + tag("*")(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn script_alphanum<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + let (remaining, _sign) = opt(recognize(one_of("+-")))(input)?; + let (remaining, _script) = many_till( + parser_with_context!(script_alphanum_character)(context), + parser_with_context!(end_script_alphanum_character)(context), + )(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, source)) +} + +#[tracing::instrument(ret, level = "debug")] +fn script_alphanum_character<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, &'s str> { + recognize(verify(anychar, |c| { + c.is_alphanumeric() || r#",.\"#.contains(*c) + }))(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn end_script_alphanum_character<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, &'s str> { + let (remaining, final_char) = recognize(verify(anychar, |c| c.is_alphanumeric()))(input)?; + peek(not(parser_with_context!(script_alphanum_character)( + context, + )))(remaining)?; + Ok((remaining, final_char)) }