Implement parser for braceless subscript/superscript.

2023-07-24 15:41:14 -04:00 · 2023-07-24 15:41:14 -04:00 · f717d5e7df
commit f717d5e7df
parent 6d4379d029
2 changed files with 107 additions and 5 deletions
--- a/org_mode_samples/subscript_and_superscript/simple.org
+++ b/org_mode_samples/subscript_and_superscript/simple.org
@ -4,3 +4,4 @@ baz^{hello *world*}
 lorem_{}
 ipsum^+,\.a5
 dolar_,\.a5
+text before foo_7 text afterwards
--- a/src/parser/subscript_and_superscript.rs
+++ b/src/parser/subscript_and_superscript.rs
@ -1,13 +1,36 @@
+use nom::branch::alt;
+use nom::bytes::complete::tag;
+use nom::character::complete::anychar;
+use nom::character::complete::one_of;
+use nom::character::complete::space0;
+use nom::combinator::map;
+use nom::combinator::not;
+use nom::combinator::opt;
+use nom::combinator::peek;
+use nom::combinator::recognize;
+use nom::combinator::verify;
+use nom::multi::many_till;
+
 use super::Context;
+use super::Object;
+use crate::error::CustomError;
+use crate::error::MyError;
 use crate::error::Res;
-use crate::parser::util::not_yet_implemented;
+use crate::parser::parser_with_context::parser_with_context;
+use crate::parser::util::get_consumed;
+use crate::parser::util::get_one_before;
 use crate::parser::Subscript;
 use crate::parser::Superscript;

 #[tracing::instrument(ret, level = "debug")]
 pub fn subscript<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Subscript<'s>> {
-    not_yet_implemented()?;
-    todo!()
+    // We check for the underscore first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the document root unnecessarily.
+    let (remaining, _) = tag("_")(input)?;
+    pre(context, input)?;
+    let (remaining, _body) = script_body(context, remaining)?;
+    let (remaining, _) = space0(remaining)?;
+    let source = get_consumed(input, remaining);
+    Ok((remaining, Subscript { source }))
 }

 #[tracing::instrument(ret, level = "debug")]
@ -15,6 +38,84 @@ pub fn superscript<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
 ) -> Res<&'s str, Superscript<'s>> {
-    not_yet_implemented()?;
-    todo!()
+    // We check for the circumflex first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the document root unnecessarily.
+    let (remaining, _) = tag("^")(input)?;
+    pre(context, input)?;
+    let (remaining, _body) = script_body(context, remaining)?;
+    let (remaining, _) = space0(remaining)?;
+    let source = get_consumed(input, remaining);
+    Ok((remaining, Superscript { source }))
+}
+
+/// Checks the character in front of a subscript/superscript marker.
+///
+/// `input` is the text starting at the marker. Succeeds, consuming nothing, when
+/// `get_one_before` yields a slice whose first character is not whitespace; otherwise returns a
+/// recoverable nom error (this includes the case where `get_one_before` yields nothing).
+#[tracing::instrument(ret, level = "debug")]
+fn pre<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
+    // NOTE(review): panics if the context has no document root; presumably every parse is
+    // rooted in one. Confirm against callers.
+    let document_root = context.get_document_root().unwrap();
+    let preceding_character =
+        get_one_before(document_root, input).and_then(|slice| slice.chars().next());
+    match preceding_character {
+        Some(c) if !c.is_whitespace() => Ok((input, ())),
+        _ => Err(nom::Err::Error(CustomError::MyError(MyError(
+            "Must be preceded by a non-whitespace character.",
+        )))),
+    }
+}
+
+/// The body of a subscript or superscript, as produced by `script_body`.
+#[derive(Debug)]
+enum ScriptBody<'s> {
+    /// A body written without braces, kept as the matched string slice.
+    Braceless(&'s str),
+    /// A brace-delimited body holding parsed objects.
+    /// NOTE(review): nothing visible in this change constructs this variant yet.
+    WithBraces(Vec<Object<'s>>),
+}
+
+/// Parses the body that follows a `_` or `^` marker.
+///
+/// Tries a lone asterisk first and then the sign/alphanumeric pattern; whichever matches is
+/// wrapped in `ScriptBody::Braceless`. Fails when neither alternative matches.
+#[tracing::instrument(ret, level = "debug")]
+fn script_body<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ScriptBody<'s>> {
+    // Both alternatives yield a `&str`, so a single `map` over the `alt` wraps either result.
+    map(
+        alt((
+            parser_with_context!(script_asterisk)(context),
+            parser_with_context!(script_alphanum)(context),
+        )),
+        ScriptBody::Braceless,
+    )(input)
+}
+
+/// Matches a single `*` at the start of `input`; `script_body` uses it as one form of braceless body.
+#[tracing::instrument(ret, level = "debug")]
+fn script_asterisk<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
+    // `context` is not consulted by the parsing itself; the parameter lets this function be
+    // wrapped with `parser_with_context!` like its siblings.
+    tag("*")(input)
+}
+
+/// Parses a braceless script body of the form: optional `+`/`-` sign, then characters accepted
+/// by `script_alphanum_character`, repeated until `end_script_alphanum_character` matches.
+///
+/// Returns the remaining input together with `get_consumed(input, remaining)` (presumably the
+/// slice of `input` that was matched, sign included).
+#[tracing::instrument(ret, level = "debug")]
+fn script_alphanum<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
+    let (after_sign, _sign) = opt(recognize(one_of("+-")))(input)?;
+    // The individual matches are discarded; only the overall consumed span is returned.
+    let (after_script, _script) = many_till(
+        parser_with_context!(script_alphanum_character)(context),
+        parser_with_context!(end_script_alphanum_character)(context),
+    )(after_sign)?;
+    Ok((after_script, get_consumed(input, after_script)))
+}
+
+/// Matches exactly one character that may appear inside a braceless script body: any
+/// alphanumeric character, or one of `,`, `.` and `\`.
+#[tracing::instrument(ret, level = "debug")]
+fn script_alphanum_character<'r, 's>(
+    context: Context<'r, 's>,
+    input: &'s str,
+) -> Res<&'s str, &'s str> {
+    let is_script_character = |c: &char| c.is_alphanumeric() || r#",.\"#.contains(*c);
+    // `recognize` turns the matched `char` back into the one-character slice of `input`.
+    recognize(verify(anychar, is_script_character))(input)
+}
+
+/// Matches the final character of a braceless script body.
+///
+/// The final character must be alphanumeric, and no further alphanumeric character may be
+/// reachable through a run of `,`, `.` or `\` after it. Trailing punctuation is therefore left
+/// out of the script: in `a. ` the final character is `a`, while in `a.b` it is `b`.
+///
+/// Returns the input positioned right after the final character (trailing punctuation is not
+/// consumed) together with the one-character slice that matched.
+#[tracing::instrument(ret, level = "debug")]
+fn end_script_alphanum_character<'r, 's>(
+    context: Context<'r, 's>,
+    input: &'s str,
+) -> Res<&'s str, &'s str> {
+    let (remaining, final_char) = recognize(verify(anychar, |c| c.is_alphanumeric()))(input)?;
+    // Punctuation only belongs to the script when an alphanumeric character follows it, so look
+    // past any run of it before deciding. This set must match the punctuation accepted by
+    // `script_alphanum_character`.
+    let after_punctuation = remaining.trim_start_matches(|c: char| r#",.\"#.contains(c));
+    peek(not(verify(anychar, |c: &char| c.is_alphanumeric())))(after_punctuation)?;
+    Ok((remaining, final_char))
 }