diff --git a/README.org b/README.org index 166efbd..b687e08 100644 --- a/README.org +++ b/README.org @@ -5,3 +5,6 @@ Organic is an emacs-less implementation of an [[https://orgmode.org/][org-mode]] * Project Status This project is a personal learning project to grow my experience in [[https://www.rust-lang.org/][rust]]. It is under development and at this time I would not recommend anyone use this code. The goal is to turn this into a project others can use, at which point more information will appear in this README. + +* License +This project is released under the public-domain-equivalent [[https://www.tldrlegal.com/license/bsd-0-clause-license][0BSD license]]. This license puts no restrictions on the use of this code (you do not even have to include the copyright notice or license text when using it). HOWEVER, this project has a couple permissively licensed dependencies which do require their copyright notices and/or license texts to be included. I am not a lawyer and this is not legal advice but it is my layperson's understanding that if you distribute a binary with this library linked in, you will need to abide by their terms since their code will also be linked in your binary. I try to keep the dependencies to a minimum and the most restrictive dependency I will ever include is a permissively licensed one. diff --git a/build.rs b/build.rs index d66de88..75f7b6b 100644 --- a/build.rs +++ b/build.rs @@ -73,11 +73,6 @@ fn is_expect_fail(name: &str) -> Option<&str> { match name { "drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. 
This odd behavior needs to be investigated more."), "element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."), - "element_container_priority_drawer_greater_block" => Some("Need to implement subscript."), - "element_container_priority_dynamic_block_greater_block" => Some("Need to implement subscript."), - "element_container_priority_footnote_definition_greater_block" => Some("Need to implement subscript."), - "element_container_priority_greater_block_greater_block" => Some("Need to implement subscript."), - "element_container_priority_section_greater_block" => Some("Need to implement subscript."), "paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."), "export_snippet_paragraph_break_precedent" => Some("Emacs 28 has broken behavior so the tests in the CI fail."), _ => None, diff --git a/org_mode_samples/subscript_and_superscript/simple.org b/org_mode_samples/subscript_and_superscript/simple.org new file mode 100644 index 0000000..b24299f --- /dev/null +++ b/org_mode_samples/subscript_and_superscript/simple.org @@ -0,0 +1,7 @@ +foo^* +bar_* +baz^{hello *world*} +lorem_{} +ipsum^+,\.a5 +dolar_,\.a5 +text before foo_7 text afterwards diff --git a/org_mode_samples/subscript_and_superscript/start_of_file.org b/org_mode_samples/subscript_and_superscript/start_of_file.org new file mode 100644 index 0000000..962b996 --- /dev/null +++ b/org_mode_samples/subscript_and_superscript/start_of_file.org @@ -0,0 +1 @@ +_{foo} diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 74cde67..79741bd 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -1,5 +1,7 @@ use super::util::assert_bounds; use super::util::assert_name; +use crate::parser::Subscript; +use crate::parser::Superscript; use crate::parser::sexp::Token; use crate::parser::AngleLink; use 
crate::parser::Bold; @@ -176,6 +178,8 @@ fn compare_object<'s>( Object::LineBreak(obj) => compare_line_break(source, emacs, obj), Object::Target(obj) => compare_target(source, emacs, obj), Object::StatisticsCookie(obj) => compare_statistics_cookie(source, emacs, obj), + Object::Subscript(obj) => compare_subscript(source, emacs, obj), + Object::Superscript(obj) => compare_superscript(source, emacs, obj), } } @@ -1513,3 +1517,49 @@ fn compare_statistics_cookie<'s>( children: Vec::new(), }) } + +fn compare_subscript<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Subscript<'s>, +) -> Result> { + let mut this_status = DiffStatus::Good; + let emacs_name = "subscript"; + if assert_name(emacs, emacs_name).is_err() { + this_status = DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + + Ok(DiffResult { + status: this_status, + name: emacs_name.to_owned(), + message: None, + children: Vec::new(), + }) +} + +fn compare_superscript<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Superscript<'s>, +) -> Result> { + let mut this_status = DiffStatus::Good; + let emacs_name = "superscript"; + if assert_name(emacs, emacs_name).is_err() { + this_status = DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + + Ok(DiffResult { + status: this_status, + name: emacs_name.to_owned(), + message: None, + children: Vec::new(), + }) +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3f0f284..a37107c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -43,6 +43,7 @@ mod regular_link; pub mod sexp; mod source; mod statistics_cookie; +mod subscript_and_superscript; mod table; mod target; mod text_markup; @@ -100,6 +101,8 @@ pub use object::RadioTarget; pub use object::RegularLink; pub use object::StatisticsCookie; pub use object::StrikeThrough; +pub use object::Subscript; +pub use object::Superscript; pub use object::Target; pub use 
object::Underline; pub use object::Verbatim; diff --git a/src/parser/object.rs b/src/parser/object.rs index 300e038..d9acdd3 100644 --- a/src/parser/object.rs +++ b/src/parser/object.rs @@ -26,6 +26,8 @@ pub enum Object<'s> { LineBreak(LineBreak<'s>), Target(Target<'s>), StatisticsCookie(StatisticsCookie<'s>), + Subscript(Subscript<'s>), + Superscript(Superscript<'s>), } #[derive(Debug, PartialEq)] @@ -167,6 +169,16 @@ pub struct StatisticsCookie<'s> { pub source: &'s str, } +#[derive(Debug, PartialEq)] +pub struct Subscript<'s> { + pub source: &'s str, +} + +#[derive(Debug, PartialEq)] +pub struct Superscript<'s> { + pub source: &'s str, +} + impl<'s> Source<'s> for Object<'s> { fn get_source(&'s self) -> &'s str { match self { @@ -194,6 +206,8 @@ impl<'s> Source<'s> for Object<'s> { Object::LineBreak(obj) => obj.source, Object::Target(obj) => obj.source, Object::StatisticsCookie(obj) => obj.source, + Object::Subscript(obj) => obj.source, + Object::Superscript(obj) => obj.source, } } } @@ -335,3 +349,15 @@ impl<'s> Source<'s> for StatisticsCookie<'s> { self.source } } + +impl<'s> Source<'s> for Subscript<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + +impl<'s> Source<'s> for Superscript<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} diff --git a/src/parser/object_parser.rs b/src/parser/object_parser.rs index 549aea8..b15819e 100644 --- a/src/parser/object_parser.rs +++ b/src/parser/object_parser.rs @@ -22,6 +22,8 @@ use crate::parser::plain_link::plain_link; use crate::parser::radio_link::radio_link; use crate::parser::radio_link::radio_target; use crate::parser::statistics_cookie::statistics_cookie; +use crate::parser::subscript_and_superscript::subscript; +use crate::parser::subscript_and_superscript::superscript; use crate::parser::target::target; use crate::parser::text_markup::text_markup; @@ -34,6 +36,11 @@ pub fn standard_set_object<'r, 's>( not(|i| context.check_exit_matcher(i))(input)?; alt(( + 
map(parser_with_context!(subscript)(context), Object::Subscript), + map( + parser_with_context!(superscript)(context), + Object::Superscript, + ), map( parser_with_context!(statistics_cookie)(context), Object::StatisticsCookie, @@ -84,10 +91,14 @@ pub fn minimal_set_object<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, Object<'s>> { - // TODO: superscripts and subscripts not(|i| context.check_exit_matcher(i))(input)?; alt(( + map(parser_with_context!(subscript)(context), Object::Subscript), + map( + parser_with_context!(superscript)(context), + Object::Superscript, + ), map(parser_with_context!(entity)(context), Object::Entity), map( parser_with_context!(latex_fragment)(context), @@ -105,6 +116,11 @@ pub fn any_object_except_plain_text<'r, 's>( ) -> Res<&'s str, Object<'s>> { // Used for exit matchers so this does not check exit matcher condition. alt(( + map(parser_with_context!(subscript)(context), Object::Subscript), + map( + parser_with_context!(superscript)(context), + Object::Superscript, + ), map( parser_with_context!(statistics_cookie)(context), Object::StatisticsCookie, diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index a4809e7..e7063e4 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -194,6 +194,18 @@ pub enum ContextElement<'r, 's> { /// unbalanced brackets can be detected in the middle of an /// object. InlineSourceBlockBracket(InlineSourceBlockBracket<'s>), + + /// Stores the current brace depth inside a + /// subscript or superscript. + /// + /// Inside the braces of a subscript or superscript there must be + /// balanced braces {}: the number of opening braces minus the + /// number of closing braces within the definition must equal zero. + /// This stores that running difference. + /// + /// A reference to the position in the string is also included so + /// unbalanced braces can be detected in the middle of an object. 
+ SubscriptSuperscriptBrace(SubscriptSuperscriptBrace<'s>), } pub struct ExitMatcherNode<'r> { @@ -225,6 +237,12 @@ pub struct InlineSourceBlockBracket<'s> { pub depth: usize, } +#[derive(Debug)] +pub struct SubscriptSuperscriptBrace<'s> { + pub position: &'s str, + pub depth: usize, +} + impl<'r> std::fmt::Debug for ExitMatcherNode<'r> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut formatter = f.debug_struct("ExitMatcherNode"); diff --git a/src/parser/subscript_and_superscript.rs b/src/parser/subscript_and_superscript.rs new file mode 100644 index 0000000..5ac1df7 --- /dev/null +++ b/src/parser/subscript_and_superscript.rs @@ -0,0 +1,204 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::character::complete::anychar; +use nom::character::complete::one_of; +use nom::character::complete::space0; +use nom::combinator::map; +use nom::combinator::not; +use nom::combinator::opt; +use nom::combinator::peek; +use nom::combinator::recognize; +use nom::combinator::verify; +use nom::multi::many_till; + +use super::Context; +use super::Object; +use crate::error::CustomError; +use crate::error::MyError; +use crate::error::Res; +use crate::parser::exiting::ExitClass; +use crate::parser::object_parser::standard_set_object; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::parser_context::SubscriptSuperscriptBrace; +use crate::parser::parser_with_context::parser_with_context; +use crate::parser::util::exit_matcher_parser; +use crate::parser::util::get_consumed; +use crate::parser::util::get_one_before; +use crate::parser::Subscript; +use crate::parser::Superscript; + +#[tracing::instrument(ret, level = "debug")] +pub fn subscript<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Subscript<'s>> { + // We check for the underscore first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the 
document root unnecessarily. + let (remaining, _) = tag("_")(input)?; + pre(context, input)?; + let (remaining, _body) = script_body(context, remaining)?; + let (remaining, _) = space0(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, Subscript { source })) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn superscript<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, Superscript<'s>> { + // We check for the circumflex first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the document root unnecessarily. + let (remaining, _) = tag("^")(input)?; + pre(context, input)?; + let (remaining, _body) = script_body(context, remaining)?; + let (remaining, _) = space0(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, Superscript { source })) +} + +#[tracing::instrument(ret, level = "debug")] +fn pre<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { + let document_root = context.get_document_root().unwrap(); + let preceding_character = get_one_before(document_root, input) + .map(|slice| slice.chars().next()) + .flatten(); + match preceding_character { + Some(c) if !c.is_whitespace() => {} + _ => { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Must be preceded by a non-whitespace character.", + )))); + } + }; + Ok((input, ())) +} + +#[derive(Debug)] +enum ScriptBody<'s> { + Braceless(&'s str), + WithBraces(Vec>), +} + +#[tracing::instrument(ret, level = "debug")] +fn script_body<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ScriptBody<'s>> { + alt(( + map(parser_with_context!(script_asterisk)(context), |body| { + ScriptBody::Braceless(body) + }), + map(parser_with_context!(script_alphanum)(context), |body| { + ScriptBody::Braceless(body) + }), + map(parser_with_context!(script_with_braces)(context), |body| { + ScriptBody::WithBraces(body) + }), + ))(input) +} + 
+#[tracing::instrument(ret, level = "debug")] +fn script_asterisk<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + tag("*")(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn script_alphanum<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + let (remaining, _sign) = opt(recognize(one_of("+-")))(input)?; + let (remaining, _script) = many_till( + parser_with_context!(script_alphanum_character)(context), + parser_with_context!(end_script_alphanum_character)(context), + )(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, source)) +} + +#[tracing::instrument(ret, level = "debug")] +fn script_alphanum_character<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, &'s str> { + recognize(verify(anychar, |c| { + c.is_alphanumeric() || r#",.\"#.contains(*c) + }))(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn end_script_alphanum_character<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, &'s str> { + let (remaining, final_char) = recognize(verify(anychar, |c| c.is_alphanumeric()))(input)?; + peek(not(parser_with_context!(script_alphanum_character)( + context, + )))(remaining)?; + Ok((remaining, final_char)) +} + +#[tracing::instrument(ret, level = "debug")] +fn script_with_braces<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, Vec>> { + let (remaining, _) = tag("{")(input)?; + let parser_context = context + .with_additional_node(ContextElement::SubscriptSuperscriptBrace( + SubscriptSuperscriptBrace { + position: remaining, + depth: 0, + }, + )) + .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Beta, + exit_matcher: &script_with_braces_end, + })); + + let (remaining, (children, _exit_contents)) = many_till( + parser_with_context!(standard_set_object)(&parser_context), + parser_with_context!(exit_matcher_parser)(&parser_context), + )(remaining)?; + + let 
(remaining, _) = tag("}")(remaining)?; + Ok((remaining, children)) +} + +#[tracing::instrument(ret, level = "debug")] +fn script_with_braces_end<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, &'s str> { + let context_depth = get_bracket_depth(context) + .expect("This function should only be called from inside a subscript or superscript."); + let text_since_context_entry = get_consumed(context_depth.position, input); + let mut current_depth = context_depth.depth; + for c in text_since_context_entry.chars() { + match c { + '{' => { + current_depth += 1; + } + '}' if current_depth == 0 => { + panic!("Exceeded subscript or superscript brace depth.") + } + '}' if current_depth > 0 => { + current_depth -= 1; + } + _ => {} + } + } + if current_depth == 0 { + let close_bracket = tag::<&str, &str, CustomError<&str>>("}")(input); + if close_bracket.is_ok() { + return close_bracket; + } + } + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Not a valid end for subscript or superscript.", + )))); +} + +#[tracing::instrument(ret, level = "debug")] +fn get_bracket_depth<'r, 's>( + context: Context<'r, 's>, +) -> Option<&'r SubscriptSuperscriptBrace<'s>> { + for node in context.iter() { + match node.get_data() { + ContextElement::SubscriptSuperscriptBrace(depth) => return Some(depth), + _ => {} + } + } + None +} diff --git a/src/parser/token.rs b/src/parser/token.rs index b9ea9dd..45f51ba 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -65,6 +65,8 @@ impl<'r, 's> Token<'r, 's> { Object::LineBreak(_) => Box::new(std::iter::empty()), Object::Target(_) => Box::new(std::iter::empty()), Object::StatisticsCookie(_) => Box::new(std::iter::empty()), + Object::Subscript(_) => Box::new(std::iter::empty()), // TODO: Iterate over children + Object::Superscript(_) => Box::new(std::iter::empty()), // TODO: Iterate over children }, Token::Element(elem) => match elem { Element::Paragraph(inner) => 
Box::new(inner.children.iter().map(Token::Object)),