From 993c73dc9fee4964e94b58ca9b1610bdfde1e421 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 24 Jul 2023 14:19:19 -0400 Subject: [PATCH 1/7] Create structure for subscript and superscript. --- src/compare/diff.rs | 50 +++++++++++++++++++++++++ src/parser/mod.rs | 3 ++ src/parser/object.rs | 26 +++++++++++++ src/parser/object_parser.rs | 18 ++++++++- src/parser/subscript_and_superscript.rs | 20 ++++++++++ src/parser/token.rs | 2 + 6 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 src/parser/subscript_and_superscript.rs diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 74cde67..79741bd 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -1,5 +1,7 @@ use super::util::assert_bounds; use super::util::assert_name; +use crate::parser::Subscript; +use crate::parser::Superscript; use crate::parser::sexp::Token; use crate::parser::AngleLink; use crate::parser::Bold; @@ -176,6 +178,8 @@ fn compare_object<'s>( Object::LineBreak(obj) => compare_line_break(source, emacs, obj), Object::Target(obj) => compare_target(source, emacs, obj), Object::StatisticsCookie(obj) => compare_statistics_cookie(source, emacs, obj), + Object::Subscript(obj) => compare_subscript(source, emacs, obj), + Object::Superscript(obj) => compare_superscript(source, emacs, obj), } } @@ -1513,3 +1517,49 @@ fn compare_statistics_cookie<'s>( children: Vec::new(), }) } + +fn compare_subscript<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Subscript<'s>, +) -> Result> { + let mut this_status = DiffStatus::Good; + let emacs_name = "subscript"; + if assert_name(emacs, emacs_name).is_err() { + this_status = DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + + Ok(DiffResult { + status: this_status, + name: emacs_name.to_owned(), + message: None, + children: Vec::new(), + }) +} + +fn compare_superscript<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Superscript<'s>, +) -> Result> { + 
let mut this_status = DiffStatus::Good; + let emacs_name = "superscript"; + if assert_name(emacs, emacs_name).is_err() { + this_status = DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + + Ok(DiffResult { + status: this_status, + name: emacs_name.to_owned(), + message: None, + children: Vec::new(), + }) +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3f0f284..a37107c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -43,6 +43,7 @@ mod regular_link; pub mod sexp; mod source; mod statistics_cookie; +mod subscript_and_superscript; mod table; mod target; mod text_markup; @@ -100,6 +101,8 @@ pub use object::RadioTarget; pub use object::RegularLink; pub use object::StatisticsCookie; pub use object::StrikeThrough; +pub use object::Subscript; +pub use object::Superscript; pub use object::Target; pub use object::Underline; pub use object::Verbatim; diff --git a/src/parser/object.rs b/src/parser/object.rs index 300e038..d9acdd3 100644 --- a/src/parser/object.rs +++ b/src/parser/object.rs @@ -26,6 +26,8 @@ pub enum Object<'s> { LineBreak(LineBreak<'s>), Target(Target<'s>), StatisticsCookie(StatisticsCookie<'s>), + Subscript(Subscript<'s>), + Superscript(Superscript<'s>), } #[derive(Debug, PartialEq)] @@ -167,6 +169,16 @@ pub struct StatisticsCookie<'s> { pub source: &'s str, } +#[derive(Debug, PartialEq)] +pub struct Subscript<'s> { + pub source: &'s str, +} + +#[derive(Debug, PartialEq)] +pub struct Superscript<'s> { + pub source: &'s str, +} + impl<'s> Source<'s> for Object<'s> { fn get_source(&'s self) -> &'s str { match self { @@ -194,6 +206,8 @@ impl<'s> Source<'s> for Object<'s> { Object::LineBreak(obj) => obj.source, Object::Target(obj) => obj.source, Object::StatisticsCookie(obj) => obj.source, + Object::Subscript(obj) => obj.source, + Object::Superscript(obj) => obj.source, } } } @@ -335,3 +349,15 @@ impl<'s> Source<'s> for StatisticsCookie<'s> { self.source } } + +impl<'s> Source<'s> 
for Subscript<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + +impl<'s> Source<'s> for Superscript<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} diff --git a/src/parser/object_parser.rs b/src/parser/object_parser.rs index 549aea8..b15819e 100644 --- a/src/parser/object_parser.rs +++ b/src/parser/object_parser.rs @@ -22,6 +22,8 @@ use crate::parser::plain_link::plain_link; use crate::parser::radio_link::radio_link; use crate::parser::radio_link::radio_target; use crate::parser::statistics_cookie::statistics_cookie; +use crate::parser::subscript_and_superscript::subscript; +use crate::parser::subscript_and_superscript::superscript; use crate::parser::target::target; use crate::parser::text_markup::text_markup; @@ -34,6 +36,11 @@ pub fn standard_set_object<'r, 's>( not(|i| context.check_exit_matcher(i))(input)?; alt(( + map(parser_with_context!(subscript)(context), Object::Subscript), + map( + parser_with_context!(superscript)(context), + Object::Superscript, + ), map( parser_with_context!(statistics_cookie)(context), Object::StatisticsCookie, @@ -84,10 +91,14 @@ pub fn minimal_set_object<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, Object<'s>> { - // TODO: superscripts and subscripts not(|i| context.check_exit_matcher(i))(input)?; alt(( + map(parser_with_context!(subscript)(context), Object::Subscript), + map( + parser_with_context!(superscript)(context), + Object::Superscript, + ), map(parser_with_context!(entity)(context), Object::Entity), map( parser_with_context!(latex_fragment)(context), @@ -105,6 +116,11 @@ pub fn any_object_except_plain_text<'r, 's>( ) -> Res<&'s str, Object<'s>> { // Used for exit matchers so this does not check exit matcher condition. 
alt(( + map(parser_with_context!(subscript)(context), Object::Subscript), + map( + parser_with_context!(superscript)(context), + Object::Superscript, + ), map( parser_with_context!(statistics_cookie)(context), Object::StatisticsCookie, diff --git a/src/parser/subscript_and_superscript.rs b/src/parser/subscript_and_superscript.rs new file mode 100644 index 0000000..4cb85da --- /dev/null +++ b/src/parser/subscript_and_superscript.rs @@ -0,0 +1,20 @@ +use super::Context; +use crate::error::Res; +use crate::parser::util::not_yet_implemented; +use crate::parser::Subscript; +use crate::parser::Superscript; + +#[tracing::instrument(ret, level = "debug")] +pub fn subscript<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Subscript<'s>> { + not_yet_implemented()?; + todo!() +} + +#[tracing::instrument(ret, level = "debug")] +pub fn superscript<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, Superscript<'s>> { + not_yet_implemented()?; + todo!() +} diff --git a/src/parser/token.rs b/src/parser/token.rs index b9ea9dd..45f51ba 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -65,6 +65,8 @@ impl<'r, 's> Token<'r, 's> { Object::LineBreak(_) => Box::new(std::iter::empty()), Object::Target(_) => Box::new(std::iter::empty()), Object::StatisticsCookie(_) => Box::new(std::iter::empty()), + Object::Subscript(_) => Box::new(std::iter::empty()), // TODO: Iterate over children + Object::Superscript(_) => Box::new(std::iter::empty()), // TODO: Iterate over children }, Token::Element(elem) => match elem { Element::Paragraph(inner) => Box::new(inner.children.iter().map(Token::Object)), From 4a565601c181287062e52374c9497a2031bea797 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 24 Jul 2023 14:29:20 -0400 Subject: [PATCH 2/7] Add test cases. 
--- org_mode_samples/subscript_and_superscript/simple.org | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 org_mode_samples/subscript_and_superscript/simple.org diff --git a/org_mode_samples/subscript_and_superscript/simple.org b/org_mode_samples/subscript_and_superscript/simple.org new file mode 100644 index 0000000..ffc382e --- /dev/null +++ b/org_mode_samples/subscript_and_superscript/simple.org @@ -0,0 +1,6 @@ +foo^* +bar_* +baz^{} +lorem_{} +ipsum^+,\.a5 +dolar_,\.a5 From 8c00ee24ba9fd11945e2ebce4e85ab7cbf0eb508 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 24 Jul 2023 14:33:04 -0400 Subject: [PATCH 3/7] Add a test to prove that subscript/superscript cannot start without a leading character even though it's at the start of the file. --- org_mode_samples/subscript_and_superscript/simple.org | 2 +- org_mode_samples/subscript_and_superscript/start_of_file.org | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 org_mode_samples/subscript_and_superscript/start_of_file.org diff --git a/org_mode_samples/subscript_and_superscript/simple.org b/org_mode_samples/subscript_and_superscript/simple.org index ffc382e..74cc815 100644 --- a/org_mode_samples/subscript_and_superscript/simple.org +++ b/org_mode_samples/subscript_and_superscript/simple.org @@ -1,6 +1,6 @@ foo^* bar_* -baz^{} +baz^{hello *world*} lorem_{} ipsum^+,\.a5 dolar_,\.a5 diff --git a/org_mode_samples/subscript_and_superscript/start_of_file.org b/org_mode_samples/subscript_and_superscript/start_of_file.org new file mode 100644 index 0000000..962b996 --- /dev/null +++ b/org_mode_samples/subscript_and_superscript/start_of_file.org @@ -0,0 +1 @@ +_{foo} From 6d4379d02959f1839ce4b93ccddabf5beb49061c Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 24 Jul 2023 14:44:27 -0400 Subject: [PATCH 4/7] Add a License section to the README. 
--- README.org | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.org b/README.org index 166efbd..b687e08 100644 --- a/README.org +++ b/README.org @@ -5,3 +5,6 @@ Organic is an emacs-less implementation of an [[https://orgmode.org/][org-mode]] * Project Status This project is a personal learning project to grow my experience in [[https://www.rust-lang.org/][rust]]. It is under development and at this time I would not recommend anyone use this code. The goal is to turn this into a project others can use, at which point more information will appear in this README. + +* License +This project is released under the public-domain-equivalent [[https://www.tldrlegal.com/license/bsd-0-clause-license][0BSD license]]. This license puts no restrictions on the use of this code (you do not even have to include the copyright notice or license text when using it). HOWEVER, this project has a couple of permissively licensed dependencies that do require their copyright notices and/or license texts to be included. I am not a lawyer and this is not legal advice, but it is my layperson's understanding that if you distribute a binary with this library linked in, you will need to abide by their terms, since their code will also be linked in your binary. I try to keep the dependencies to a minimum, and the most restrictive dependency I will ever include is a permissively licensed one. From f717d5e7df0760885e1ac82eac58444b0400cd24 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 24 Jul 2023 15:41:14 -0400 Subject: [PATCH 5/7] Implement parser for braceless subscript/superscript. 
--- .../subscript_and_superscript/simple.org | 1 + src/parser/subscript_and_superscript.rs | 111 +++++++++++++++++- 2 files changed, 107 insertions(+), 5 deletions(-) diff --git a/org_mode_samples/subscript_and_superscript/simple.org b/org_mode_samples/subscript_and_superscript/simple.org index 74cc815..b24299f 100644 --- a/org_mode_samples/subscript_and_superscript/simple.org +++ b/org_mode_samples/subscript_and_superscript/simple.org @@ -4,3 +4,4 @@ baz^{hello *world*} lorem_{} ipsum^+,\.a5 dolar_,\.a5 +text before foo_7 text afterwards diff --git a/src/parser/subscript_and_superscript.rs b/src/parser/subscript_and_superscript.rs index 4cb85da..1af3970 100644 --- a/src/parser/subscript_and_superscript.rs +++ b/src/parser/subscript_and_superscript.rs @@ -1,13 +1,36 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::character::complete::anychar; +use nom::character::complete::one_of; +use nom::character::complete::space0; +use nom::combinator::map; +use nom::combinator::not; +use nom::combinator::opt; +use nom::combinator::peek; +use nom::combinator::recognize; +use nom::combinator::verify; +use nom::multi::many_till; + use super::Context; +use super::Object; +use crate::error::CustomError; +use crate::error::MyError; use crate::error::Res; -use crate::parser::util::not_yet_implemented; +use crate::parser::parser_with_context::parser_with_context; +use crate::parser::util::get_consumed; +use crate::parser::util::get_one_before; use crate::parser::Subscript; use crate::parser::Superscript; #[tracing::instrument(ret, level = "debug")] pub fn subscript<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Subscript<'s>> { - not_yet_implemented()?; - todo!() + // We check for the underscore first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the document root unnecessarily. 
+ let (remaining, _) = tag("_")(input)?; + pre(context, input)?; + let (remaining, _body) = script_body(context, remaining)?; + let (remaining, _) = space0(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, Subscript { source })) } #[tracing::instrument(ret, level = "debug")] @@ -15,6 +38,84 @@ pub fn superscript<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, Superscript<'s>> { - not_yet_implemented()?; - todo!() + // We check for the circumflex first before checking the pre-character as a minor optimization to avoid walking up the context tree to find the document root unnecessarily. + let (remaining, _) = tag("^")(input)?; + pre(context, input)?; + let (remaining, _body) = script_body(context, remaining)?; + let (remaining, _) = space0(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, Superscript { source })) +} + +#[tracing::instrument(ret, level = "debug")] +fn pre<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { + let document_root = context.get_document_root().unwrap(); + let preceding_character = get_one_before(document_root, input) + .map(|slice| slice.chars().next()) + .flatten(); + match preceding_character { + Some(c) if !c.is_whitespace() => {} + _ => { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Must be preceded by a non-whitespace character.", + )))); + } + }; + Ok((input, ())) +} + +#[derive(Debug)] +enum ScriptBody<'s> { + Braceless(&'s str), + WithBraces(Vec>), +} + +#[tracing::instrument(ret, level = "debug")] +fn script_body<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ScriptBody<'s>> { + alt(( + map(parser_with_context!(script_asterisk)(context), |body| { + ScriptBody::Braceless(body) + }), + map(parser_with_context!(script_alphanum)(context), |body| { + ScriptBody::Braceless(body) + }), + ))(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn script_asterisk<'r, 's>(context: Context<'r, 's>, input: 
&'s str) -> Res<&'s str, &'s str> { + tag("*")(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn script_alphanum<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + let (remaining, _sign) = opt(recognize(one_of("+-")))(input)?; + let (remaining, _script) = many_till( + parser_with_context!(script_alphanum_character)(context), + parser_with_context!(end_script_alphanum_character)(context), + )(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, source)) +} + +#[tracing::instrument(ret, level = "debug")] +fn script_alphanum_character<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, &'s str> { + recognize(verify(anychar, |c| { + c.is_alphanumeric() || r#",.\"#.contains(*c) + }))(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn end_script_alphanum_character<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, &'s str> { + let (remaining, final_char) = recognize(verify(anychar, |c| c.is_alphanumeric()))(input)?; + peek(not(parser_with_context!(script_alphanum_character)( + context, + )))(remaining)?; + Ok((remaining, final_char)) } From 23d587c6992dfae4772bfa8c44caa90f24c07a48 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 24 Jul 2023 16:29:31 -0400 Subject: [PATCH 6/7] Implement parser for subscript/superscript with braces. --- src/parser/parser_context.rs | 18 ++++++ src/parser/subscript_and_superscript.rs | 83 +++++++++++++++++++++++++ 2 files changed, 101 insertions(+) diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index a4809e7..e7063e4 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -194,6 +194,18 @@ pub enum ContextElement<'r, 's> { /// unbalanced brackets can be detected in the middle of an /// object. InlineSourceBlockBracket(InlineSourceBlockBracket<'s>), + + /// Stores the current brace depth inside a + /// subscript or superscript. 
+ /// + /// Inside the braces of a subscript or superscript there must be + /// balanced braces {}, so this stores the number of opening + /// braces minus the number of closing braces, which must equal + /// zero by the end of the definition. + /// + /// A reference to the position in the string is also included so + /// unbalanced braces can be detected in the middle of an object. + SubscriptSuperscriptBrace(SubscriptSuperscriptBrace<'s>), } pub struct ExitMatcherNode<'r> { @@ -225,6 +237,12 @@ pub struct InlineSourceBlockBracket<'s> { pub depth: usize, } +#[derive(Debug)] +pub struct SubscriptSuperscriptBrace<'s> { + pub position: &'s str, + pub depth: usize, +} + impl<'r> std::fmt::Debug for ExitMatcherNode<'r> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut formatter = f.debug_struct("ExitMatcherNode"); diff --git a/src/parser/subscript_and_superscript.rs b/src/parser/subscript_and_superscript.rs index 1af3970..5ac1df7 100644 --- a/src/parser/subscript_and_superscript.rs +++ b/src/parser/subscript_and_superscript.rs @@ -16,7 +16,13 @@ use super::Object; use crate::error::CustomError; use crate::error::MyError; use crate::error::Res; +use crate::parser::exiting::ExitClass; +use crate::parser::object_parser::standard_set_object; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::parser_context::SubscriptSuperscriptBrace; use crate::parser::parser_with_context::parser_with_context; +use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::parser::util::get_one_before; use crate::parser::Subscript; @@ -79,6 +85,9 @@ fn script_body<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, map(parser_with_context!(script_alphanum)(context), |body| { ScriptBody::Braceless(body) }), + map(parser_with_context!(script_with_braces)(context), |body| { + ScriptBody::WithBraces(body) + }), ))(input) } @@ -119,3 +128,77 @@ fn 
end_script_alphanum_character<'r, 's>( )))(remaining)?; Ok((remaining, final_char)) } + +#[tracing::instrument(ret, level = "debug")] +fn script_with_braces<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, Vec>> { + let (remaining, _) = tag("{")(input)?; + let parser_context = context + .with_additional_node(ContextElement::SubscriptSuperscriptBrace( + SubscriptSuperscriptBrace { + position: remaining, + depth: 0, + }, + )) + .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Beta, + exit_matcher: &script_with_braces_end, + })); + + let (remaining, (children, _exit_contents)) = many_till( + parser_with_context!(standard_set_object)(&parser_context), + parser_with_context!(exit_matcher_parser)(&parser_context), + )(remaining)?; + + let (remaining, _) = tag("}")(remaining)?; + Ok((remaining, children)) +} + +#[tracing::instrument(ret, level = "debug")] +fn script_with_braces_end<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, &'s str> { + let context_depth = get_bracket_depth(context) + .expect("This function should only be called from inside a subscript or superscript."); + let text_since_context_entry = get_consumed(context_depth.position, input); + let mut current_depth = context_depth.depth; + for c in text_since_context_entry.chars() { + match c { + '{' => { + current_depth += 1; + } + '}' if current_depth == 0 => { + panic!("Exceeded subscript or superscript brace depth.") + } + '}' if current_depth > 0 => { + current_depth -= 1; + } + _ => {} + } + } + if current_depth == 0 { + let close_bracket = tag::<&str, &str, CustomError<&str>>("}")(input); + if close_bracket.is_ok() { + return close_bracket; + } + } + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Not a valid end for subscript or superscript.", + )))); +} + +#[tracing::instrument(ret, level = "debug")] +fn get_bracket_depth<'r, 's>( + context: Context<'r, 's>, +) -> Option<&'r 
SubscriptSuperscriptBrace<'s>> { + for node in context.iter() { + match node.get_data() { + ContextElement::SubscriptSuperscriptBrace(depth) => return Some(depth), + _ => {} + } + } + None +} From 63fcad2ac698d6d3bd4a30af26eb1fe1d958026c Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Mon, 24 Jul 2023 16:32:56 -0400 Subject: [PATCH 7/7] Enable tests that needed subscript/superscript implemented. --- build.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/build.rs b/build.rs index d66de88..75f7b6b 100644 --- a/build.rs +++ b/build.rs @@ -73,11 +73,6 @@ fn is_expect_fail(name: &str) -> Option<&str> { match name { "drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."), "element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."), - "element_container_priority_drawer_greater_block" => Some("Need to implement subscript."), - "element_container_priority_dynamic_block_greater_block" => Some("Need to implement subscript."), - "element_container_priority_footnote_definition_greater_block" => Some("Need to implement subscript."), - "element_container_priority_greater_block_greater_block" => Some("Need to implement subscript."), - "element_container_priority_section_greater_block" => Some("Need to implement subscript."), "paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."), "export_snippet_paragraph_break_precedent" => Some("Emacs 28 has broken behavior so the tests in the CI fail."), _ => None,