Merge branch 'subscript_superscript'
All checks were successful
semver Build semver has succeeded
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has succeeded

This commit is contained in:
Tom Alexander 2023-07-24 16:33:13 -04:00
commit 3b11d8fb61
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
11 changed files with 331 additions and 6 deletions

View File

@ -5,3 +5,6 @@ Organic is an emacs-less implementation of an [[https://orgmode.org/][org-mode]]
* Project Status
This project is a personal learning project to grow my experience in [[https://www.rust-lang.org/][rust]]. It is under development and at this time I would not recommend anyone use this code. The goal is to turn this into a project others can use, at which point more information will appear in this README.
* License
This project is released under the public-domain-equivalent [[https://www.tldrlegal.com/license/bsd-0-clause-license][0BSD license]]. This license puts no restrictions on the use of this code (you do not even have to include the copyright notice or license text when using it). HOWEVER, this project has a couple permissively licensed dependencies which do require their copyright notices and/or license texts to be included. I am not a lawyer and this is not legal advice but it is my layperson's understanding that if you distribute a binary with this library linked in, you will need to abide by their terms since their code will also be linked in your binary. I try to keep the dependencies to a minimum and the most restrictive dependency I will ever include is a permissively licensed one.

View File

@ -73,11 +73,6 @@ fn is_expect_fail(name: &str) -> Option<&str> {
match name {
"drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."),
"element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."),
"element_container_priority_drawer_greater_block" => Some("Need to implement subscript."),
"element_container_priority_dynamic_block_greater_block" => Some("Need to implement subscript."),
"element_container_priority_footnote_definition_greater_block" => Some("Need to implement subscript."),
"element_container_priority_greater_block_greater_block" => Some("Need to implement subscript."),
"element_container_priority_section_greater_block" => Some("Need to implement subscript."),
"paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."),
"export_snippet_paragraph_break_precedent" => Some("Emacs 28 has broken behavior so the tests in the CI fail."),
_ => None,

View File

@ -0,0 +1,7 @@
foo^*
bar_*
baz^{hello *world*}
lorem_{}
ipsum^+,\.a5
dolar_,\.a5
text before foo_7 text afterwards

View File

@ -0,0 +1 @@
_{foo}

View File

@ -1,5 +1,7 @@
use super::util::assert_bounds;
use super::util::assert_name;
use crate::parser::Subscript;
use crate::parser::Superscript;
use crate::parser::sexp::Token;
use crate::parser::AngleLink;
use crate::parser::Bold;
@ -176,6 +178,8 @@ fn compare_object<'s>(
Object::LineBreak(obj) => compare_line_break(source, emacs, obj),
Object::Target(obj) => compare_target(source, emacs, obj),
Object::StatisticsCookie(obj) => compare_statistics_cookie(source, emacs, obj),
Object::Subscript(obj) => compare_subscript(source, emacs, obj),
Object::Superscript(obj) => compare_superscript(source, emacs, obj),
}
}
@ -1513,3 +1517,49 @@ fn compare_statistics_cookie<'s>(
children: Vec::new(),
})
}
/// Compares the emacs token for a subscript against the subscript
/// produced by the rust parser.
///
/// The resulting status is `Bad` when the emacs token is not named
/// "subscript" or when the bounds check against the rust object
/// fails; otherwise it is `Good`. No children are compared.
fn compare_subscript<'s>(
    source: &'s str,
    emacs: &'s Token<'s>,
    rust: &'s Subscript<'s>,
) -> Result<DiffResult, Box<dyn std::error::Error>> {
    let emacs_name = "subscript";

    // Both checks are evaluated unconditionally before combining them.
    let name_matches = assert_name(emacs, emacs_name).is_ok();
    let bounds_match = assert_bounds(source, emacs, rust).is_ok();
    let this_status = if name_matches && bounds_match {
        DiffStatus::Good
    } else {
        DiffStatus::Bad
    };

    Ok(DiffResult {
        status: this_status,
        name: emacs_name.to_owned(),
        message: None,
        children: Vec::new(),
    })
}
/// Compares the emacs token for a superscript against the superscript
/// produced by the rust parser.
///
/// The resulting status is `Bad` when the emacs token is not named
/// "superscript" or when the bounds check against the rust object
/// fails; otherwise it is `Good`. No children are compared.
fn compare_superscript<'s>(
    source: &'s str,
    emacs: &'s Token<'s>,
    rust: &'s Superscript<'s>,
) -> Result<DiffResult, Box<dyn std::error::Error>> {
    let emacs_name = "superscript";

    // Both checks are evaluated unconditionally before combining them.
    let name_matches = assert_name(emacs, emacs_name).is_ok();
    let bounds_match = assert_bounds(source, emacs, rust).is_ok();
    let this_status = if name_matches && bounds_match {
        DiffStatus::Good
    } else {
        DiffStatus::Bad
    };

    Ok(DiffResult {
        status: this_status,
        name: emacs_name.to_owned(),
        message: None,
        children: Vec::new(),
    })
}

View File

@ -43,6 +43,7 @@ mod regular_link;
pub mod sexp;
mod source;
mod statistics_cookie;
mod subscript_and_superscript;
mod table;
mod target;
mod text_markup;
@ -100,6 +101,8 @@ pub use object::RadioTarget;
pub use object::RegularLink;
pub use object::StatisticsCookie;
pub use object::StrikeThrough;
pub use object::Subscript;
pub use object::Superscript;
pub use object::Target;
pub use object::Underline;
pub use object::Verbatim;

View File

@ -26,6 +26,8 @@ pub enum Object<'s> {
LineBreak(LineBreak<'s>),
Target(Target<'s>),
StatisticsCookie(StatisticsCookie<'s>),
Subscript(Subscript<'s>),
Superscript(Superscript<'s>),
}
#[derive(Debug, PartialEq)]
@ -167,6 +169,16 @@ pub struct StatisticsCookie<'s> {
pub source: &'s str,
}
/// A subscript object, e.g. the `_7` in `foo_7`.
#[derive(Debug, PartialEq)]
pub struct Subscript<'s> {
/// The source text recorded for this object by the parser.
pub source: &'s str,
}
/// A superscript object, e.g. the `^{hello *world*}` in `baz^{hello *world*}`.
#[derive(Debug, PartialEq)]
pub struct Superscript<'s> {
/// The source text recorded for this object by the parser.
pub source: &'s str,
}
impl<'s> Source<'s> for Object<'s> {
fn get_source(&'s self) -> &'s str {
match self {
@ -194,6 +206,8 @@ impl<'s> Source<'s> for Object<'s> {
Object::LineBreak(obj) => obj.source,
Object::Target(obj) => obj.source,
Object::StatisticsCookie(obj) => obj.source,
Object::Subscript(obj) => obj.source,
Object::Superscript(obj) => obj.source,
}
}
}
@ -335,3 +349,15 @@ impl<'s> Source<'s> for StatisticsCookie<'s> {
self.source
}
}
/// Exposes the source text stored on a `Subscript`.
impl<'s> Source<'s> for Subscript<'s> {
/// Returns the `source` field unchanged.
fn get_source(&'s self) -> &'s str {
self.source
}
}
/// Exposes the source text stored on a `Superscript`.
impl<'s> Source<'s> for Superscript<'s> {
/// Returns the `source` field unchanged.
fn get_source(&'s self) -> &'s str {
self.source
}
}

View File

@ -22,6 +22,8 @@ use crate::parser::plain_link::plain_link;
use crate::parser::radio_link::radio_link;
use crate::parser::radio_link::radio_target;
use crate::parser::statistics_cookie::statistics_cookie;
use crate::parser::subscript_and_superscript::subscript;
use crate::parser::subscript_and_superscript::superscript;
use crate::parser::target::target;
use crate::parser::text_markup::text_markup;
@ -34,6 +36,11 @@ pub fn standard_set_object<'r, 's>(
not(|i| context.check_exit_matcher(i))(input)?;
alt((
map(parser_with_context!(subscript)(context), Object::Subscript),
map(
parser_with_context!(superscript)(context),
Object::Superscript,
),
map(
parser_with_context!(statistics_cookie)(context),
Object::StatisticsCookie,
@ -84,10 +91,14 @@ pub fn minimal_set_object<'r, 's>(
context: Context<'r, 's>,
input: &'s str,
) -> Res<&'s str, Object<'s>> {
// TODO: superscripts and subscripts
not(|i| context.check_exit_matcher(i))(input)?;
alt((
map(parser_with_context!(subscript)(context), Object::Subscript),
map(
parser_with_context!(superscript)(context),
Object::Superscript,
),
map(parser_with_context!(entity)(context), Object::Entity),
map(
parser_with_context!(latex_fragment)(context),
@ -105,6 +116,11 @@ pub fn any_object_except_plain_text<'r, 's>(
) -> Res<&'s str, Object<'s>> {
// Used for exit matchers so this does not check exit matcher condition.
alt((
map(parser_with_context!(subscript)(context), Object::Subscript),
map(
parser_with_context!(superscript)(context),
Object::Superscript,
),
map(
parser_with_context!(statistics_cookie)(context),
Object::StatisticsCookie,

View File

@ -194,6 +194,18 @@ pub enum ContextElement<'r, 's> {
/// unbalanced brackets can be detected in the middle of an
/// object.
InlineSourceBlockBracket(InlineSourceBlockBracket<'s>),
/// Stores the current brace depth inside a subscript or
/// superscript.
///
/// Inside the braces of a subscript or superscript there must be
/// balanced braces {}, so the number of opening braces minus the
/// number of closing braces within the definition must equal
/// zero. This stores that running depth.
///
/// A reference to the position in the string is also included so
/// unbalanced braces can be detected in the middle of an object.
SubscriptSuperscriptBrace(SubscriptSuperscriptBrace<'s>),
}
pub struct ExitMatcherNode<'r> {
@ -225,6 +237,12 @@ pub struct InlineSourceBlockBracket<'s> {
pub depth: usize,
}
/// Brace-tracking state placed in the parser context while parsing
/// the braced body of a subscript or superscript.
#[derive(Debug)]
pub struct SubscriptSuperscriptBrace<'s> {
/// The input position at which this depth was recorded; the
/// subscript/superscript parser sets it to the text just after the
/// opening `{`.
pub position: &'s str,
/// The brace depth at `position`.
pub depth: usize,
}
impl<'r> std::fmt::Debug for ExitMatcherNode<'r> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut formatter = f.debug_struct("ExitMatcherNode");

View File

@ -0,0 +1,204 @@
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::anychar;
use nom::character::complete::one_of;
use nom::character::complete::space0;
use nom::combinator::map;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many_till;
use super::Context;
use super::Object;
use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
use crate::parser::exiting::ExitClass;
use crate::parser::object_parser::standard_set_object;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::parser_context::SubscriptSuperscriptBrace;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::get_one_before;
use crate::parser::Subscript;
use crate::parser::Superscript;
/// Parses a subscript: an underscore that follows a non-whitespace
/// character, then a script body, then any trailing spaces or tabs.
///
/// The returned `Subscript` records everything consumed, including
/// the trailing whitespace.
#[tracing::instrument(ret, level = "debug")]
pub fn subscript<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Subscript<'s>> {
    // The cheap marker check runs before the pre-character check so we
    // avoid walking up the context tree to find the document root when
    // the input cannot be a subscript anyway.
    let (after_marker, _) = tag("_")(input)?;
    pre(context, input)?;
    let (after_body, _body) = script_body(context, after_marker)?;
    let (remaining, _) = space0(after_body)?;
    Ok((
        remaining,
        Subscript {
            source: get_consumed(input, remaining),
        },
    ))
}
/// Parses a superscript: a circumflex that follows a non-whitespace
/// character, then a script body, then any trailing spaces or tabs.
///
/// The returned `Superscript` records everything consumed, including
/// the trailing whitespace.
#[tracing::instrument(ret, level = "debug")]
pub fn superscript<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, Superscript<'s>> {
    // The cheap marker check runs before the pre-character check so we
    // avoid walking up the context tree to find the document root when
    // the input cannot be a superscript anyway.
    let (after_marker, _) = tag("^")(input)?;
    pre(context, input)?;
    let (after_body, _body) = script_body(context, after_marker)?;
    let (remaining, _) = space0(after_body)?;
    Ok((
        remaining,
        Superscript {
            source: get_consumed(input, remaining),
        },
    ))
}
/// Checks the character immediately before `input` in the document.
///
/// Succeeds without consuming anything when that character exists and
/// is not whitespace; otherwise returns a parse error.
///
/// Panics if the context has no document root.
#[tracing::instrument(ret, level = "debug")]
fn pre<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
    let document_root = context.get_document_root().unwrap();
    let preceded_by_non_whitespace = get_one_before(document_root, input)
        .and_then(|slice| slice.chars().next())
        .map_or(false, |c| !c.is_whitespace());

    if !preceded_by_non_whitespace {
        return Err(nom::Err::Error(CustomError::MyError(MyError(
            "Must be preceded by a non-whitespace character.",
        ))));
    }
    Ok((input, ()))
}
/// The body of a subscript or superscript, i.e. everything after the
/// `_` or `^` marker.
#[derive(Debug)]
enum ScriptBody<'s> {
/// A body written without braces: either a lone `*` or a signed
/// run of script characters. Holds the matched text.
Braceless(&'s str),
/// A body wrapped in `{}`. Holds the objects parsed between the
/// braces.
WithBraces(Vec<Object<'s>>),
}
/// Parses the body of a subscript or superscript.
///
/// The alternatives are tried in order: a lone asterisk, a braceless
/// alphanumeric script, and finally a body wrapped in braces.
#[tracing::instrument(ret, level = "debug")]
fn script_body<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ScriptBody<'s>> {
    let asterisk = map(
        parser_with_context!(script_asterisk)(context),
        ScriptBody::Braceless,
    );
    let alphanum = map(
        parser_with_context!(script_alphanum)(context),
        ScriptBody::Braceless,
    );
    let braced = map(
        parser_with_context!(script_with_braces)(context),
        ScriptBody::WithBraces,
    );
    alt((asterisk, alphanum, braced))(input)
}
/// Matches a script body consisting of a single `*`.
#[tracing::instrument(ret, level = "debug")]
fn script_asterisk<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    let (remaining, asterisk) = tag("*")(input)?;
    Ok((remaining, asterisk))
}
/// Matches a braceless script body: an optional `+` or `-` sign
/// followed by script characters up to and including the final
/// character recognised by `end_script_alphanum_character`.
///
/// Returns the whole matched text, sign included.
#[tracing::instrument(ret, level = "debug")]
fn script_alphanum<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    let (after_sign, _sign) = opt(recognize(one_of("+-")))(input)?;
    let (remaining, _script) = many_till(
        parser_with_context!(script_alphanum_character)(context),
        parser_with_context!(end_script_alphanum_character)(context),
    )(after_sign)?;
    Ok((remaining, get_consumed(input, remaining)))
}
/// Matches one character allowed inside a braceless script: any
/// alphanumeric character, a comma, a period, or a backslash.
#[tracing::instrument(ret, level = "debug")]
fn script_alphanum_character<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, &'s str> {
    let is_script_character =
        |c: &char| c.is_alphanumeric() || matches!(*c, ',' | '.' | '\\');
    recognize(verify(anychar, is_script_character))(input)
}
/// Matches FINAL, the last character of a braceless script.
///
/// FINAL is an alphanumeric character that closes the script: the
/// text after it may start with `,`, `.` or `\` characters, but those
/// must not lead into another alphanumeric character. Skipping that
/// trailing punctuation in the lookahead lets `foo_7.` parse as the
/// subscript `_7` followed by a plain `.`. Requiring FINAL to be
/// followed immediately by a non-script character made the whole
/// script fail to parse in that situation.
///
/// Only the alphanumeric character itself is consumed; the lookahead
/// consumes nothing.
#[tracing::instrument(ret, level = "debug")]
fn end_script_alphanum_character<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, &'s str> {
    let (remaining, final_char) =
        recognize(verify(anychar, |c: &char| c.is_alphanumeric()))(input)?;
    // Trailing punctuation belongs to the script only when more
    // alphanumerics follow it, so look past it before deciding whether
    // this character really is the last one.
    let after_punctuation =
        remaining.trim_start_matches(|c: char| matches!(c, ',' | '.' | '\\'));
    peek(not(parser_with_context!(script_alphanum_character)(
        context,
    )))(after_punctuation)?;
    Ok((remaining, final_char))
}
/// Parses a script body wrapped in braces and returns the objects
/// found between them.
///
/// A brace-depth tracker anchored just after the opening `{` and an
/// exit matcher for the closing `}` are pushed onto the context
/// before the contents are parsed with the standard object set.
#[tracing::instrument(ret, level = "debug")]
fn script_with_braces<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, Vec<Object<'s>>> {
    let (body_start, _) = tag("{")(input)?;

    let brace_tracker = SubscriptSuperscriptBrace {
        position: body_start,
        depth: 0,
    };
    let parser_context = context
        .with_additional_node(ContextElement::SubscriptSuperscriptBrace(brace_tracker))
        .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
            class: ExitClass::Beta,
            exit_matcher: &script_with_braces_end,
        }));

    let (body_end, (children, _exit_contents)) = many_till(
        parser_with_context!(standard_set_object)(&parser_context),
        parser_with_context!(exit_matcher_parser)(&parser_context),
    )(body_start)?;

    let (remaining, _) = tag("}")(body_end)?;
    Ok((remaining, children))
}
#[tracing::instrument(ret, level = "debug")]
fn script_with_braces_end<'r, 's>(
context: Context<'r, 's>,
input: &'s str,
) -> Res<&'s str, &'s str> {
let context_depth = get_bracket_depth(context)
.expect("This function should only be called from inside a subscript or superscript.");
let text_since_context_entry = get_consumed(context_depth.position, input);
let mut current_depth = context_depth.depth;
for c in text_since_context_entry.chars() {
match c {
'{' => {
current_depth += 1;
}
'}' if current_depth == 0 => {
panic!("Exceeded subscript or superscript brace depth.")
}
'}' if current_depth > 0 => {
current_depth -= 1;
}
_ => {}
}
}
if current_depth == 0 {
let close_bracket = tag::<&str, &str, CustomError<&str>>("}")(input);
if close_bracket.is_ok() {
return close_bracket;
}
}
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Not a valid end for subscript or superscript.",
))));
}
/// Returns the first `SubscriptSuperscriptBrace` encountered while
/// iterating the context, or `None` when the context holds none.
#[tracing::instrument(ret, level = "debug")]
fn get_bracket_depth<'r, 's>(
    context: Context<'r, 's>,
) -> Option<&'r SubscriptSuperscriptBrace<'s>> {
    context.iter().find_map(|node| match node.get_data() {
        ContextElement::SubscriptSuperscriptBrace(depth) => Some(depth),
        _ => None,
    })
}

View File

@ -65,6 +65,8 @@ impl<'r, 's> Token<'r, 's> {
Object::LineBreak(_) => Box::new(std::iter::empty()),
Object::Target(_) => Box::new(std::iter::empty()),
Object::StatisticsCookie(_) => Box::new(std::iter::empty()),
Object::Subscript(_) => Box::new(std::iter::empty()), // TODO: Iterate over children
Object::Superscript(_) => Box::new(std::iter::empty()), // TODO: Iterate over children
},
Token::Element(elem) => match elem {
Element::Paragraph(inner) => Box::new(inner.children.iter().map(Token::Object)),