diff --git a/org_mode_samples/citation/simple.org b/org_mode_samples/citation/simple.org new file mode 100644 index 0000000..9af9473 --- /dev/null +++ b/org_mode_samples/citation/simple.org @@ -0,0 +1,5 @@ +[cite:@foo] + +[cite/a/b-_/foo:globalprefix;keyprefix @foo keysuffix;globalsuffix] + +text before [cite:@bar] text after diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 9a04cec..fd91cee 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -3,6 +3,8 @@ use super::util::assert_name; use crate::parser::sexp::Token; use crate::parser::AngleLink; use crate::parser::Bold; +use crate::parser::Citation; +use crate::parser::CitationReference; use crate::parser::Clock; use crate::parser::Code; use crate::parser::Comment; @@ -162,6 +164,8 @@ fn compare_object<'s>( Object::LatexFragment(obj) => compare_latex_fragment(source, emacs, obj), Object::ExportSnippet(obj) => compare_export_snippet(source, emacs, obj), Object::FootnoteReference(obj) => compare_footnote_reference(source, emacs, obj), + Object::Citation(obj) => compare_citation(source, emacs, obj), + Object::CitationReference(obj) => compare_citation_reference(source, emacs, obj), } } @@ -1338,3 +1342,49 @@ fn compare_footnote_reference<'s>( children: Vec::new(), }) } + +fn compare_citation<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s Citation<'s>, +) -> Result> { + let mut this_status = DiffStatus::Good; + let emacs_name = "citation"; + if assert_name(emacs, emacs_name).is_err() { + this_status = DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + + Ok(DiffResult { + status: this_status, + name: emacs_name.to_owned(), + message: None, + children: Vec::new(), + }) +} + +fn compare_citation_reference<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s CitationReference<'s>, +) -> Result> { + let mut this_status = DiffStatus::Good; + let emacs_name = "citation-reference"; + if assert_name(emacs, emacs_name).is_err() 
{ + this_status = DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + + Ok(DiffResult { + status: this_status, + name: emacs_name.to_owned(), + message: None, + children: Vec::new(), + }) +} diff --git a/src/parser/citation.rs b/src/parser/citation.rs new file mode 100644 index 0000000..fa8a5ee --- /dev/null +++ b/src/parser/citation.rs @@ -0,0 +1,221 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::bytes::complete::tag_no_case; +use nom::character::complete::anychar; +use nom::character::complete::space0; +use nom::combinator::opt; +use nom::combinator::recognize; +use nom::combinator::verify; +use nom::multi::many1; +use nom::multi::many_till; +use nom::multi::separated_list1; +use nom::sequence::tuple; + +use super::Context; +use crate::error::CustomError; +use crate::error::Res; +use crate::parser::citation_reference::citation_reference; +use crate::parser::citation_reference::citation_reference_key; +use crate::parser::citation_reference::get_bracket_depth; +use crate::parser::exiting::ExitClass; +use crate::parser::object::Citation; +use crate::parser::object_parser::standard_set_object; +use crate::parser::parser_context::CitationBracket; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::parser_with_context::parser_with_context; +use crate::parser::util::exit_matcher_parser; +use crate::parser::util::get_consumed; +use crate::parser::Object; + +#[tracing::instrument(ret, level = "debug")] +pub fn citation<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Citation<'s>> { + // TODO: Despite being a standard object, citations cannot exist inside the global prefix/suffix for other citations because citations must contain something that matches @key which is forbidden inside the global prefix/suffix. 
This TODO is to evaluate if its worth putting in an explicit check for this (which can be easily accomplished by checking the output of `get_bracket_depth()`). I suspect its not worth it because I expect, outside of intentionally crafted inputs, this parser will exit immediately inside a citation since it is unlikely to find the "[cite" substring inside a citation global prefix/suffix. + let (remaining, _) = tag_no_case("[cite")(input)?; + let (remaining, _) = opt(citestyle)(remaining)?; + let (remaining, _) = tag(":")(remaining)?; + let (remaining, _prefix) = opt(parser_with_context!(global_prefix)(context))(remaining)?; + let (remaining, _references) = + separated_list1(tag(";"), parser_with_context!(citation_reference)(context))(remaining)?; + let (remaining, _suffix) = opt(tuple(( + tag(";"), + parser_with_context!(global_suffix)(context), + )))(remaining)?; + let (remaining, _) = tag("]")(remaining)?; + let (remaining, _) = space0(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, Citation { source })) +} + +#[tracing::instrument(ret, level = "debug")] +fn citestyle<'r, 's>(input: &'s str) -> Res<&'s str, &'s str> { + let (remaining, _) = tuple((tag("/"), style))(input)?; + let (remaining, _) = opt(tuple((tag("/"), variant)))(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, source)) +} + +#[tracing::instrument(ret, level = "debug")] +fn style<'r, 's>(input: &'s str) -> Res<&'s str, &'s str> { + recognize(many1(verify(anychar, |c| { + c.is_alphanumeric() || "_-".contains(*c) + })))(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn variant<'r, 's>(input: &'s str) -> Res<&'s str, &'s str> { + recognize(many1(verify(anychar, |c| { + c.is_alphanumeric() || "_-/".contains(*c) + })))(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn global_prefix<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, Vec>> { + // TODO: I could insert CitationBracket entries in the context 
after each matched object to reduce the scanning done for counting brackets which should be more efficient. + let parser_context = context + .with_additional_node(ContextElement::CitationBracket(CitationBracket { + position: input, + depth: 0, + })) + .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Beta, + exit_matcher: &global_prefix_end, + })); + let (remaining, (children, _exit_contents)) = verify( + many_till( + parser_with_context!(standard_set_object)(&parser_context), + parser_with_context!(exit_matcher_parser)(&parser_context), + ), + |(children, _exit_contents)| !children.is_empty(), + )(input)?; + let (remaining, _) = tag(";")(remaining)?; + Ok((remaining, children)) +} + +#[tracing::instrument(ret, level = "debug")] +fn global_prefix_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + let context_depth = get_bracket_depth(context) + .expect("This function should only be called from inside a citation."); + let text_since_context_entry = get_consumed(context_depth.position, input); + let mut current_depth = context_depth.depth; + for c in text_since_context_entry.chars() { + match c { + '[' => { + current_depth += 1; + } + ']' if current_depth == 0 => { + panic!("Exceeded citation global prefix bracket depth.") + } + ']' if current_depth > 0 => { + current_depth -= 1; + } + _ => {} + } + } + if current_depth == 0 { + let close_bracket = tag::<&str, &str, CustomError<&str>>("]")(input); + if close_bracket.is_ok() { + return close_bracket; + } + } + alt(( + tag(";"), + recognize(parser_with_context!(citation_reference_key)(context)), + ))(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn global_suffix<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, Vec>> { + // TODO: I could insert CitationBracket entries in the context after each matched object to reduce the scanning done for counting brackets which should be more efficient. 
+ let parser_context = context + .with_additional_node(ContextElement::CitationBracket(CitationBracket { + position: input, + depth: 0, + })) + .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Beta, + exit_matcher: &global_suffix_end, + })); + let (remaining, (children, _exit_contents)) = verify( + many_till( + parser_with_context!(standard_set_object)(&parser_context), + parser_with_context!(exit_matcher_parser)(&parser_context), + ), + |(children, _exit_contents)| !children.is_empty(), + )(input)?; + Ok((remaining, children)) +} + +#[tracing::instrument(ret, level = "debug")] +fn global_suffix_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + let context_depth = get_bracket_depth(context) + .expect("This function should only be called from inside a citation."); + let text_since_context_entry = get_consumed(context_depth.position, input); + let mut current_depth = context_depth.depth; + for c in text_since_context_entry.chars() { + match c { + '[' => { + current_depth += 1; + } + ']' if current_depth == 0 => { + panic!("Exceeded citation global suffix bracket depth.") + } + ']' if current_depth > 0 => { + current_depth -= 1; + } + _ => {} + } + } + if current_depth == 0 { + let close_bracket = tag::<&str, &str, CustomError<&str>>("]")(input); + if close_bracket.is_ok() { + return close_bracket; + } + } + alt(( + tag(";"), + recognize(parser_with_context!(citation_reference_key)(context)), + ))(input) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::parser::element_parser::element; + use crate::parser::parser_context::ContextElement; + use crate::parser::parser_context::ContextTree; + use crate::parser::parser_with_context::parser_with_context; + use crate::parser::source::Source; + + #[test] + fn citation_simple() { + let input = "[cite:@foo]"; + let initial_context: ContextTree<'_, '_> = ContextTree::new(); + let document_context = + 
initial_context.with_additional_node(ContextElement::DocumentRoot(input)); + let paragraph_matcher = parser_with_context!(element(true))(&document_context); + let (remaining, first_paragraph) = paragraph_matcher(input).expect("Parse first paragraph"); + let first_paragraph = match first_paragraph { + crate::parser::Element::Paragraph(paragraph) => paragraph, + _ => panic!("Should be a paragraph!"), + }; + assert_eq!(remaining, ""); + assert_eq!(first_paragraph.get_source(), "[cite:@foo]"); + assert_eq!(first_paragraph.children.len(), 1); + assert_eq!( + first_paragraph + .children + .get(0) + .expect("Len already asserted to be 1"), + &Object::Citation(Citation { + source: "[cite:@foo]" + }) + ); + } +} diff --git a/src/parser/citation_reference.rs b/src/parser/citation_reference.rs new file mode 100644 index 0000000..18eba3b --- /dev/null +++ b/src/parser/citation_reference.rs @@ -0,0 +1,175 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::character::complete::anychar; +use nom::combinator::not; +use nom::combinator::opt; +use nom::combinator::recognize; +use nom::combinator::verify; +use nom::multi::many1; +use nom::multi::many_till; +use nom::sequence::preceded; +use nom::sequence::tuple; + +use super::Context; +use crate::error::CustomError; +use crate::error::Res; +use crate::parser::exiting::ExitClass; +use crate::parser::object::CitationReference; +use crate::parser::object_parser::minimal_set_object; +use crate::parser::parser_context::CitationBracket; +use crate::parser::parser_context::ContextElement; +use crate::parser::parser_context::ExitMatcherNode; +use crate::parser::parser_with_context::parser_with_context; +use crate::parser::util::exit_matcher_parser; +use crate::parser::util::get_consumed; +use crate::parser::util::WORD_CONSTITUENT_CHARACTERS; +use crate::parser::Object; + +#[tracing::instrument(ret, level = "debug")] +pub fn citation_reference<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, 
CitationReference<'s>> { + let (remaining, _prefix) = opt(parser_with_context!(key_prefix)(context))(input)?; + let (remaining, _key) = parser_with_context!(citation_reference_key)(context)(remaining)?; + let (remaining, _suffix) = opt(parser_with_context!(key_suffix)(context))(remaining)?; + let source = get_consumed(input, remaining); + + Ok((remaining, CitationReference { source })) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn citation_reference_key<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, &'s str> { + let (remaining, source) = recognize(tuple(( + tag("@"), + many1(verify( + preceded( + not(parser_with_context!(exit_matcher_parser)(context)), + anychar, + ), + |c| { + WORD_CONSTITUENT_CHARACTERS.contains(*c) || "-.:?~`'/*@+|(){}<>&_^$#%~".contains(*c) + }, + )), + )))(input)?; + Ok((remaining, source)) +} + +#[tracing::instrument(ret, level = "debug")] +fn key_prefix<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Vec>> { + // TODO: I could insert CitationBracket entries in the context after each matched object to reduce the scanning done for counting brackets which should be more efficient. 
+ let parser_context = context + .with_additional_node(ContextElement::CitationBracket(CitationBracket { + position: input, + depth: 0, + })) + .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Beta, + exit_matcher: &key_prefix_end, + })); + let (remaining, (children, _exit_contents)) = verify( + many_till( + parser_with_context!(minimal_set_object)(&parser_context), + parser_with_context!(exit_matcher_parser)(&parser_context), + ), + |(children, _exit_contents)| !children.is_empty(), + )(input)?; + Ok((remaining, children)) +} + +#[tracing::instrument(ret, level = "debug")] +fn key_suffix<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Vec>> { + // TODO: I could insert CitationBracket entries in the context after each matched object to reduce the scanning done for counting brackets which should be more efficient. + let parser_context = context + .with_additional_node(ContextElement::CitationBracket(CitationBracket { + position: input, + depth: 0, + })) + .with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode { + class: ExitClass::Beta, + exit_matcher: &key_suffix_end, + })); + let (remaining, (children, _exit_contents)) = verify( + many_till( + parser_with_context!(minimal_set_object)(&parser_context), + parser_with_context!(exit_matcher_parser)(&parser_context), + ), + |(children, _exit_contents)| !children.is_empty(), + )(input)?; + Ok((remaining, children)) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn get_bracket_depth<'r, 's>(context: Context<'r, 's>) -> Option<&'r CitationBracket<'s>> { + for node in context.iter() { + match node.get_data() { + ContextElement::CitationBracket(depth) => return Some(depth), + _ => {} + } + } + None +} + +#[tracing::instrument(ret, level = "debug")] +fn key_prefix_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + let context_depth = get_bracket_depth(context) + .expect("This function should only be called 
from inside a citation reference."); + let text_since_context_entry = get_consumed(context_depth.position, input); + let mut current_depth = context_depth.depth; + for c in text_since_context_entry.chars() { + match c { + '[' => { + current_depth += 1; + } + ']' if current_depth == 0 => { + panic!("Exceeded citation reference key prefix bracket depth.") + } + ']' if current_depth > 0 => { + current_depth -= 1; + } + _ => {} + } + } + if current_depth == 0 { + let close_bracket = tag::<&str, &str, CustomError<&str>>("]")(input); + if close_bracket.is_ok() { + return close_bracket; + } + } + alt(( + tag(";"), + recognize(parser_with_context!(citation_reference_key)(context)), + ))(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn key_suffix_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + let context_depth = get_bracket_depth(context) + .expect("This function should only be called from inside a citation reference."); + let text_since_context_entry = get_consumed(context_depth.position, input); + let mut current_depth = context_depth.depth; + for c in text_since_context_entry.chars() { + match c { + '[' => { + current_depth += 1; + } + ']' if current_depth == 0 => { + panic!("Exceeded citation reference key suffix bracket depth.") + } + ']' if current_depth > 0 => { + current_depth -= 1; + } + _ => {} + } + } + if current_depth == 0 { + let close_bracket = tag::<&str, &str, CustomError<&str>>("]")(input); + if close_bracket.is_ok() { + return close_bracket; + } + } + tag(";")(input) +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6cf9bbc..d80dcea 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,4 +1,6 @@ mod angle_link; +mod citation; +mod citation_reference; mod clock; mod comment; mod diary_sexp; @@ -73,6 +75,8 @@ pub use lesser_element::TableCell; pub use lesser_element::VerseBlock; pub use object::AngleLink; pub use object::Bold; +pub use object::Citation; +pub use object::CitationReference; pub
use object::Code; pub use object::Entity; pub use object::ExportSnippet; diff --git a/src/parser/object.rs b/src/parser/object.rs index 42e4401..ffa9bf6 100644 --- a/src/parser/object.rs +++ b/src/parser/object.rs @@ -19,6 +19,8 @@ pub enum Object<'s> { LatexFragment(LatexFragment<'s>), ExportSnippet(ExportSnippet<'s>), FootnoteReference(FootnoteReference<'s>), + Citation(Citation<'s>), + CitationReference(CitationReference<'s>), } #[derive(Debug, PartialEq)] @@ -125,6 +127,16 @@ pub struct FootnoteReference<'s> { pub definition: Vec>, } +#[derive(Debug, PartialEq)] +pub struct Citation<'s> { + pub source: &'s str, +} + +#[derive(Debug, PartialEq)] +pub struct CitationReference<'s> { + pub source: &'s str, +} + impl<'s> Source<'s> for Object<'s> { fn get_source(&'s self) -> &'s str { match self { @@ -145,6 +157,8 @@ impl<'s> Source<'s> for Object<'s> { Object::LatexFragment(obj) => obj.source, Object::ExportSnippet(obj) => obj.source, Object::FootnoteReference(obj) => obj.source, + Object::Citation(obj) => obj.source, + Object::CitationReference(obj) => obj.source, } } } @@ -244,3 +258,15 @@ impl<'s> Source<'s> for FootnoteReference<'s> { self.source } } + +impl<'s> Source<'s> for Citation<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + +impl<'s> Source<'s> for CitationReference<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} diff --git a/src/parser/object_parser.rs b/src/parser/object_parser.rs index 8ee65d3..4693ac6 100644 --- a/src/parser/object_parser.rs +++ b/src/parser/object_parser.rs @@ -8,6 +8,7 @@ use super::regular_link::regular_link; use super::Context; use crate::error::Res; use crate::parser::angle_link::angle_link; +use crate::parser::citation::citation; use crate::parser::entity::entity; use crate::parser::export_snippet::export_snippet; use crate::parser::footnote_reference::footnote_reference; @@ -28,6 +29,7 @@ pub fn standard_set_object<'r, 's>( not(|i| context.check_exit_matcher(i))(input)?; alt(( + 
map(parser_with_context!(citation)(context), Object::Citation), map( parser_with_context!(footnote_reference)(context), Object::FootnoteReference, @@ -84,6 +86,7 @@ pub fn any_object_except_plain_text<'r, 's>( ) -> Res<&'s str, Object<'s>> { // Used for exit matchers so this does not check exit matcher condition. alt(( + map(parser_with_context!(citation)(context), Object::Citation), map( parser_with_context!(footnote_reference)(context), Object::FootnoteReference, diff --git a/src/parser/parser_context.rs b/src/parser/parser_context.rs index d2c105e..d50377c 100644 --- a/src/parser/parser_context.rs +++ b/src/parser/parser_context.rs @@ -147,12 +147,27 @@ pub enum ContextElement<'r, 's> { /// The definition inside a footnote reference must have balanced /// brackets [] inside the definition, so this stores the amount /// of opening brackets subtracted by the amount of closing - /// brackets within the definition. + /// brackets within the definition, which must equal zero. /// /// A reference to the position in the string is also included so /// unbalanced brackets can be detected in the middle of an /// object. FootnoteReferenceDefinition(FootnoteReferenceDefinition<'s>), + + /// Stores the current bracket depth inside a citation. + /// + /// The global prefix, global suffix, key prefix, and key suffix + /// inside a citation must each have balanced brackets [], so + /// this stores the amount of opening brackets minus the amount + /// of closing brackets within the current prefix or suffix, + /// which must equal zero. None of the prefixes or + /// suffixes can be nested inside each other so we can use a + /// single type for this without conflict. + /// + /// A reference to the position in the string is also included so + /// unbalanced brackets can be detected in the middle of an + /// object.
+ CitationBracket(CitationBracket<'s>), } pub struct ExitMatcherNode<'r> { @@ -166,6 +181,12 @@ pub struct FootnoteReferenceDefinition<'s> { pub depth: usize, } +#[derive(Debug)] +pub struct CitationBracket<'s> { + pub position: &'s str, + pub depth: usize, +} + impl<'r> std::fmt::Debug for ExitMatcherNode<'r> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut formatter = f.debug_struct("ExitMatcherNode"); diff --git a/src/parser/token.rs b/src/parser/token.rs index 5bbf740..731147d 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -58,6 +58,8 @@ impl<'r, 's> Token<'r, 's> { Object::FootnoteReference(inner) => { Box::new(inner.definition.iter().map(Token::Object)) } + Object::Citation(_) => Box::new(std::iter::empty()), // TODO: Iterate over children + Object::CitationReference(_) => Box::new(std::iter::empty()), // TODO: Iterate over children }, Token::Element(elem) => match elem { Element::Paragraph(inner) => Box::new(inner.children.iter().map(Token::Object)), diff --git a/toy_language.txt b/toy_language.txt index ad282ae..8594a8f 100644 --- a/toy_language.txt +++ b/toy_language.txt @@ -1 +1 @@ -[fn:2:This is a footnote reference since it has the definition inside the brackets. This style is referred to as an "inline footnote".] +[cite/a/b-_/foo:globalprefix;keyprefix @foo keysuffix;globalsuffix]