Merge branch 'citation'
Some checks failed
semver Build semver has succeeded
rustfmt Build rustfmt has failed
rust-test Build rust-test has failed

This commit is contained in:
Tom Alexander 2023-07-21 18:52:33 -04:00
commit 1e2ea17a9c
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
10 changed files with 509 additions and 2 deletions

View File

@ -0,0 +1,5 @@
[cite:@foo]
[cite/a/b-_/foo:globalprefix;keyprefix @foo keysuffix;globalsuffix]
text before [cite:@bar] text after

View File

@ -3,6 +3,8 @@ use super::util::assert_name;
use crate::parser::sexp::Token;
use crate::parser::AngleLink;
use crate::parser::Bold;
use crate::parser::Citation;
use crate::parser::CitationReference;
use crate::parser::Clock;
use crate::parser::Code;
use crate::parser::Comment;
@ -162,6 +164,8 @@ fn compare_object<'s>(
Object::LatexFragment(obj) => compare_latex_fragment(source, emacs, obj),
Object::ExportSnippet(obj) => compare_export_snippet(source, emacs, obj),
Object::FootnoteReference(obj) => compare_footnote_reference(source, emacs, obj),
Object::Citation(obj) => compare_citation(source, emacs, obj),
Object::CitationReference(obj) => compare_citation_reference(source, emacs, obj),
}
}
@ -1338,3 +1342,49 @@ fn compare_footnote_reference<'s>(
children: Vec::new(),
})
}
/// Compares the Emacs `citation` token against the citation parsed by this crate.
///
/// The token name and the source bounds are both checked; if either check
/// fails the result is marked `DiffStatus::Bad`. No message or child results
/// are produced.
fn compare_citation<'s>(
    source: &'s str,
    emacs: &'s Token<'s>,
    rust: &'s Citation<'s>,
) -> Result<DiffResult, Box<dyn std::error::Error>> {
    let emacs_name = "citation";
    // Both checks always run; the status is derived from the pair of outcomes.
    let name_matches = assert_name(emacs, emacs_name).is_ok();
    let bounds_match = assert_bounds(source, emacs, rust).is_ok();
    let status = if name_matches && bounds_match {
        DiffStatus::Good
    } else {
        DiffStatus::Bad
    };

    Ok(DiffResult {
        status,
        name: emacs_name.to_owned(),
        message: None,
        children: Vec::new(),
    })
}
/// Compares the Emacs `citation-reference` token against the citation
/// reference parsed by this crate.
///
/// The token name and the source bounds are both checked; if either check
/// fails the result is marked `DiffStatus::Bad`. No message or child results
/// are produced.
fn compare_citation_reference<'s>(
    source: &'s str,
    emacs: &'s Token<'s>,
    rust: &'s CitationReference<'s>,
) -> Result<DiffResult, Box<dyn std::error::Error>> {
    let emacs_name = "citation-reference";
    // Both checks always run; the status is derived from the pair of outcomes.
    let name_matches = assert_name(emacs, emacs_name).is_ok();
    let bounds_match = assert_bounds(source, emacs, rust).is_ok();
    let status = if name_matches && bounds_match {
        DiffStatus::Good
    } else {
        DiffStatus::Bad
    };

    Ok(DiffResult {
        status,
        name: emacs_name.to_owned(),
        message: None,
        children: Vec::new(),
    })
}

221
src/parser/citation.rs Normal file
View File

@ -0,0 +1,221 @@
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::character::complete::anychar;
use nom::character::complete::space0;
use nom::combinator::opt;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many1;
use nom::multi::many_till;
use nom::multi::separated_list1;
use nom::sequence::tuple;
use super::Context;
use crate::error::CustomError;
use crate::error::Res;
use crate::parser::citation_reference::citation_reference;
use crate::parser::citation_reference::citation_reference_key;
use crate::parser::citation_reference::get_bracket_depth;
use crate::parser::exiting::ExitClass;
use crate::parser::object::Citation;
use crate::parser::object_parser::standard_set_object;
use crate::parser::parser_context::CitationBracket;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::Object;
#[tracing::instrument(ret, level = "debug")]
pub fn citation<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Citation<'s>> {
// TODO: Despite being a standard object, citations cannot exist inside the global prefix/suffix for other citations because citations must contain something that matches @key which is forbidden inside the global prefix/suffix. This TODO is to evaluate if its worth putting in an explicit check for this (which can be easily accomplished by checking the output of `get_bracket_depth()`). I suspect its not worth it because I expect, outside of intentionally crafted inputs, this parser will exit immediately inside a citation since it is unlikely to find the "[cite" substring inside a citation global prefix/suffix.
let (remaining, _) = tag_no_case("[cite")(input)?;
let (remaining, _) = opt(citestyle)(remaining)?;
let (remaining, _) = tag(":")(remaining)?;
let (remaining, _prefix) = opt(parser_with_context!(global_prefix)(context))(remaining)?;
let (remaining, _references) =
separated_list1(tag(";"), parser_with_context!(citation_reference)(context))(remaining)?;
let (remaining, _suffix) = opt(tuple((
tag(";"),
parser_with_context!(global_suffix)(context),
)))(remaining)?;
let (remaining, _) = tag("]")(remaining)?;
let (remaining, _) = space0(remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, Citation { source }))
}
/// Parses the cite style portion of a citation: `/style` optionally followed
/// by `/variant`. Returns the whole consumed text.
#[tracing::instrument(ret, level = "debug")]
fn citestyle<'r, 's>(input: &'s str) -> Res<&'s str, &'s str> {
    let (remaining, _) = tuple((tag("/"), style, opt(tuple((tag("/"), variant)))))(input)?;
    Ok((remaining, get_consumed(input, remaining)))
}
/// Parses a citation style name: one or more characters that are
/// alphanumeric, `_`, or `-`.
#[tracing::instrument(ret, level = "debug")]
fn style<'r, 's>(input: &'s str) -> Res<&'s str, &'s str> {
    recognize(many1(verify(anychar, |candidate: &char| {
        candidate.is_alphanumeric() || matches!(*candidate, '_' | '-')
    })))(input)
}
/// Parses a citation style variant: one or more characters that are
/// alphanumeric, `_`, `-`, or `/`.
#[tracing::instrument(ret, level = "debug")]
fn variant<'r, 's>(input: &'s str) -> Res<&'s str, &'s str> {
    recognize(many1(verify(anychar, |candidate: &char| {
        candidate.is_alphanumeric() || matches!(*candidate, '_' | '-' | '/')
    })))(input)
}
/// Parses the global prefix of a citation together with its terminating `;`.
///
/// Objects from the standard set are collected until the exit matcher
/// (`global_prefix_end`) matches. Parsing fails when no object was collected
/// or when the collected objects are not followed by `;`.
#[tracing::instrument(ret, level = "debug")]
fn global_prefix<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, Vec<Object<'s>>> {
    // TODO: I could insert CitationBracket entries in the context after each matched object to reduce the scanning done for counting brackets which should be more efficient.

    // Record where the prefix starts so the exit matcher can count brackets from here.
    let bracket_context =
        context.with_additional_node(ContextElement::CitationBracket(CitationBracket {
            position: input,
            depth: 0,
        }));
    let prefix_context =
        bracket_context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
            class: ExitClass::Beta,
            exit_matcher: &global_prefix_end,
        }));

    let (after_objects, (objects, _exit_contents)) = verify(
        many_till(
            parser_with_context!(standard_set_object)(&prefix_context),
            parser_with_context!(exit_matcher_parser)(&prefix_context),
        ),
        |(parsed, _exit_contents)| !parsed.is_empty(),
    )(input)?;

    // A global prefix is only valid when it is closed by a semicolon.
    let (remaining, _) = tag(";")(after_objects)?;
    Ok((remaining, objects))
}
/// Exit matcher for the global prefix of a citation.
///
/// Replays the text consumed since the nearest `CitationBracket` context node
/// to work out the current bracket depth. When the brackets are balanced, a
/// `]` at `input` matches. In every other case the matcher succeeds on a `;`
/// or on anything that parses as a citation reference key.
///
/// # Panics
///
/// Panics if the context holds no `CitationBracket` node, or if the consumed
/// text closes more brackets than it opened.
#[tracing::instrument(ret, level = "debug")]
fn global_prefix_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    let bracket_start = get_bracket_depth(context)
        .expect("This function should only be called from inside a citation.");
    let consumed = get_consumed(bracket_start.position, input);

    let mut depth = bracket_start.depth;
    for ch in consumed.chars() {
        if ch == '[' {
            depth += 1;
        } else if ch == ']' {
            if depth == 0 {
                panic!("Exceeded citation global prefix bracket depth.")
            }
            depth -= 1;
        }
    }

    // A closing bracket only counts as the end when every inner bracket is closed.
    if depth == 0 {
        if let Ok(matched) = tag::<&str, &str, CustomError<&str>>("]")(input) {
            return Ok(matched);
        }
    }

    alt((
        tag(";"),
        recognize(parser_with_context!(citation_reference_key)(context)),
    ))(input)
}
/// Parses the global suffix of a citation (the text after the final `;`).
///
/// Objects from the standard set are collected until the exit matcher
/// (`global_suffix_end`) matches. Parsing fails when no object was collected.
/// The text that triggered the exit matcher is left for the caller to consume.
#[tracing::instrument(ret, level = "debug")]
fn global_suffix<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, Vec<Object<'s>>> {
    // TODO: I could insert CitationBracket entries in the context after each matched object to reduce the scanning done for counting brackets which should be more efficient.

    // Record where the suffix starts so the exit matcher can count brackets from here.
    let bracket_context =
        context.with_additional_node(ContextElement::CitationBracket(CitationBracket {
            position: input,
            depth: 0,
        }));
    let suffix_context =
        bracket_context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
            class: ExitClass::Beta,
            exit_matcher: &global_suffix_end,
        }));

    let (remaining, (objects, _exit_contents)) = verify(
        many_till(
            parser_with_context!(standard_set_object)(&suffix_context),
            parser_with_context!(exit_matcher_parser)(&suffix_context),
        ),
        |(parsed, _exit_contents)| !parsed.is_empty(),
    )(input)?;

    Ok((remaining, objects))
}
/// Exit matcher for the global suffix of a citation.
///
/// Replays the text consumed since the nearest `CitationBracket` context node
/// to work out the current bracket depth. When the brackets are balanced, a
/// `]` at `input` matches. In every other case the matcher succeeds on a `;`
/// or on anything that parses as a citation reference key.
///
/// # Panics
///
/// Panics if the context holds no `CitationBracket` node, or if the consumed
/// text closes more brackets than it opened.
#[tracing::instrument(ret, level = "debug")]
fn global_suffix_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    let bracket_start = get_bracket_depth(context)
        .expect("This function should only be called from inside a citation.");
    let consumed = get_consumed(bracket_start.position, input);

    let mut depth = bracket_start.depth;
    for ch in consumed.chars() {
        if ch == '[' {
            depth += 1;
        } else if ch == ']' {
            if depth == 0 {
                panic!("Exceeded citation global suffix bracket depth.")
            }
            depth -= 1;
        }
    }

    // A closing bracket only counts as the end when every inner bracket is closed.
    if depth == 0 {
        if let Ok(matched) = tag::<&str, &str, CustomError<&str>>("]")(input) {
            return Ok(matched);
        }
    }

    alt((
        tag(";"),
        recognize(parser_with_context!(citation_reference_key)(context)),
    ))(input)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::element_parser::element;
    use crate::parser::parser_context::ContextElement;
    use crate::parser::parser_context::ContextTree;
    use crate::parser::parser_with_context::parser_with_context;
    use crate::parser::source::Source;

    /// A lone citation parses as a paragraph whose single child is the
    /// citation object covering the whole input.
    #[test]
    fn citation_simple() {
        let input = "[cite:@foo]";
        let root_context: ContextTree<'_, '_> = ContextTree::new();
        let document_context =
            root_context.with_additional_node(ContextElement::DocumentRoot(input));
        let parse_element = parser_with_context!(element(true))(&document_context);

        let (remaining, parsed) = parse_element(input).expect("Parse first paragraph");
        let paragraph = match parsed {
            crate::parser::Element::Paragraph(inner) => inner,
            _ => panic!("Should be a paragraph!"),
        };

        assert_eq!(remaining, "");
        assert_eq!(paragraph.get_source(), "[cite:@foo]");
        assert_eq!(paragraph.children.len(), 1);

        let only_child = paragraph
            .children
            .first()
            .expect("Len already asserted to be 1");
        let expected = Object::Citation(Citation {
            source: "[cite:@foo]",
        });
        assert_eq!(only_child, &expected);
    }
}

View File

@ -0,0 +1,175 @@
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::anychar;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::preceded;
use nom::sequence::tuple;
use super::Context;
use crate::error::CustomError;
use crate::error::Res;
use crate::parser::exiting::ExitClass;
use crate::parser::object::CitationReference;
use crate::parser::object_parser::minimal_set_object;
use crate::parser::parser_context::CitationBracket;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::WORD_CONSTITUENT_CHARACTERS;
use crate::parser::Object;
#[tracing::instrument(ret, level = "debug")]
pub fn citation_reference<'r, 's>(
context: Context<'r, 's>,
input: &'s str,
) -> Res<&'s str, CitationReference<'s>> {
let (remaining, _prefix) = opt(parser_with_context!(key_prefix)(context))(input)?;
let (remaining, _key) = parser_with_context!(citation_reference_key)(context)(remaining)?;
let (remaining, _suffix) = opt(parser_with_context!(key_suffix)(context))(remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, CitationReference { source }))
}
/// Parses a citation key: `@` followed by one or more key characters.
///
/// A key character is any word-constituent character or one of
/// ``-.:?!`'/*@+|(){}<>&_^$#%~``, matching the character class used by Emacs'
/// `org-element-citation-key-re`. The key also stops wherever an exit matcher
/// from the surrounding context matches. The returned text includes the
/// leading `@`.
#[tracing::instrument(ret, level = "debug")]
pub fn citation_reference_key<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, &'s str> {
    // Punctuation permitted inside a key in addition to word constituents.
    // This previously listed `~` twice and omitted `!`, so keys containing `!`
    // were cut short.
    const KEY_PUNCTUATION: &str = "-.:?!`'/*@+|(){}<>&_^$#%~";

    let (remaining, source) = recognize(tuple((
        tag("@"),
        many1(verify(
            preceded(
                // Never read past a point where an enclosing parser wants to stop.
                not(parser_with_context!(exit_matcher_parser)(context)),
                anychar,
            ),
            |c| WORD_CONSTITUENT_CHARACTERS.contains(*c) || KEY_PUNCTUATION.contains(*c),
        )),
    )))(input)?;
    Ok((remaining, source))
}
/// Parses the prefix that may precede the key inside a citation reference.
///
/// Objects from the minimal set are collected until the exit matcher
/// (`key_prefix_end`) matches. Parsing fails when no object was collected.
#[tracing::instrument(ret, level = "debug")]
fn key_prefix<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Vec<Object<'s>>> {
    // TODO: I could insert CitationBracket entries in the context after each matched object to reduce the scanning done for counting brackets which should be more efficient.

    // Record where the prefix starts so the exit matcher can count brackets from here.
    let bracket_context =
        context.with_additional_node(ContextElement::CitationBracket(CitationBracket {
            position: input,
            depth: 0,
        }));
    let prefix_context =
        bracket_context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
            class: ExitClass::Beta,
            exit_matcher: &key_prefix_end,
        }));

    let (remaining, (objects, _exit_contents)) = verify(
        many_till(
            parser_with_context!(minimal_set_object)(&prefix_context),
            parser_with_context!(exit_matcher_parser)(&prefix_context),
        ),
        |(parsed, _exit_contents)| !parsed.is_empty(),
    )(input)?;

    Ok((remaining, objects))
}
/// Parses the suffix that may follow the key inside a citation reference.
///
/// Objects from the minimal set are collected until the exit matcher
/// (`key_suffix_end`) matches. Parsing fails when no object was collected.
#[tracing::instrument(ret, level = "debug")]
fn key_suffix<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, Vec<Object<'s>>> {
    // TODO: I could insert CitationBracket entries in the context after each matched object to reduce the scanning done for counting brackets which should be more efficient.

    // Record where the suffix starts so the exit matcher can count brackets from here.
    let bracket_context =
        context.with_additional_node(ContextElement::CitationBracket(CitationBracket {
            position: input,
            depth: 0,
        }));
    let suffix_context =
        bracket_context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
            class: ExitClass::Beta,
            exit_matcher: &key_suffix_end,
        }));

    let (remaining, (objects, _exit_contents)) = verify(
        many_till(
            parser_with_context!(minimal_set_object)(&suffix_context),
            parser_with_context!(exit_matcher_parser)(&suffix_context),
        ),
        |(parsed, _exit_contents)| !parsed.is_empty(),
    )(input)?;

    Ok((remaining, objects))
}
/// Returns the first `CitationBracket` entry encountered while iterating the
/// context, or `None` when the context contains no such entry.
#[tracing::instrument(ret, level = "debug")]
pub fn get_bracket_depth<'r, 's>(context: Context<'r, 's>) -> Option<&'r CitationBracket<'s>> {
    for node in context.iter() {
        if let ContextElement::CitationBracket(bracket) = node.get_data() {
            return Some(bracket);
        }
    }
    None
}
/// Exit matcher for the key prefix of a citation reference.
///
/// Replays the text consumed since the nearest `CitationBracket` context node
/// to work out the current bracket depth. When the brackets are balanced, a
/// `]` at `input` matches. In every other case the matcher succeeds on a `;`
/// or on anything that parses as a citation reference key.
///
/// # Panics
///
/// Panics if the context holds no `CitationBracket` node, or if the consumed
/// text closes more brackets than it opened.
#[tracing::instrument(ret, level = "debug")]
fn key_prefix_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    let bracket_start = get_bracket_depth(context)
        .expect("This function should only be called from inside a citation reference.");
    let consumed = get_consumed(bracket_start.position, input);

    let mut depth = bracket_start.depth;
    for ch in consumed.chars() {
        if ch == '[' {
            depth += 1;
        } else if ch == ']' {
            if depth == 0 {
                panic!("Exceeded citation reference key prefix bracket depth.")
            }
            depth -= 1;
        }
    }

    // A closing bracket only counts as the end when every inner bracket is closed.
    if depth == 0 {
        if let Ok(matched) = tag::<&str, &str, CustomError<&str>>("]")(input) {
            return Ok(matched);
        }
    }

    alt((
        tag(";"),
        recognize(parser_with_context!(citation_reference_key)(context)),
    ))(input)
}
/// Exit matcher for the key suffix of a citation reference.
///
/// Replays the text consumed since the nearest `CitationBracket` context node
/// to work out the current bracket depth. When the brackets are balanced, a
/// `]` at `input` matches. In every other case the matcher succeeds only on a
/// `;`.
///
/// # Panics
///
/// Panics if the context holds no `CitationBracket` node, or if the consumed
/// text closes more brackets than it opened.
#[tracing::instrument(ret, level = "debug")]
fn key_suffix_end<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    let context_depth = get_bracket_depth(context)
        .expect("This function should only be called from inside a citation reference.");
    let text_since_context_entry = get_consumed(context_depth.position, input);
    let mut current_depth = context_depth.depth;
    for c in text_since_context_entry.chars() {
        match c {
            '[' => {
                current_depth += 1;
            }
            ']' if current_depth == 0 => {
                // The message used to say "key prefix" (copied from `key_prefix_end`),
                // which pointed at the wrong matcher when diagnosing a panic.
                panic!("Exceeded citation reference key suffix bracket depth.")
            }
            ']' if current_depth > 0 => {
                current_depth -= 1;
            }
            _ => {}
        }
    }
    // A closing bracket only counts as the end when every inner bracket is closed.
    if current_depth == 0 {
        let close_bracket = tag::<&str, &str, CustomError<&str>>("]")(input);
        if close_bracket.is_ok() {
            return close_bracket;
        }
    }
    tag(";")(input)
}

View File

@ -1,4 +1,6 @@
mod angle_link;
mod citation;
mod citation_reference;
mod clock;
mod comment;
mod diary_sexp;
@ -73,6 +75,8 @@ pub use lesser_element::TableCell;
pub use lesser_element::VerseBlock;
pub use object::AngleLink;
pub use object::Bold;
pub use object::Citation;
pub use object::CitationReference;
pub use object::Code;
pub use object::Entity;
pub use object::ExportSnippet;

View File

@ -19,6 +19,8 @@ pub enum Object<'s> {
LatexFragment(LatexFragment<'s>),
ExportSnippet(ExportSnippet<'s>),
FootnoteReference(FootnoteReference<'s>),
Citation(Citation<'s>),
CitationReference(CitationReference<'s>),
}
#[derive(Debug, PartialEq)]
@ -125,6 +127,16 @@ pub struct FootnoteReference<'s> {
pub definition: Vec<Object<'s>>,
}
/// A citation object, as produced by the `citation` parser.
#[derive(Debug, PartialEq)]
pub struct Citation<'s> {
/// The slice of the input consumed while parsing the citation.
pub source: &'s str,
}
/// A single reference inside a citation, as produced by the
/// `citation_reference` parser.
#[derive(Debug, PartialEq)]
pub struct CitationReference<'s> {
/// The slice of the input consumed while parsing the reference.
pub source: &'s str,
}
impl<'s> Source<'s> for Object<'s> {
fn get_source(&'s self) -> &'s str {
match self {
@ -145,6 +157,8 @@ impl<'s> Source<'s> for Object<'s> {
Object::LatexFragment(obj) => obj.source,
Object::ExportSnippet(obj) => obj.source,
Object::FootnoteReference(obj) => obj.source,
Object::Citation(obj) => obj.source,
Object::CitationReference(obj) => obj.source,
}
}
}
@ -244,3 +258,15 @@ impl<'s> Source<'s> for FootnoteReference<'s> {
self.source
}
}
/// Exposes the stored `source` slice of a citation through the `Source` trait.
impl<'s> Source<'s> for Citation<'s> {
// Returns the `source` field unchanged.
fn get_source(&'s self) -> &'s str {
self.source
}
}
/// Exposes the stored `source` slice of a citation reference through the
/// `Source` trait.
impl<'s> Source<'s> for CitationReference<'s> {
// Returns the `source` field unchanged.
fn get_source(&'s self) -> &'s str {
self.source
}
}

View File

@ -8,6 +8,7 @@ use super::regular_link::regular_link;
use super::Context;
use crate::error::Res;
use crate::parser::angle_link::angle_link;
use crate::parser::citation::citation;
use crate::parser::entity::entity;
use crate::parser::export_snippet::export_snippet;
use crate::parser::footnote_reference::footnote_reference;
@ -28,6 +29,7 @@ pub fn standard_set_object<'r, 's>(
not(|i| context.check_exit_matcher(i))(input)?;
alt((
map(parser_with_context!(citation)(context), Object::Citation),
map(
parser_with_context!(footnote_reference)(context),
Object::FootnoteReference,
@ -84,6 +86,7 @@ pub fn any_object_except_plain_text<'r, 's>(
) -> Res<&'s str, Object<'s>> {
// Used for exit matchers so this does not check exit matcher condition.
alt((
map(parser_with_context!(citation)(context), Object::Citation),
map(
parser_with_context!(footnote_reference)(context),
Object::FootnoteReference,

View File

@ -147,12 +147,27 @@ pub enum ContextElement<'r, 's> {
/// The definition inside a footnote reference must have balanced
/// brackets [] inside the definition, so this stores the amount
/// of opening brackets subtracted by the amount of closing
/// brackets within the definition.
/// brackets within the definition must equal zero.
///
/// A reference to the position in the string is also included so
/// unbalanced brackets can be detected in the middle of an
/// object.
FootnoteReferenceDefinition(FootnoteReferenceDefinition<'s>),
/// Stores the current bracket depth inside a citation.
///
/// The global prefix, global suffix, key prefix, and key suffix
/// inside a citation must each have balanced brackets [], so this
/// stores the number of opening brackets minus the number of
/// closing brackets, which must equal zero at the point where the
/// prefix or suffix ends. None of the prefixes or
/// suffixes can be nested inside each other so we can use a
/// single type for this without conflict.
///
/// A reference to the position in the string is also included so
/// unbalanced brackets can be detected in the middle of an
/// object.
CitationBracket(CitationBracket<'s>),
}
pub struct ExitMatcherNode<'r> {
@ -166,6 +181,12 @@ pub struct FootnoteReferenceDefinition<'s> {
pub depth: usize,
}
/// Bracket-depth bookkeeping for the prefixes and suffixes inside a citation.
#[derive(Debug)]
pub struct CitationBracket<'s> {
/// The point in the input at which `depth` applies.
pub position: &'s str,
/// The bracket depth at `position`.
pub depth: usize,
}
impl<'r> std::fmt::Debug for ExitMatcherNode<'r> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut formatter = f.debug_struct("ExitMatcherNode");

View File

@ -58,6 +58,8 @@ impl<'r, 's> Token<'r, 's> {
Object::FootnoteReference(inner) => {
Box::new(inner.definition.iter().map(Token::Object))
}
Object::Citation(_) => Box::new(std::iter::empty()), // TODO: Iterate over children
Object::CitationReference(_) => Box::new(std::iter::empty()), // TODO: Iterate over children
},
Token::Element(elem) => match elem {
Element::Paragraph(inner) => Box::new(inner.children.iter().map(Token::Object)),

View File

@ -1 +1 @@
[fn:2:This is a footnote reference since it has the definition inside the brackets. This style is referred to as an "inline footnote".]
[cite/a/b-_/foo:globalprefix;keyprefix @foo keysuffix;globalsuffix]