Merge branch 'latex_fragment'
All checks were successful
semver Build semver has succeeded
rustfmt Build rustfmt has succeeded
rust-test Build rust-test has succeeded

This commit is contained in:
Tom Alexander 2023-07-18 23:30:05 -04:00
commit 8be47c551d
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
12 changed files with 345 additions and 9 deletions

View File

@ -79,7 +79,6 @@ fn is_expect_fail(name: &str) -> Option<&str> {
"element_container_priority_greater_block_greater_block" => Some("Need to implement subscript."),
"element_container_priority_section_greater_block" => Some("Need to implement subscript."),
"paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."),
"entity_simple" => Some("Need to implement LaTeX fragments."),
_ => None,
}
}

View File

@ -0,0 +1,7 @@
\begin{itemize}
\item foo \sqrt{x}
\end{itemize}
\begin{itemize}
\item bar \sqrt{y}
\end{itemize} % Need text on this line to prevent it from becoming a LaTeX environment org-mode element

View File

@ -0,0 +1 @@
tex can have math between dollar signs like $x^2=y$ and $$ x=+\sqrt{y} $$ but also braces and brackets like \( x=2 \) and \[ x=-\sqrt{2} \]

View File

@ -0,0 +1,4 @@
\begin{itemize}
% this would be a LaTeX comment if this was a LaTeX document
\item Heres some math \sqrt{y}
\end{itemize} % Need text on this line to prevent it from becoming a LaTeX environment org-mode element

View File

@ -24,6 +24,7 @@ use crate::parser::HorizontalRule;
use crate::parser::Italic;
use crate::parser::Keyword;
use crate::parser::LatexEnvironment;
use crate::parser::LatexFragment;
use crate::parser::Object;
use crate::parser::OrgMacro;
use crate::parser::Paragraph;
@ -156,6 +157,7 @@ fn compare_object<'s>(
Object::AngleLink(obj) => compare_angle_link(source, emacs, obj),
Object::OrgMacro(obj) => compare_org_macro(source, emacs, obj),
Object::Entity(obj) => compare_entity(source, emacs, obj),
Object::LatexFragment(obj) => compare_latex_fragment(source, emacs, obj),
}
}
@ -1263,3 +1265,26 @@ fn compare_entity<'s>(
children: Vec::new(),
})
}
/// Compare a LaTeX fragment produced by this parser against the matching Emacs token.
///
/// The returned `DiffResult` is `Bad` when the Emacs token is not named
/// `latex-fragment` or when the bounds check between `emacs` and `rust` fails;
/// otherwise it is `Good`. LaTeX fragments have no children to compare.
fn compare_latex_fragment<'s>(
    source: &'s str,
    emacs: &'s Token<'s>,
    rust: &'s LatexFragment<'s>,
) -> Result<DiffResult, Box<dyn std::error::Error>> {
    let emacs_name = "latex-fragment";

    // Both checks are evaluated unconditionally; a failure in either one marks the node bad.
    let name_matches = assert_name(emacs, emacs_name).is_ok();
    let bounds_match = assert_bounds(source, emacs, rust).is_ok();
    let status = if name_matches && bounds_match {
        DiffStatus::Good
    } else {
        DiffStatus::Bad
    };

    Ok(DiffResult {
        status,
        name: emacs_name.to_owned(),
        message: None,
        children: Vec::new(),
    })
}

View File

@ -2,10 +2,13 @@ use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::bytes::complete::take_while1;
use nom::character::complete::anychar;
use nom::character::complete::line_ending;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::map;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::multi::many_till;
use nom::sequence::tuple;
use super::util::get_consumed;
@ -15,7 +18,7 @@ use crate::parser::exiting::ExitClass;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::plain_text::plain_text;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::start_of_line;
use crate::parser::LatexEnvironment;
@ -41,9 +44,7 @@ pub fn latex_environment<'r, 's>(
exit_matcher: &latex_environment_end_specialized,
}));
let (remaining, _contents) = map(parser_with_context!(plain_text)(&parser_context), |obj| {
obj.source
})(remaining)?;
let (remaining, _contents) = contents(&latex_environment_end_specialized, context, remaining)?;
let (remaining, _end) = latex_environment_end_specialized(&parser_context, remaining)?;
let source = get_consumed(input, remaining);
@ -55,6 +56,23 @@ fn name<'s>(input: &'s str) -> Res<&'s str, &'s str> {
take_while1(|c: char| c.is_alphanumeric() || c == '*')(input)
}
/// Recognize raw text up to, but not including, the point where parsing should stop.
///
/// Characters are consumed one at a time until either the context's exit matcher
/// or the supplied `end_matcher` would succeed at the current position. Both are
/// only peeked at, so whatever they match is left in the remaining input. The
/// consumed slice is returned as the parser output.
#[tracing::instrument(ret, level = "debug", skip(end_matcher))]
pub fn contents<'r, 's, F: Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str>>(
    end_matcher: F,
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, &'s str> {
    let stop_condition = peek(alt((
        parser_with_context!(exit_matcher_parser)(context),
        parser_with_context!(end_matcher)(context),
    )));
    recognize(many_till(anychar, stop_condition))(input)
}
fn latex_environment_end(
current_name: &str,
) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str> {

View File

@ -0,0 +1,230 @@
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::alpha1;
use nom::character::complete::anychar;
use nom::character::complete::line_ending;
use nom::character::complete::none_of;
use nom::character::complete::one_of;
use nom::character::complete::space0;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many_till;
use nom::sequence::tuple;
use super::Context;
use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::get_one_before;
use crate::parser::LatexFragment;
/// Parse a LaTeX fragment object.
///
/// Each supported fragment form is tried in order: a raw `\name` command, `\( \)`,
/// `\[ \]`, `$$ $$`, a single character between dollar signs, and a bordered body
/// between dollar signs. Spaces and tabs following the fragment are consumed and
/// included in the fragment's `source`.
#[tracing::instrument(ret, level = "debug")]
pub fn latex_fragment<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, LatexFragment<'s>> {
    let (after_fragment, _) = alt((
        parser_with_context!(raw_latex_fragment)(context),
        parser_with_context!(escaped_parenthesis_fragment)(context),
        parser_with_context!(escaped_bracket_fragment)(context),
        parser_with_context!(double_dollar_fragment)(context),
        parser_with_context!(dollar_char_fragment)(context),
        parser_with_context!(bordered_dollar_fragment)(context),
    ))(input)?;
    let (after_whitespace, _) = space0(after_fragment)?;
    Ok((
        after_whitespace,
        LatexFragment {
            source: get_consumed(input, after_whitespace),
        },
    ))
}
#[tracing::instrument(ret, level = "debug")]
fn raw_latex_fragment<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
let (remaining, _) = tag("\\")(input)?;
let (remaining, _) = name(context, remaining)?;
let (remaining, _) = opt(parser_with_context!(brackets)(context))(remaining)?;
let source = get_consumed(input, remaining);
Ok((remaining, source))
}
/// Parse the name of a raw LaTeX command: one or more alphabetic characters as
/// accepted by nom's `alpha1`. `context` is only used for tracing output.
#[tracing::instrument(ret, level = "debug")]
fn name<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    let (remaining, letters) = alpha1(input)?;
    Ok((remaining, letters))
}
/// Parse a single bracket group following a raw LaTeX command name.
///
/// Two forms are accepted:
/// - `[...]`, whose contents stop at the first `{`, `}`, `[`, `]` or line ending;
/// - `{...}`, whose contents stop at the first `{`, `}` or line ending.
///
/// In both forms the scan also stops where the context's exit matcher succeeds.
/// The group only matches when the scan stopped directly in front of the
/// expected closing delimiter. The whole group, delimiters included, is returned.
#[tracing::instrument(ret, level = "debug")]
fn brackets<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    let square_group = recognize(tuple((
        tag("["),
        many_till(
            anychar,
            peek(alt((
                parser_with_context!(exit_matcher_parser)(context),
                recognize(one_of("{}[]")),
                line_ending,
            ))),
        ),
        tag("]"),
    )));
    let curly_group = recognize(tuple((
        tag("{"),
        many_till(
            anychar,
            peek(alt((
                parser_with_context!(exit_matcher_parser)(context),
                recognize(one_of("{}")),
                line_ending,
            ))),
        ),
        tag("}"),
    )));
    alt((square_group, curly_group))(input)
}
/// Parse an inline math fragment delimited by `\(` and `\)`.
///
/// The body runs until the first `\)` or until the context's exit matcher
/// succeeds. If the scan stops anywhere other than at `\)`, the closing tag
/// fails and so does the fragment. Returns the slice including both delimiters.
#[tracing::instrument(ret, level = "debug")]
fn escaped_parenthesis_fragment<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, &'s str> {
    let opening = "\\(";
    let closing = "\\)";

    let (after_open, _) = tag(opening)(input)?;
    let (after_body, _) = recognize(many_till(
        anychar,
        peek(alt((
            parser_with_context!(exit_matcher_parser)(context),
            tag(closing),
        ))),
    ))(after_open)?;
    let (after_close, _) = tag(closing)(after_body)?;

    Ok((after_close, get_consumed(input, after_close)))
}
/// Parse a display math fragment delimited by `\[` and `\]`.
///
/// The body runs until the first `\]` or until the context's exit matcher
/// succeeds. If the scan stops anywhere other than at `\]`, the closing tag
/// fails and so does the fragment. Returns the slice including both delimiters.
#[tracing::instrument(ret, level = "debug")]
fn escaped_bracket_fragment<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, &'s str> {
    let opening = "\\[";
    let closing = "\\]";

    let (after_open, _) = tag(opening)(input)?;
    let (after_body, _) = recognize(many_till(
        anychar,
        peek(alt((
            parser_with_context!(exit_matcher_parser)(context),
            tag(closing),
        ))),
    ))(after_open)?;
    let (after_close, _) = tag(closing)(after_body)?;

    Ok((after_close, get_consumed(input, after_close)))
}
/// Parse a TeX-style display math fragment delimited by `$$` on both sides.
///
/// The body runs until the first `$$` or until the context's exit matcher
/// succeeds. A lone `$` inside the body does not end the scan, so input such as
/// `$$ a $ b $$` is accepted as one fragment. If the scan stops anywhere other
/// than at `$$`, the closing tag fails and so does the fragment. Returns the
/// slice including both delimiters.
#[tracing::instrument(ret, level = "debug")]
fn double_dollar_fragment<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, &'s str> {
    // TODO: The documentation on the dollar sign versions is incomplete. Test to figure out whether there is a limit on the number of lines this can span.
    let (remaining, _) = tag("$$")(input)?;
    let (remaining, _) = recognize(many_till(
        anychar,
        peek(alt((
            parser_with_context!(exit_matcher_parser)(context),
            // Stop only at the real terminator; stopping at a single "$" would
            // make the closing tag below fail for bodies containing one.
            tag("$$"),
        ))),
    ))(remaining)?;
    let (remaining, _) = tag("$$")(remaining)?;
    let source = get_consumed(input, remaining);
    Ok((remaining, source))
}
/// Parse a TeX-style fragment consisting of exactly one character between dollar
/// signs, e.g. `$x$`.
///
/// The fragment must pass the [`pre`] check, the enclosed character must not be
/// whitespace or one of `.,?;"`, and the closing `$` must be followed by
/// something [`post`] accepts (which is peeked at, not consumed).
#[tracing::instrument(ret, level = "debug")]
fn dollar_char_fragment<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    pre(context, input)?;
    let (after_open, _) = tag("$")(input)?;
    let (after_char, _) = verify(none_of(".,?;\""), |c| !c.is_whitespace())(after_open)?;
    let (after_close, _) = tag("$")(after_char)?;
    peek(parser_with_context!(post)(context))(after_close)?;
    Ok((after_close, get_consumed(input, after_close)))
}
/// Check the character immediately before a dollar-sign fragment.
///
/// Fails when the character preceding `input` in the document is `$`. Succeeds
/// otherwise, including when there is no preceding character. No input is
/// consumed.
///
/// Panics if the context has no document root.
#[tracing::instrument(ret, level = "debug")]
pub fn pre<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
    let document_root = context.get_document_root().unwrap();
    let previous_char =
        get_one_before(document_root, input).and_then(|slice| slice.chars().next());
    if matches!(previous_char, Some('$')) {
        return Err(nom::Err::Error(CustomError::MyError(MyError(
            "Not a valid pre character for dollar char fragment.",
        ))));
    }
    Ok((input, ()))
}
/// Match the character that must follow the closing `$` of a dollar-sign
/// fragment: a space, a tab, one of `-.,;:!?'"`, or a line ending.
///
/// The matched text is consumed here; callers in this file wrap this parser in
/// `peek` so that nothing is consumed from their input.
#[tracing::instrument(ret, level = "debug")]
pub fn post<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
    // TODO: What about eof? Test to find out.
    // TODO: Figure out which punctuation characters should be included.
    let blank_or_punctuation = recognize(one_of(" \t-.,;:!?'\""));
    alt((blank_or_punctuation, line_ending))(input).map(|(remaining, _)| (remaining, ()))
}
/// Parse a TeX-style inline fragment of the form `$BORDER1 BODY BORDER2$`.
///
/// Requirements checked, in order:
/// 1. the [`pre`] check passes for the position of the opening `$`;
/// 2. the character after the opening `$` satisfies [`open_border`];
/// 3. the text up to the next `$` (or the context's exit point) spans at most
///    three lines;
/// 4. the character before the closing `$` satisfies [`close_border`];
/// 5. the closing `$` is followed by something [`post`] accepts.
///
/// The border and post checks only peek. Returns the slice including both
/// dollar signs.
#[tracing::instrument(ret, level = "debug")]
fn bordered_dollar_fragment<'r, 's>(
    context: Context<'r, 's>,
    input: &'s str,
) -> Res<&'s str, &'s str> {
    pre(context, input)?;
    let (after_open, _) = tag("$")(input)?;

    // TODO: I'm assuming I should be peeking at the borders but the documentation is not clear. Test to figure out.
    peek(parser_with_context!(open_border)(context))(after_open)?;

    // TODO: As an optimization it would be nice to exit early upon hitting the 3rd line break
    let within_line_limit = |body: &str| body.lines().take(4).count() <= 3;
    let (after_body, _) = verify(
        recognize(many_till(
            anychar,
            peek(alt((
                parser_with_context!(exit_matcher_parser)(context),
                tag("$"),
            ))),
        )),
        within_line_limit,
    )(after_open)?;

    peek(parser_with_context!(close_border)(context))(after_body)?;
    let (after_close, _) = tag("$")(after_body)?;
    peek(parser_with_context!(post)(context))(after_close)?;

    Ok((after_close, get_consumed(input, after_close)))
}
/// Match a single character that may open the body of a bordered dollar-sign
/// fragment: anything that is not whitespace and not one of `.,;$`.
#[tracing::instrument(ret, level = "debug")]
pub fn open_border<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
    let not_whitespace = |c: &char| !c.is_whitespace();
    recognize(verify(none_of(".,;$"), not_whitespace))(input)
}
#[tracing::instrument(ret, level = "debug")]
pub fn close_border<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
let document_root = context.get_document_root().unwrap();
let preceding_character = get_one_before(document_root, input)
.map(|slice| slice.chars().next())
.flatten();
match preceding_character {
Some(c) if !c.is_whitespace() && !".,;$".contains(c) => Ok((input, ())),
_ => {
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Not a valid pre character for dollar char fragment.",
))));
}
}
}

View File

@ -16,6 +16,7 @@ mod greater_element;
mod horizontal_rule;
mod keyword;
mod latex_environment;
mod latex_fragment;
mod lesser_block;
mod lesser_element;
mod list;
@ -73,6 +74,7 @@ pub use object::Bold;
pub use object::Code;
pub use object::Entity;
pub use object::Italic;
pub use object::LatexFragment;
pub use object::Object;
pub use object::OrgMacro;
pub use object::PlainLink;

View File

@ -16,6 +16,7 @@ pub enum Object<'s> {
AngleLink(AngleLink<'s>),
OrgMacro(OrgMacro<'s>),
Entity(Entity<'s>),
LatexFragment(LatexFragment<'s>),
}
#[derive(Debug, PartialEq)]
@ -103,6 +104,11 @@ pub struct Entity<'s> {
pub entity_name: &'s str,
}
/// A LaTeX fragment object.
#[derive(Debug, PartialEq)]
pub struct LatexFragment<'s> {
    /// The slice of the original input that this fragment was parsed from.
    pub source: &'s str,
}
impl<'s> Source<'s> for Object<'s> {
fn get_source(&'s self) -> &'s str {
match self {
@ -120,6 +126,7 @@ impl<'s> Source<'s> for Object<'s> {
Object::AngleLink(obj) => obj.source,
Object::OrgMacro(obj) => obj.source,
Object::Entity(obj) => obj.source,
Object::LatexFragment(obj) => obj.source,
}
}
}
@ -201,3 +208,9 @@ impl<'s> Source<'s> for Entity<'s> {
self.source
}
}
// Expose the fragment's stored source slice through the `Source` trait.
impl<'s> Source<'s> for LatexFragment<'s> {
    fn get_source(&'s self) -> &'s str {
        self.source
    }
}

View File

@ -9,6 +9,7 @@ use super::Context;
use crate::error::Res;
use crate::parser::angle_link::angle_link;
use crate::parser::entity::entity;
use crate::parser::latex_fragment::latex_fragment;
use crate::parser::object::Object;
use crate::parser::org_macro::org_macro;
use crate::parser::plain_link::plain_link;
@ -21,11 +22,15 @@ pub fn standard_set_object<'r, 's>(
context: Context<'r, 's>,
input: &'s str,
) -> Res<&'s str, Object<'s>> {
// TODO: LaTeX fragments, export snippets, footnote references, citations (NOT citation references), inline babel calls, inline source blocks, line breaks, links, macros, targets and radio targets, statistics cookies, subscript and superscript, timestamps, and text markup.
// TODO: export snippets, footnote references, citations (NOT citation references), inline babel calls, inline source blocks, line breaks, links, macros, targets and radio targets, statistics cookies, subscript and superscript, timestamps, and text markup.
not(|i| context.check_exit_matcher(i))(input)?;
alt((
map(parser_with_context!(entity)(context), Object::Entity),
map(
parser_with_context!(latex_fragment)(context),
Object::LatexFragment,
),
map(parser_with_context!(radio_link)(context), Object::RadioLink),
map(
parser_with_context!(radio_target)(context),
@ -48,11 +53,15 @@ pub fn minimal_set_object<'r, 's>(
context: Context<'r, 's>,
input: &'s str,
) -> Res<&'s str, Object<'s>> {
// TODO: LaTeX fragments, superscripts and subscripts
// TODO: superscripts and subscripts
not(|i| context.check_exit_matcher(i))(input)?;
alt((
map(parser_with_context!(entity)(context), Object::Entity),
map(
parser_with_context!(latex_fragment)(context),
Object::LatexFragment,
),
parser_with_context!(text_markup)(context),
map(parser_with_context!(plain_text)(context), Object::PlainText),
))(input)
@ -66,6 +75,10 @@ pub fn any_object_except_plain_text<'r, 's>(
// Used for exit matchers so this does not check exit matcher condition.
alt((
map(parser_with_context!(entity)(context), Object::Entity),
map(
parser_with_context!(latex_fragment)(context),
Object::LatexFragment,
),
map(parser_with_context!(radio_link)(context), Object::RadioLink),
map(
parser_with_context!(radio_target)(context),

View File

@ -175,7 +175,7 @@ fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> {
_ => false,
}),
'\\',
one_of(r#""n"#),
one_of(r#""n\\"#),
)(remaining)?;
let (remaining, _) = tag(r#"""#)(remaining)?;
let source = get_consumed(input, remaining);
@ -298,4 +298,27 @@ mod tests {
r#"baz"#
);
}
/// A quoted atom containing escaped backslashes must be parsed as a single atom
/// with the escapes left intact.
#[test]
fn string_containing_escaped_characters() {
    let input = r#" (foo "\\( x=2 \\)" bar) "#;
    let (remaining, parsed) = sexp_with_padding(input).expect("Parse the input");
    assert_eq!(remaining, "");
    // Destructuring doubles as the "top-level token is a list" assertion.
    let children = match parsed {
        Token::List(children) => children,
        _ => panic!("Should be a list."),
    };
    assert_eq!(
        // Index 1 is the second element of the list: the quoted string.
        match children.get(1) {
            Some(Token::Atom(body)) => *body,
            _ => panic!("Second child should be an atom."),
        },
        r#""\\( x=2 \\)""#
    );
}
}

View File

@ -53,6 +53,7 @@ impl<'r, 's> Token<'r, 's> {
Object::AngleLink(_) => Box::new(std::iter::empty()),
Object::OrgMacro(_) => Box::new(std::iter::empty()),
Object::Entity(_) => Box::new(std::iter::empty()),
Object::LatexFragment(_) => Box::new(std::iter::empty()),
},
Token::Element(elem) => match elem {
Element::Paragraph(inner) => Box::new(inner.children.iter().map(Token::Object)),