diff --git a/build.rs b/build.rs index 64e0928..bb99b66 100644 --- a/build.rs +++ b/build.rs @@ -79,7 +79,6 @@ fn is_expect_fail(name: &str) -> Option<&str> { "element_container_priority_greater_block_greater_block" => Some("Need to implement subscript."), "element_container_priority_section_greater_block" => Some("Need to implement subscript."), "paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."), - "entity_simple" => Some("Need to implement LaTeX fragments."), _ => None, } } diff --git a/org_mode_samples/latex_fragment/fragment_vs_environment.org b/org_mode_samples/latex_fragment/fragment_vs_environment.org new file mode 100644 index 0000000..005027f --- /dev/null +++ b/org_mode_samples/latex_fragment/fragment_vs_environment.org @@ -0,0 +1,7 @@ +\begin{itemize} +\item foo \sqrt{x} +\end{itemize} + +\begin{itemize} +\item bar \sqrt{y} +\end{itemize} % Need text on this line to prevent it from becoming a LaTeX environment org-mode element diff --git a/org_mode_samples/latex_fragment/math_mode.org b/org_mode_samples/latex_fragment/math_mode.org new file mode 100644 index 0000000..e1519f6 --- /dev/null +++ b/org_mode_samples/latex_fragment/math_mode.org @@ -0,0 +1 @@ +tex can have math between dollar signs like $x^2=y$ and $$ x=+\sqrt{y} $$ but also braces and brackets like \( x=2 \) and \[ x=-\sqrt{2} \] diff --git a/org_mode_samples/latex_fragment/simple.org b/org_mode_samples/latex_fragment/simple.org new file mode 100644 index 0000000..6acb8ff --- /dev/null +++ b/org_mode_samples/latex_fragment/simple.org @@ -0,0 +1,4 @@ +\begin{itemize} +% this would be a LaTeX comment if this was a LaTeX document +\item Heres some math \sqrt{y} +\end{itemize} % Need text on this line to prevent it from becoming a LaTeX environment org-mode element diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 1156dc8..3ec5328 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -24,6 +24,7 @@ use crate::parser::HorizontalRule; use crate::parser::Italic; use crate::parser::Keyword; use crate::parser::LatexEnvironment; +use crate::parser::LatexFragment; use crate::parser::Object; use crate::parser::OrgMacro; use crate::parser::Paragraph; @@ -156,6 +157,7 @@ fn compare_object<'s>( Object::AngleLink(obj) => compare_angle_link(source, emacs, obj), Object::OrgMacro(obj) => compare_org_macro(source, emacs, obj), Object::Entity(obj) => compare_entity(source, emacs, obj), + Object::LatexFragment(obj) => compare_latex_fragment(source, emacs, obj), } } @@ -1263,3 +1265,26 @@ fn compare_entity<'s>( children: Vec::new(), }) } + +fn compare_latex_fragment<'s>( + source: &'s str, + emacs: &'s Token<'s>, + rust: &'s LatexFragment<'s>, +) -> Result> { + let mut this_status = DiffStatus::Good; + let emacs_name = "latex-fragment"; + if assert_name(emacs, emacs_name).is_err() { + this_status = DiffStatus::Bad; + } + + if assert_bounds(source, emacs, rust).is_err() { + this_status = DiffStatus::Bad; + } + + Ok(DiffResult { + status: this_status, + name: emacs_name.to_owned(), + message: None, + children: Vec::new(), + }) +} diff --git a/src/parser/latex_environment.rs b/src/parser/latex_environment.rs index a1353f7..7b6e653 100644 --- a/src/parser/latex_environment.rs +++ b/src/parser/latex_environment.rs @@ -2,10 +2,13 @@ use nom::branch::alt; use nom::bytes::complete::tag; use nom::bytes::complete::tag_no_case; use nom::bytes::complete::take_while1; +use nom::character::complete::anychar; use nom::character::complete::line_ending; use nom::character::complete::space0; use nom::combinator::eof; -use nom::combinator::map; +use nom::combinator::peek; +use nom::combinator::recognize; +use nom::multi::many_till; use nom::sequence::tuple; use super::util::get_consumed; @@ -15,7 +18,7 @@ use crate::parser::exiting::ExitClass; use crate::parser::parser_context::ContextElement; use crate::parser::parser_context::ExitMatcherNode; use crate::parser::parser_with_context::parser_with_context; -use crate::parser::plain_text::plain_text; +use crate::parser::util::exit_matcher_parser; use crate::parser::util::start_of_line; use crate::parser::LatexEnvironment; @@ -41,9 +44,7 @@ pub fn latex_environment<'r, 's>( exit_matcher: &latex_environment_end_specialized, })); - let (remaining, _contents) = map(parser_with_context!(plain_text)(&parser_context), |obj| { - obj.source - })(remaining)?; + let (remaining, _contents) = contents(&latex_environment_end_specialized, context, remaining)?; let (remaining, _end) = latex_environment_end_specialized(&parser_context, remaining)?; let source = get_consumed(input, remaining); @@ -55,6 +56,23 @@ fn name<'s>(input: &'s str) -> Res<&'s str, &'s str> { take_while1(|c: char| c.is_alphanumeric() || c == '*')(input) } +#[tracing::instrument(ret, level = "debug", skip(end_matcher))] +pub fn contents<'r, 's, F: Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str>>( + end_matcher: F, + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, &'s str> { + let (remaining, source) = recognize(many_till( + anychar, + peek(alt(( + parser_with_context!(exit_matcher_parser)(context), + parser_with_context!(end_matcher)(context), + ))), + ))(input)?; + + Ok((remaining, source)) +} + fn latex_environment_end( current_name: &str, ) -> impl for<'r, 's> Fn(Context<'r, 's>, &'s str) -> Res<&'s str, &'s str> { diff --git a/src/parser/latex_fragment.rs b/src/parser/latex_fragment.rs new file mode 100644 index 0000000..7f4e9d7 --- /dev/null +++ b/src/parser/latex_fragment.rs @@ -0,0 +1,230 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::character::complete::alpha1; +use nom::character::complete::anychar; +use nom::character::complete::line_ending; +use nom::character::complete::none_of; +use nom::character::complete::one_of; +use nom::character::complete::space0; +use nom::combinator::opt; +use nom::combinator::peek; +use nom::combinator::recognize; +use nom::combinator::verify; +use nom::multi::many_till; +use nom::sequence::tuple; + +use super::Context; +use crate::error::CustomError; +use crate::error::MyError; +use crate::error::Res; +use crate::parser::parser_with_context::parser_with_context; +use crate::parser::util::exit_matcher_parser; +use crate::parser::util::get_consumed; +use crate::parser::util::get_one_before; +use crate::parser::LatexFragment; + +#[tracing::instrument(ret, level = "debug")] +pub fn latex_fragment<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, LatexFragment<'s>> { + let (remaining, _) = alt(( + parser_with_context!(raw_latex_fragment)(context), + parser_with_context!(escaped_parenthesis_fragment)(context), + parser_with_context!(escaped_bracket_fragment)(context), + parser_with_context!(double_dollar_fragment)(context), + parser_with_context!(dollar_char_fragment)(context), + parser_with_context!(bordered_dollar_fragment)(context), + ))(input)?; + let (remaining, _) = space0(remaining)?; + let source = get_consumed(input, remaining); + Ok((remaining, LatexFragment { source })) +} + +#[tracing::instrument(ret, level = "debug")] +fn raw_latex_fragment<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + let (remaining, _) = tag("\\")(input)?; + let (remaining, _) = name(context, remaining)?; + let (remaining, _) = opt(parser_with_context!(brackets)(context))(remaining)?; + + let source = get_consumed(input, remaining); + Ok((remaining, source)) +} + +#[tracing::instrument(ret, level = "debug")] +fn name<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + alpha1(input) +} + +#[tracing::instrument(ret, level = "debug")] +fn brackets<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + let (remaining, body) = alt(( + recognize(tuple(( + tag("["), + many_till( + anychar, + peek(alt(( + parser_with_context!(exit_matcher_parser)(context), + alt((recognize(one_of("{}[]")), line_ending)), + ))), + ), + tag("]"), + ))), + recognize(tuple(( + tag("{"), + many_till( + anychar, + peek(alt(( + parser_with_context!(exit_matcher_parser)(context), + alt((recognize(one_of("{}")), line_ending)), + ))), + ), + tag("}"), + ))), + ))(input)?; + Ok((remaining, body)) +} + +#[tracing::instrument(ret, level = "debug")] +fn escaped_parenthesis_fragment<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, &'s str> { + let (remaining, _) = tag("\\(")(input)?; + let (remaining, _) = recognize(many_till( + anychar, + peek(alt(( + parser_with_context!(exit_matcher_parser)(context), + tag("\\)"), + ))), + ))(remaining)?; + let (remaining, _) = tag("\\)")(remaining)?; + + let source = get_consumed(input, remaining); + Ok((remaining, source)) +} + +#[tracing::instrument(ret, level = "debug")] +fn escaped_bracket_fragment<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, &'s str> { + let (remaining, _) = tag("\\[")(input)?; + let (remaining, _) = recognize(many_till( + anychar, + peek(alt(( + parser_with_context!(exit_matcher_parser)(context), + tag("\\]"), + ))), + ))(remaining)?; + let (remaining, _) = tag("\\]")(remaining)?; + + let source = get_consumed(input, remaining); + Ok((remaining, source)) +} + +#[tracing::instrument(ret, level = "debug")] +fn double_dollar_fragment<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, &'s str> { + // TODO: The documentation on the dollar sign versions is incomplete. Test to figure out what the real requirements are. For example, can this span more than 3 lines and can this contain a single $ since its terminated by $$? + let (remaining, _) = tag("$$")(input)?; + let (remaining, _) = recognize(many_till( + anychar, + peek(alt(( + parser_with_context!(exit_matcher_parser)(context), + tag("$"), + ))), + ))(remaining)?; + let (remaining, _) = tag("$$")(remaining)?; + + let source = get_consumed(input, remaining); + Ok((remaining, source)) +} + +#[tracing::instrument(ret, level = "debug")] +fn dollar_char_fragment<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + let (_, _) = pre(context, input)?; + let (remaining, _) = tag("$")(input)?; + let (remaining, _) = verify(none_of(".,?;\""), |c| !c.is_whitespace())(remaining)?; + let (remaining, _) = tag("$")(remaining)?; + let (_, _) = peek(parser_with_context!(post)(context))(remaining)?; + + let source = get_consumed(input, remaining); + Ok((remaining, source)) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn pre<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { + let document_root = context.get_document_root().unwrap(); + let preceding_character = get_one_before(document_root, input) + .map(|slice| slice.chars().next()) + .flatten(); + if let Some('$') = preceding_character { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Not a valid pre character for dollar char fragment.", + )))); + } + Ok((input, ())) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn post<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { + // TODO: What about eof? Test to find out. + + // TODO: Figure out which punctuation characters should be included. + let (remaining, _) = alt((recognize(one_of(" \t-.,;:!?'\"")), line_ending))(input)?; + Ok((remaining, ())) +} + +#[tracing::instrument(ret, level = "debug")] +fn bordered_dollar_fragment<'r, 's>( + context: Context<'r, 's>, + input: &'s str, +) -> Res<&'s str, &'s str> { + let (_, _) = pre(context, input)?; + let (remaining, _) = tag("$")(input)?; + // TODO: I'm assuming I should be peeking at the borders but the documentation is not clear. Test to figure out. + let (_, _) = peek(parser_with_context!(open_border)(context))(remaining)?; + + // TODO: As an optimization it would be nice to exit early upon hitting the 3rd line break + let (remaining, _) = verify( + recognize(many_till( + anychar, + peek(alt(( + parser_with_context!(exit_matcher_parser)(context), + tag("$"), + ))), + )), + |body: &str| body.lines().take(4).count() <= 3, + )(remaining)?; + + let (_, _) = peek(parser_with_context!(close_border)(context))(remaining)?; + let (remaining, _) = tag("$")(remaining)?; + let (_, _) = peek(parser_with_context!(post)(context))(remaining)?; + + let source = get_consumed(input, remaining); + Ok((remaining, source)) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn open_border<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> { + recognize(verify(none_of(".,;$"), |c| !c.is_whitespace()))(input) +} + +#[tracing::instrument(ret, level = "debug")] +pub fn close_border<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> { + let document_root = context.get_document_root().unwrap(); + let preceding_character = get_one_before(document_root, input) + .map(|slice| slice.chars().next()) + .flatten(); + match preceding_character { + Some(c) if !c.is_whitespace() && !".,;$".contains(c) => Ok((input, ())), + _ => { + return Err(nom::Err::Error(CustomError::MyError(MyError( + "Not a valid pre character for dollar char fragment.", + )))); + } + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 22124ff..0f41c30 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -16,6 +16,7 @@ mod greater_element; mod horizontal_rule; mod keyword; mod latex_environment; +mod latex_fragment; mod lesser_block; mod lesser_element; mod list; @@ -73,6 +74,7 @@ pub use object::Bold; pub use object::Code; pub use object::Entity; pub use object::Italic; +pub use object::LatexFragment; pub use object::Object; pub use object::OrgMacro; pub use object::PlainLink; diff --git a/src/parser/object.rs b/src/parser/object.rs index c0428be..7aa01f5 100644 --- a/src/parser/object.rs +++ b/src/parser/object.rs @@ -16,6 +16,7 @@ pub enum Object<'s> { AngleLink(AngleLink<'s>), OrgMacro(OrgMacro<'s>), Entity(Entity<'s>), + LatexFragment(LatexFragment<'s>), } #[derive(Debug, PartialEq)] @@ -103,6 +104,11 @@ pub struct Entity<'s> { pub entity_name: &'s str, } +#[derive(Debug, PartialEq)] +pub struct LatexFragment<'s> { + pub source: &'s str, +} + impl<'s> Source<'s> for Object<'s> { fn get_source(&'s self) -> &'s str { match self { @@ -120,6 +126,7 @@ impl<'s> Source<'s> for Object<'s> { Object::AngleLink(obj) => obj.source, Object::OrgMacro(obj) => obj.source, Object::Entity(obj) => obj.source, + Object::LatexFragment(obj) => obj.source, } } } @@ -201,3 +208,9 @@ impl<'s> Source<'s> for Entity<'s> { self.source } } + +impl<'s> Source<'s> for LatexFragment<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} diff --git a/src/parser/object_parser.rs b/src/parser/object_parser.rs index fb72bcf..437869e 100644 --- a/src/parser/object_parser.rs +++ b/src/parser/object_parser.rs @@ -9,6 +9,7 @@ use super::Context; use crate::error::Res; use crate::parser::angle_link::angle_link; use crate::parser::entity::entity; +use crate::parser::latex_fragment::latex_fragment; use crate::parser::object::Object; use crate::parser::org_macro::org_macro; use crate::parser::plain_link::plain_link; @@ -21,11 +22,15 @@ pub fn standard_set_object<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, Object<'s>> { - // TODO: LaTeX fragments, export snippets, footnote references, citations (NOT citation references), inline babel calls, inline source blocks, line breaks, links, macros, targets and radio targets, statistics cookies, subscript and superscript, timestamps, and text markup. + // TODO: export snippets, footnote references, citations (NOT citation references), inline babel calls, inline source blocks, line breaks, links, macros, targets and radio targets, statistics cookies, subscript and superscript, timestamps, and text markup. not(|i| context.check_exit_matcher(i))(input)?; alt(( map(parser_with_context!(entity)(context), Object::Entity), + map( + parser_with_context!(latex_fragment)(context), + Object::LatexFragment, + ), map(parser_with_context!(radio_link)(context), Object::RadioLink), map( parser_with_context!(radio_target)(context), @@ -48,11 +53,15 @@ pub fn minimal_set_object<'r, 's>( context: Context<'r, 's>, input: &'s str, ) -> Res<&'s str, Object<'s>> { - // TODO: LaTeX fragments, superscripts and subscripts + // TODO: superscripts and subscripts not(|i| context.check_exit_matcher(i))(input)?; alt(( map(parser_with_context!(entity)(context), Object::Entity), + map( + parser_with_context!(latex_fragment)(context), + Object::LatexFragment, + ), parser_with_context!(text_markup)(context), map(parser_with_context!(plain_text)(context), Object::PlainText), ))(input) @@ -66,6 +75,10 @@ pub fn any_object_except_plain_text<'r, 's>( // Used for exit matchers so this does not check exit matcher condition. alt(( map(parser_with_context!(entity)(context), Object::Entity), + map( + parser_with_context!(latex_fragment)(context), + Object::LatexFragment, + ), map(parser_with_context!(radio_link)(context), Object::RadioLink), map( parser_with_context!(radio_target)(context), diff --git a/src/parser/sexp.rs b/src/parser/sexp.rs index fc97365..8ed62b5 100644 --- a/src/parser/sexp.rs +++ b/src/parser/sexp.rs @@ -175,7 +175,7 @@ fn quoted_atom<'s>(input: &'s str) -> Res<&'s str, Token<'s>> { _ => false, }), '\\', - one_of(r#""n"#), + one_of(r#""n\\"#), )(remaining)?; let (remaining, _) = tag(r#"""#)(remaining)?; let source = get_consumed(input, remaining); @@ -298,4 +298,27 @@ mod tests { r#"baz"# ); } + + #[test] + fn string_containing_escaped_characters() { + let input = r#" (foo "\\( x=2 \\)" bar) "#; + let (remaining, parsed) = sexp_with_padding(input).expect("Parse the input"); + assert_eq!(remaining, ""); + assert!(match parsed { + Token::Atom(_) => false, + Token::List(_) => true, + Token::TextWithProperties(_) => false, + }); + let children = match parsed { + Token::List(children) => children, + _ => panic!("Should be a list."), + }; + assert_eq!( + match children.get(1) { + Some(Token::Atom(body)) => *body, + _ => panic!("First child should be an atom."), + }, + r#""\\( x=2 \\)""# + ) + } } diff --git a/src/parser/token.rs b/src/parser/token.rs index 02381d2..04deabf 100644 --- a/src/parser/token.rs +++ b/src/parser/token.rs @@ -53,6 +53,7 @@ impl<'r, 's> Token<'r, 's> { Object::AngleLink(_) => Box::new(std::iter::empty()), Object::OrgMacro(_) => Box::new(std::iter::empty()), Object::Entity(_) => Box::new(std::iter::empty()), + Object::LatexFragment(_) => Box::new(std::iter::empty()), }, Token::Element(elem) => match elem { Element::Paragraph(inner) => Box::new(inner.children.iter().map(Token::Object)),