diff --git a/org_mode_samples/object/macro/capitalize.org b/org_mode_samples/object/macro/capitalize.org new file mode 100644 index 00000000..8bbd61d7 --- /dev/null +++ b/org_mode_samples/object/macro/capitalize.org @@ -0,0 +1 @@ +{{{Foo(Bar,Baz)}}} diff --git a/org_mode_samples/object/macro/escape.org b/org_mode_samples/object/macro/escape.org new file mode 100644 index 00000000..312f3d4a --- /dev/null +++ b/org_mode_samples/object/macro/escape.org @@ -0,0 +1,7 @@ +{{{foo}}} + +{{{fo\o}}} + +{{{foo(b\ar)}}} + +{{{foo(b\,r)}}} diff --git a/org_mode_samples/object/macro/whitespace_in_args.org b/org_mode_samples/object/macro/whitespace_in_args.org new file mode 100644 index 00000000..518d4d8b --- /dev/null +++ b/org_mode_samples/object/macro/whitespace_in_args.org @@ -0,0 +1,10 @@ +{{{foo(bar baz)}}} + +{{{foo(bar +baz)}}} + +{{{foo(foo )}}} + +{{{foo(foo , bar )}}} + +{{{foo(foo , bar , baz )}}} diff --git a/src/compare/compare_field.rs b/src/compare/compare_field.rs index f4af14d0..8cc41381 100644 --- a/src/compare/compare_field.rs +++ b/src/compare/compare_field.rs @@ -1,6 +1,7 @@ use std::fmt::Debug; use super::diff::DiffStatus; +use super::sexp::unquote; use super::sexp::Token; use super::util::get_property; use super::util::get_property_quoted_string; @@ -102,3 +103,59 @@ pub(crate) fn compare_property_unquoted_atom<'b, 's, 'x, R, RG: Fn(R) -> Option< Ok(None) } } + +pub(crate) fn compare_property_list_of_quoted_string< + 'b, + 's, + 'x, + R, + RV: AsRef + std::fmt::Debug, + RI: Iterator, + RG: Fn(R) -> Option, +>( + emacs: &'b Token<'s>, + rust_node: R, + emacs_field: &'x str, + rust_value_getter: RG, +) -> Result)>, Box> { + let value = get_property(emacs, emacs_field)? + .map(Token::as_list) + .map_or(Ok(None), |r| r.map(Some))?; + let rust_value = rust_value_getter(rust_node); + // TODO: Seems we are needlessly coverting to a vec here. 
+ let rust_value: Option> = rust_value.map(|it| it.collect()); + match (value, &rust_value) { + (None, None) => {} + (None, Some(_)) | (Some(_), None) => { + let this_status = DiffStatus::Bad; + let message = Some(format!( + "{} mismatch (emacs != rust) {:?} != {:?}", + emacs_field, value, rust_value + )); + return Ok(Some((this_status, message))); + } + (Some(el), Some(rl)) if el.len() != rl.len() => { + let this_status = DiffStatus::Bad; + let message = Some(format!( + "{} mismatch (emacs != rust) {:?} != {:?}", + emacs_field, value, rust_value + )); + return Ok(Some((this_status, message))); + } + (Some(el), Some(rl)) => { + for (e, r) in el.iter().zip(rl) { + let e = unquote(e.as_atom()?)?; + let r = r.as_ref(); + if e != r { + let this_status = DiffStatus::Bad; + let message = Some(format!( + "{} mismatch (emacs != rust) {:?} != {:?}. Full list: {:?} != {:?}", + emacs_field, e, r, value, rust_value + )); + return Ok(Some((this_status, message))); + } + } + } + } + Ok(None) +} diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 2f26f224..5afb6e08 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -6,6 +6,7 @@ use std::collections::HashSet; use super::compare_field::compare_identity; use super::compare_field::compare_property_always_nil; +use super::compare_field::compare_property_list_of_quoted_string; use super::compare_field::compare_property_quoted_string; use super::compare_field::compare_property_unquoted_atom; use super::elisp_fact::ElispFact; @@ -3068,10 +3069,35 @@ fn compare_org_macro<'b, 's>( emacs: &'b Token<'s>, rust: &'b OrgMacro<'s>, ) -> Result, Box> { - let this_status = DiffStatus::Good; - let message = None; + let mut this_status = DiffStatus::Good; + let mut message = None; - // TODO: Compare :key :value :args + if let Some((new_status, new_message)) = compare_properties!( + emacs, + rust, + ( + EmacsField::Required(":key"), + |r| Some(r.get_key()), + compare_property_quoted_string + ), + ( + 
EmacsField::Required(":value"), + |r| Some(r.value), + compare_property_quoted_string + ), + ( + EmacsField::Required(":args"), + |r| if r.args.is_empty() { + None + } else { + Some(r.get_args()) + }, + compare_property_list_of_quoted_string + ) + )? { + this_status = new_status; + message = new_message; + } Ok(DiffResult { status: this_status, diff --git a/src/parser/org_macro.rs b/src/parser/org_macro.rs index 093dc31e..9ead92ec 100644 --- a/src/parser/org_macro.rs +++ b/src/parser/org_macro.rs @@ -1,11 +1,13 @@ use nom::bytes::complete::tag; use nom::character::complete::anychar; +use nom::character::complete::space0; use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::verify; use nom::multi::many0; use nom::multi::separated_list0; +use nom::sequence::tuple; use super::org_source::OrgSource; use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting; @@ -26,6 +28,7 @@ pub(crate) fn org_macro<'b, 'g, 'r, 's>( let (remaining, macro_name) = org_macro_name(context, remaining)?; let (remaining, macro_args) = opt(parser_with_context!(org_macro_args)(context))(remaining)?; let (remaining, _) = tag("}}}")(remaining)?; + let macro_value = get_consumed(input, remaining); let (remaining, _trailing_whitespace) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; @@ -34,12 +37,13 @@ pub(crate) fn org_macro<'b, 'g, 'r, 's>( remaining, OrgMacro { source: source.into(), - macro_name: macro_name.into(), - macro_args: macro_args + key: macro_name.into(), + args: macro_args .unwrap_or_else(|| Vec::with_capacity(0)) .into_iter() .map(|arg| arg.into()) .collect(), + value: Into::<&str>::into(macro_value), }, )) } @@ -65,7 +69,7 @@ fn org_macro_args<'b, 'g, 'r, 's>( let (remaining, _) = tag("(")(input)?; let (remaining, args) = separated_list0(tag(","), parser_with_context!(org_macro_arg)(context))(remaining)?; - let (remaining, _) = tag(")")(remaining)?; + let (remaining, _) = tuple((space0, 
tag(")")))(remaining)?; Ok((remaining, args)) } @@ -80,6 +84,10 @@ fn org_macro_arg<'b, 'g, 'r, 's>( loop { not(parser_with_context!(exit_matcher_parser)(context))(remaining)?; not(peek(tag("}}}")))(remaining)?; + if peek(tuple((space0::, CustomError<_>>, tag(")"))))(remaining).is_ok() { + break; + } + let (new_remaining, next_char) = anychar(remaining)?; if escaping { remaining = new_remaining; diff --git a/src/types/object.rs b/src/types/object.rs index 01435d4f..22420433 100644 --- a/src/types/object.rs +++ b/src/types/object.rs @@ -1,9 +1,11 @@ use std::borrow::Borrow; use std::borrow::Cow; +use super::util::coalesce_whitespace_escaped; use super::util::coalesce_whitespace_if_line_break; use super::util::remove_line_break; use super::util::remove_whitespace_if_line_break; +use super::util::to_lowercase; use super::GetStandardProperties; use super::StandardProperties; @@ -148,8 +150,18 @@ pub struct AngleLink<'s> { #[derive(Debug, PartialEq)] pub struct OrgMacro<'s> { pub source: &'s str, - pub macro_name: &'s str, - pub macro_args: Vec<&'s str>, + + /// The key from the source. + /// + /// This does not take into account the post-processing that you would get from the upstream emacs org-mode AST. Use `get_key` for an equivalent value. + pub key: &'s str, + + /// The args from the source. + /// + /// This does not take into account the post-processing that you would get from the upstream emacs org-mode AST. Use `get_args` for an equivalent value. 
+ pub args: Vec<&'s str>, + + pub value: &'s str, } #[derive(Debug, PartialEq)] @@ -732,3 +744,15 @@ impl<'s> AngleLink<'s> { self.search_option.map(remove_whitespace_if_line_break) } } + +impl<'s> OrgMacro<'s> { + pub fn get_key<'b>(&'b self) -> Cow<'s, str> { + to_lowercase(self.key) + } + + pub fn get_args<'b>(&'b self) -> impl Iterator> + 'b { + self.args + .iter() + .map(|arg| coalesce_whitespace_escaped('\\', |c| ",".contains(c))(*arg)) + } +} diff --git a/src/types/util.rs b/src/types/util.rs index c336474a..99cf5ed8 100644 --- a/src/types/util.rs +++ b/src/types/util.rs @@ -197,3 +197,324 @@ enum CoalesceWhitespaceIfLineBreakState { ret: String, }, } + +/// Removes all whitespace from a string. +/// +/// Example: "foo bar" => "foobar" and "foo \n bar" => "foobar". +#[allow(dead_code)] +pub(crate) fn coalesce_whitespace<'s>(input: &'s str) -> Cow<'s, str> { + let mut state = CoalesceWhitespace::Normal; + for (offset, c) in input.char_indices() { + match (&mut state, c) { + (CoalesceWhitespace::Normal, ' ' | '\t' | '\r' | '\n') => { + let mut ret = String::with_capacity(input.len()); + ret.push_str(&input[..offset]); + ret.push(' '); + state = CoalesceWhitespace::HasWhitespace { + in_whitespace: true, + ret, + }; + } + (CoalesceWhitespace::Normal, _) => {} + ( + CoalesceWhitespace::HasWhitespace { in_whitespace, ret }, + ' ' | '\t' | '\r' | '\n', + ) => { + if !*in_whitespace { + *in_whitespace = true; + ret.push(' '); + } + } + (CoalesceWhitespace::HasWhitespace { in_whitespace, ret }, _) => { + *in_whitespace = false; + ret.push(c); + } + } + } + match state { + CoalesceWhitespace::Normal => Cow::Borrowed(input), + CoalesceWhitespace::HasWhitespace { + in_whitespace: _, + ret, + } => Cow::Owned(ret), + } +} + +enum CoalesceWhitespace { + Normal, + HasWhitespace { in_whitespace: bool, ret: String }, +} + +/// Removes all whitespace from a string and handle escaping characters. 
///
/// Note that whitespace is collapsed, not deleted: every run of whitespace (space, tab, carriage
/// return, line feed) becomes a single space. An escape character followed by an escapable
/// character is replaced by that character alone; an escape character followed by anything else
/// is kept verbatim.
///
/// Example: with backslash as the escape character and comma as the only escapable character,
/// "foo \n bar" becomes "foo bar" and "foo\,bar" becomes "foo,bar".
///
/// The returned closure borrows its input when nothing has to be rewritten and allocates a new
/// string otherwise.
pub(crate) fn coalesce_whitespace_escaped<'c, C: Fn(char) -> bool>(
    escape_character: char,
    escapable_characters: C,
) -> impl for<'s> Fn(&'s str) -> Cow<'s, str> {
    move |input| impl_coalesce_whitespace_escaped(input, escape_character, &escapable_characters)
}

/// Single pass over `input` that collapses whitespace runs and resolves escapes.
///
/// The scan stays in the allocation-free `Normal`/`NormalEscaping` states for as long as the
/// output is identical to the input. The first character that forces a difference copies the
/// already-accepted prefix into an owned buffer and switches to the `RequiresMutation*` states.
///
/// * `input` - text to normalise.
/// * `escape_character` - character that introduces an escape.
/// * `escapable_characters` - predicate selecting the characters that may be escaped.
///
/// Returns `Cow::Borrowed(input)` when no rewrite was necessary, `Cow::Owned` otherwise.
fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>(
    input: &'s str,
    escape_character: char,
    escapable_characters: C,
) -> Cow<'s, str> {
    let mut state = CoalesceWhitespaceEscaped::Normal {
        in_whitespace: false,
    };
    for (offset, c) in input.char_indices() {
        state = match (state, c) {
            // ---- Output still identical to the input ----
            (CoalesceWhitespaceEscaped::Normal { .. }, _) if c == escape_character => {
                CoalesceWhitespaceEscaped::NormalEscaping {
                    escape_offset: offset,
                }
            }
            (
                CoalesceWhitespaceEscaped::Normal {
                    in_whitespace: false,
                },
                ' ',
            ) => {
                // A lone space is already in its final form.
                CoalesceWhitespaceEscaped::Normal {
                    in_whitespace: true,
                }
            }
            (CoalesceWhitespaceEscaped::Normal { in_whitespace }, ' ' | '\t' | '\r' | '\n') => {
                // Either a second consecutive whitespace character or a non-space whitespace
                // character: the output now differs from the input.
                let mut ret = String::with_capacity(input.len());
                ret.push_str(&input[..offset]);
                if !in_whitespace {
                    // Only emit the separator when the copied prefix does not already end with
                    // the space that opened this run; otherwise "a \nb" would become "a  b".
                    ret.push(' ');
                }
                CoalesceWhitespaceEscaped::RequiresMutation {
                    in_whitespace: true,
                    ret,
                }
            }
            (CoalesceWhitespaceEscaped::Normal { .. }, _) => CoalesceWhitespaceEscaped::Normal {
                in_whitespace: false,
            },

            // ---- Output still identical, previous character was the escape character ----
            (CoalesceWhitespaceEscaped::NormalEscaping { escape_offset }, _)
                if escapable_characters(c) =>
            {
                // A real escape: drop the escape character, keep the escaped one.
                let mut ret = String::with_capacity(input.len());
                ret.push_str(&input[..escape_offset]);
                ret.push(c);
                CoalesceWhitespaceEscaped::RequiresMutation {
                    in_whitespace: false,
                    ret,
                }
            }
            (CoalesceWhitespaceEscaped::NormalEscaping { .. }, ' ') => {
                // Nothing was escaped, so the escape character stays as a literal.
                CoalesceWhitespaceEscaped::Normal {
                    in_whitespace: true,
                }
            }
            (CoalesceWhitespaceEscaped::NormalEscaping { .. }, '\t' | '\r' | '\n') => {
                // Nothing was escaped, but the whitespace itself must be rewritten. The copied
                // prefix ends with the literal escape character.
                let mut ret = String::with_capacity(input.len());
                ret.push_str(&input[..offset]);
                ret.push(' ');
                CoalesceWhitespaceEscaped::RequiresMutation {
                    in_whitespace: true,
                    ret,
                }
            }
            (CoalesceWhitespaceEscaped::NormalEscaping { .. }, _) => {
                CoalesceWhitespaceEscaped::Normal {
                    in_whitespace: false,
                }
            }

            // ---- Building an owned, rewritten output ----
            (CoalesceWhitespaceEscaped::RequiresMutation { ret, .. }, _)
                if c == escape_character =>
            {
                // Hold the escape character back until we know whether it escapes anything.
                CoalesceWhitespaceEscaped::RequiresMutationEscaping {
                    ret,
                    matched_escape_character: c,
                }
            }
            (
                CoalesceWhitespaceEscaped::RequiresMutation {
                    in_whitespace,
                    mut ret,
                },
                ' ' | '\t' | '\r' | '\n',
            ) => {
                if !in_whitespace {
                    ret.push(' ');
                }
                CoalesceWhitespaceEscaped::RequiresMutation {
                    in_whitespace: true,
                    ret,
                }
            }
            (CoalesceWhitespaceEscaped::RequiresMutation { mut ret, .. }, _) => {
                ret.push(c);
                CoalesceWhitespaceEscaped::RequiresMutation {
                    in_whitespace: false,
                    ret,
                }
            }

            // ---- Building an owned output, previous character was the escape character ----
            (CoalesceWhitespaceEscaped::RequiresMutationEscaping { mut ret, .. }, _)
                if escapable_characters(c) =>
            {
                ret.push(c);
                CoalesceWhitespaceEscaped::RequiresMutation {
                    in_whitespace: false,
                    ret,
                }
            }
            (
                CoalesceWhitespaceEscaped::RequiresMutationEscaping {
                    mut ret,
                    matched_escape_character,
                },
                ' ' | '\t' | '\r' | '\n',
            ) => {
                // Nothing was escaped: the held-back escape character is a literal and must be
                // emitted before the separator, exactly as the borrowed path keeps it.
                ret.push(matched_escape_character);
                ret.push(' ');
                CoalesceWhitespaceEscaped::RequiresMutation {
                    in_whitespace: true,
                    ret,
                }
            }
            (
                CoalesceWhitespaceEscaped::RequiresMutationEscaping {
                    mut ret,
                    matched_escape_character,
                },
                _,
            ) => {
                ret.push(matched_escape_character);
                ret.push(c);
                CoalesceWhitespaceEscaped::RequiresMutation {
                    in_whitespace: false,
                    ret,
                }
            }
        }
    }
    match state {
        CoalesceWhitespaceEscaped::Normal { .. }
        | CoalesceWhitespaceEscaped::NormalEscaping { .. } => Cow::Borrowed(input),
        CoalesceWhitespaceEscaped::RequiresMutation { ret, .. } => Cow::Owned(ret),
        CoalesceWhitespaceEscaped::RequiresMutationEscaping {
            mut ret,
            matched_escape_character,
        } => {
            // Input ended right after an escape character; keep it as a literal.
            ret.push(matched_escape_character);
            Cow::Owned(ret)
        }
    }
}

/// Scanner state for `impl_coalesce_whitespace_escaped`.
enum CoalesceWhitespaceEscaped {
    /// Output so far equals the input; `in_whitespace` is true right after a single space.
    Normal {
        in_whitespace: bool,
    },
    /// Output so far equals the input and the previous character was the escape character,
    /// located at byte `escape_offset`.
    NormalEscaping {
        escape_offset: usize,
    },
    /// The output differs from the input and is accumulated in `ret`.
    RequiresMutation {
        in_whitespace: bool,
        ret: String,
    },
    /// Like `RequiresMutation`, but the previous character was the escape character, which has
    /// not been written to `ret` yet.
    RequiresMutationEscaping {
        ret: String,
        matched_escape_character: char,
    },
}

/// Lowercases `input`, borrowing it unchanged when lowercasing would not alter any character.
///
/// Characters without case (digits, punctuation, ...) do not force an allocation.
pub(crate) fn to_lowercase<'s>(input: &'s str) -> Cow<'s, str> {
    let needs_lowering = input
        .chars()
        .any(|c| c.to_lowercase().ne(std::iter::once(c)));
    if needs_lowering {
        Cow::Owned(input.to_lowercase())
    } else {
        Cow::Borrowed(input)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn coalesce_whitespace_escaped_default() -> Result<(), Box<dyn std::error::Error>> {
        let input = "foobarbaz";
        let output = coalesce_whitespace_escaped('&', |c| "".contains(c))(input);
        assert_eq!(output, "foobarbaz");
        assert!(matches!(output, Cow::Borrowed(_)));
        Ok(())
    }

    #[test]
    fn coalesce_whitespace_escaped_whitespace_single() -> Result<(), Box<dyn std::error::Error>> {
        let input = "foo bar baz";
        let output = coalesce_whitespace_escaped('&', |c| "".contains(c))(input);
        assert_eq!(output, "foo bar baz");
        assert!(matches!(output, Cow::Borrowed(_)));
        Ok(())
    }

    #[test]
    fn coalesce_whitespace_escaped_whitespace_double() -> Result<(), Box<dyn std::error::Error>> {
        let input = "foo  bar baz";
        let output = coalesce_whitespace_escaped('&', |c| "".contains(c))(input);
        assert_eq!(output, "foo bar baz");
        assert!(matches!(output, Cow::Owned(_)));
        Ok(())
    }

    #[test]
    fn coalesce_whitespace_escaped_escape_match() -> Result<(), Box<dyn std::error::Error>> {
        let input = "foo &bar baz";
        let output = coalesce_whitespace_escaped('&', |c| "b".contains(c))(input);
        assert_eq!(output, "foo bar baz");
        assert!(matches!(output, Cow::Owned(_)));
        Ok(())
    }

    #[test]
    fn coalesce_whitespace_escaped_escape_mismatch() -> Result<(), Box<dyn std::error::Error>> {
        let input = "foo  b&ar baz";
        let output = coalesce_whitespace_escaped('&', |c| "b".contains(c))(input);
        assert_eq!(output, "foo b&ar baz");
        assert!(matches!(output, Cow::Owned(_)));
        Ok(())
    }

    #[test]
    fn coalesce_whitespace_escaped_escape_mismatch_around_whitespace(
    ) -> Result<(), Box<dyn std::error::Error>> {
        let input = "foo& bar &baz";
        let output = coalesce_whitespace_escaped('&', |c| "z".contains(c))(input);
        assert_eq!(output, "foo& bar &baz");
        assert!(matches!(output, Cow::Borrowed(_)));
        Ok(())
    }

    #[test]
    fn coalesce_whitespace_escaped_space_then_line_break() -> Result<(), Box<dyn std::error::Error>>
    {
        let input = "foo \nbar";
        let output = coalesce_whitespace_escaped('&', |c| "".contains(c))(input);
        assert_eq!(output, "foo bar");
        assert!(matches!(output, Cow::Owned(_)));
        Ok(())
    }

    #[test]
    fn coalesce_whitespace_escaped_escape_before_whitespace_after_mutation(
    ) -> Result<(), Box<dyn std::error::Error>> {
        let input = "foo\tbar& baz";
        let output = coalesce_whitespace_escaped('&', |c| "z".contains(c))(input);
        assert_eq!(output, "foo bar& baz");
        assert!(matches!(output, Cow::Owned(_)));
        Ok(())
    }

    #[test]
    fn to_lowercase_borrows_caseless_input() -> Result<(), Box<dyn std::error::Error>> {
        assert!(matches!(to_lowercase("foo-bar_1"), Cow::Borrowed(_)));
        assert_eq!(to_lowercase("Foo-Bar_1"), "foo-bar_1");
        Ok(())
    }
}