From 1a2f0856dafddc033dbbbfd2b345de44cf079c6a Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 8 Oct 2023 14:40:01 -0400 Subject: [PATCH 01/10] Compare macro properties. --- src/compare/compare_field.rs | 55 ++++++++++++++++++++++++++++++++++++ src/compare/diff.rs | 32 +++++++++++++++++++-- 2 files changed, 84 insertions(+), 3 deletions(-) diff --git a/src/compare/compare_field.rs b/src/compare/compare_field.rs index f4af14d0..319ba7fd 100644 --- a/src/compare/compare_field.rs +++ b/src/compare/compare_field.rs @@ -1,6 +1,7 @@ use std::fmt::Debug; use super::diff::DiffStatus; +use super::sexp::unquote; use super::sexp::Token; use super::util::get_property; use super::util::get_property_quoted_string; @@ -102,3 +103,57 @@ pub(crate) fn compare_property_unquoted_atom<'b, 's, 'x, R, RG: Fn(R) -> Option< Ok(None) } } + +pub(crate) fn compare_property_list_of_quoted_string< + 'b, + 's, + 'x, + 'y, + R, + RV: AsRef + std::fmt::Debug + 'y, + RG: Fn(R) -> Option<&'y Vec>, +>( + emacs: &'b Token<'s>, + rust_node: R, + emacs_field: &'x str, + rust_value_getter: RG, +) -> Result)>, Box> { + let value = get_property(emacs, emacs_field)? + .map(Token::as_list) + .map_or(Ok(None), |r| r.map(Some))?; + let rust_value = rust_value_getter(rust_node); + match (value, rust_value) { + (None, None) => {} + (None, Some(_)) | (Some(_), None) => { + let this_status = DiffStatus::Bad; + let message = Some(format!( + "{} mismatch (emacs != rust) {:?} != {:?}", + emacs_field, value, rust_value + )); + return Ok(Some((this_status, message))); + } + (Some(el), Some(rl)) if el.len() != rl.len() => { + let this_status = DiffStatus::Bad; + let message = Some(format!( + "{} mismatch (emacs != rust) {:?} != {:?}", + emacs_field, value, rust_value + )); + return Ok(Some((this_status, message))); + } + (Some(el), Some(rl)) => { + for (e, r) in el.iter().zip(rl) { + let e = unquote(e.as_atom()?)?; + let r = r.as_ref(); + if e != r { + let this_status = DiffStatus::Bad; + let message = Some(format!( + "{} mismatch (emacs != rust) {:?} != {:?}", + emacs_field, value, rust_value + )); + return Ok(Some((this_status, message))); + } + } + } + } + Ok(None) +} diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 2f26f224..c1058219 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -6,6 +6,7 @@ use std::collections::HashSet; use super::compare_field::compare_identity; use super::compare_field::compare_property_always_nil; +use super::compare_field::compare_property_list_of_quoted_string; use super::compare_field::compare_property_quoted_string; use super::compare_field::compare_property_unquoted_atom; use super::elisp_fact::ElispFact; @@ -3068,10 +3069,35 @@ fn compare_org_macro<'b, 's>( emacs: &'b Token<'s>, rust: &'b OrgMacro<'s>, ) -> Result, Box> { - let this_status = DiffStatus::Good; - let message = None; + let mut this_status = DiffStatus::Good; + let mut message = None; - // TODO: Compare :key :value :args + if let Some((new_status, new_message)) = compare_properties!( + emacs, + rust, + ( + EmacsField::Required(":key"), + |r| Some(r.macro_name), + compare_property_quoted_string + ), + ( + EmacsField::Required(":value"), + |r| Some(r.source), + compare_property_quoted_string + ), + ( + EmacsField::Required(":args"), + |r| if r.macro_args.is_empty() { + None + } else { + Some(&r.macro_args) + }, + compare_property_list_of_quoted_string + ) + )? { + this_status = new_status; + message = new_message; + } Ok(DiffResult { status: this_status, From 37bc5ef71286be6d21eab4da1e6a0e80807f01f3 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 8 Oct 2023 14:48:29 -0400 Subject: [PATCH 02/10] Do not include whitespace at the end of value. --- org_mode_samples/object/macro/whitespace_in_args.org | 4 ++++ src/compare/diff.rs | 2 +- src/parser/org_macro.rs | 2 ++ src/types/object.rs | 1 + 4 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 org_mode_samples/object/macro/whitespace_in_args.org diff --git a/org_mode_samples/object/macro/whitespace_in_args.org b/org_mode_samples/object/macro/whitespace_in_args.org new file mode 100644 index 00000000..1b105031 --- /dev/null +++ b/org_mode_samples/object/macro/whitespace_in_args.org @@ -0,0 +1,4 @@ +{{{foo(bar baz)}}} + +{{{foo(bar +baz)}}} diff --git a/src/compare/diff.rs b/src/compare/diff.rs index c1058219..3c6da09f 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -3082,7 +3082,7 @@ fn compare_org_macro<'b, 's>( ), ( EmacsField::Required(":value"), - |r| Some(r.source), + |r| Some(r.macro_value), compare_property_quoted_string ), ( diff --git a/src/parser/org_macro.rs b/src/parser/org_macro.rs index 093dc31e..4cc6e045 100644 --- a/src/parser/org_macro.rs +++ b/src/parser/org_macro.rs @@ -26,6 +26,7 @@ pub(crate) fn org_macro<'b, 'g, 'r, 's>( let (remaining, macro_name) = org_macro_name(context, remaining)?; let (remaining, macro_args) = opt(parser_with_context!(org_macro_args)(context))(remaining)?; let (remaining, _) = tag("}}}")(remaining)?; + let macro_value = get_consumed(input, remaining); let (remaining, _trailing_whitespace) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; @@ -40,6 +41,7 @@ pub(crate) fn org_macro<'b, 'g, 'r, 's>( .into_iter() .map(|arg| arg.into()) .collect(), + macro_value: Into::<&str>::into(macro_value), }, )) } diff --git a/src/types/object.rs b/src/types/object.rs index 01435d4f..f27324fb 100644 --- a/src/types/object.rs +++ b/src/types/object.rs @@ -150,6 +150,7 @@ pub struct OrgMacro<'s> { pub source: &'s str, pub macro_name: &'s str, pub macro_args: Vec<&'s str>, + pub macro_value: &'s str, } #[derive(Debug, PartialEq)] From a32cea813919f9ebf7d21afda0f07bd276cc1e25 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 8 Oct 2023 15:08:21 -0400 Subject: [PATCH 03/10] Coalesce whitespace in macro args. --- src/compare/compare_field.rs | 10 ++++---- src/compare/diff.rs | 2 +- src/types/object.rs | 10 ++++++++ src/types/util.rs | 46 ++++++++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 5 deletions(-) diff --git a/src/compare/compare_field.rs b/src/compare/compare_field.rs index 319ba7fd..3173ea8e 100644 --- a/src/compare/compare_field.rs +++ b/src/compare/compare_field.rs @@ -108,10 +108,10 @@ pub(crate) fn compare_property_list_of_quoted_string< 'b, 's, 'x, - 'y, R, - RV: AsRef + std::fmt::Debug + 'y, - RG: Fn(R) -> Option<&'y Vec>, + RV: AsRef + std::fmt::Debug, + RI: Iterator, + RG: Fn(R) -> Option, >( emacs: &'b Token<'s>, rust_node: R, @@ -122,7 +122,9 @@ pub(crate) fn compare_property_list_of_quoted_string< .map(Token::as_list) .map_or(Ok(None), |r| r.map(Some))?; let rust_value = rust_value_getter(rust_node); - match (value, rust_value) { + // TODO: Seems we are needlessly coverting to a vec here. + let rust_value: Option> = rust_value.map(|it| it.collect()); + match (value, &rust_value) { (None, None) => {} (None, Some(_)) | (Some(_), None) => { let this_status = DiffStatus::Bad; diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 3c6da09f..137441e1 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -3090,7 +3090,7 @@ fn compare_org_macro<'b, 's>( |r| if r.macro_args.is_empty() { None } else { - Some(&r.macro_args) + Some(r.get_macro_args()) }, compare_property_list_of_quoted_string ) diff --git a/src/types/object.rs b/src/types/object.rs index f27324fb..1fd8539d 100644 --- a/src/types/object.rs +++ b/src/types/object.rs @@ -1,6 +1,7 @@ use std::borrow::Borrow; use std::borrow::Cow; +use super::util::coalesce_whitespace; use super::util::coalesce_whitespace_if_line_break; use super::util::remove_line_break; use super::util::remove_whitespace_if_line_break; @@ -149,6 +150,9 @@ pub struct AngleLink<'s> { pub struct OrgMacro<'s> { pub source: &'s str, pub macro_name: &'s str, + /// The macro args from the source. + /// + /// This does not take into account the post-processing that you would get from the upstream emacs org-mode AST. Use `get_macro_args` for an equivalent value. pub macro_args: Vec<&'s str>, pub macro_value: &'s str, } @@ -733,3 +737,9 @@ impl<'s> AngleLink<'s> { self.search_option.map(remove_whitespace_if_line_break) } } + +impl<'s> OrgMacro<'s> { + pub fn get_macro_args<'b>(&'b self) -> impl Iterator> + 'b { + self.macro_args.iter().map(|arg| coalesce_whitespace(*arg)) + } +} diff --git a/src/types/util.rs b/src/types/util.rs index c336474a..bb7deda4 100644 --- a/src/types/util.rs +++ b/src/types/util.rs @@ -197,3 +197,49 @@ enum CoalesceWhitespaceIfLineBreakState { ret: String, }, } + +/// Removes all whitespace from a string. +/// +/// Example: "foo bar" => "foobar" and "foo \n bar" => "foobar". +pub(crate) fn coalesce_whitespace<'s>(input: &'s str) -> Cow<'s, str> { + let mut state = CoalesceWhitespace::Normal; + for (offset, c) in input.char_indices() { + match (&mut state, c) { + (CoalesceWhitespace::Normal, ' ' | '\t' | '\r' | '\n') => { + let mut ret = String::with_capacity(input.len()); + ret.push_str(&input[..offset]); + ret.push(' '); + state = CoalesceWhitespace::HasWhitespace { + in_whitespace: true, + ret, + }; + } + (CoalesceWhitespace::Normal, _) => {} + ( + CoalesceWhitespace::HasWhitespace { in_whitespace, ret }, + ' ' | '\t' | '\r' | '\n', + ) => { + if !*in_whitespace { + *in_whitespace = true; + ret.push(' '); + } + } + (CoalesceWhitespace::HasWhitespace { in_whitespace, ret }, _) => { + *in_whitespace = false; + ret.push(c); + } + } + } + match state { + CoalesceWhitespace::Normal => Cow::Borrowed(input), + CoalesceWhitespace::HasWhitespace { + in_whitespace: _, + ret, + } => Cow::Owned(ret), + } +} + +enum CoalesceWhitespace { + Normal, + HasWhitespace { in_whitespace: bool, ret: String }, +} From a6adeee40b18abb06c43dac708e7d1368c47e87e Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 8 Oct 2023 15:54:56 -0400 Subject: [PATCH 04/10] Handle escaping the characters in org macro arguments. --- org_mode_samples/object/macro/escape.org | 7 ++ src/compare/compare_field.rs | 4 +- src/types/util.rs | 146 +++++++++++++++++++++++ 3 files changed, 155 insertions(+), 2 deletions(-) create mode 100644 org_mode_samples/object/macro/escape.org diff --git a/org_mode_samples/object/macro/escape.org b/org_mode_samples/object/macro/escape.org new file mode 100644 index 00000000..312f3d4a --- /dev/null +++ b/org_mode_samples/object/macro/escape.org @@ -0,0 +1,7 @@ +{{{foo}}} + +{{{fo\o}}} + +{{{foo(b\ar)}}} + +{{{foo(b\,r)}}} diff --git a/src/compare/compare_field.rs b/src/compare/compare_field.rs index 3173ea8e..8cc41381 100644 --- a/src/compare/compare_field.rs +++ b/src/compare/compare_field.rs @@ -149,8 +149,8 @@ pub(crate) fn compare_property_list_of_quoted_string< if e != r { let this_status = DiffStatus::Bad; let message = Some(format!( - "{} mismatch (emacs != rust) {:?} != {:?}", - emacs_field, value, rust_value + "{} mismatch (emacs != rust) {:?} != {:?}. Full list: {:?} != {:?}", + emacs_field, e, r, value, rust_value )); return Ok(Some((this_status, message))); } diff --git a/src/types/util.rs b/src/types/util.rs index bb7deda4..e8980ea5 100644 --- a/src/types/util.rs +++ b/src/types/util.rs @@ -243,3 +243,149 @@ enum CoalesceWhitespace { Normal, HasWhitespace { in_whitespace: bool, ret: String }, } + +/// Removes all whitespace from a string and handle escaping characters. +/// +/// Example: "foo bar" => "foobar" and "foo \n bar" => "foobar" but if the escape character is backslash and comma is an escapable character than "foo\,bar" becomes "foo,bar". +pub(crate) fn coalesce_whitespace_escaped<'c, C: Fn(char) -> bool>( + escape_character: char, + escapable_characters: C, +) -> impl for<'s> Fn(&'s str) -> Cow<'s, str> { + move |input| impl_coalesce_whitespace_escaped(input, escape_character, &escapable_characters) +} + +fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>( + input: &'s str, + escape_character: char, + escapable_characters: C, +) -> Cow<'s, str> { + let mut state = CoalesceWhitespaceEscaped::Normal; + for (offset, c) in input.char_indices() { + state = match (state, c) { + (CoalesceWhitespaceEscaped::Normal, c) if c == escape_character => { + CoalesceWhitespaceEscaped::NormalEscaping { + escape_offset: offset, + } + } + (CoalesceWhitespaceEscaped::Normal, ' ' | '\t' | '\r' | '\n') => { + let mut ret = String::with_capacity(input.len()); + ret.push_str(&input[..offset]); + ret.push(' '); + CoalesceWhitespaceEscaped::RequiresMutation { + in_whitespace: true, + ret, + } + } + (CoalesceWhitespaceEscaped::Normal, _) => CoalesceWhitespaceEscaped::Normal, + (CoalesceWhitespaceEscaped::NormalEscaping { escape_offset }, c) + if escapable_characters(c) => + { + // We escaped a character so we need mutation + let mut ret = String::with_capacity(input.len()); + ret.push_str(&input[..escape_offset]); + ret.push(c); + CoalesceWhitespaceEscaped::RequiresMutation { + in_whitespace: false, + ret, + } + } + + ( + CoalesceWhitespaceEscaped::NormalEscaping { escape_offset: _ }, + ' ' | '\t' | '\r' | '\n', + ) => { + // We didn't escape the character but we hit whitespace anyway. + let mut ret = String::with_capacity(input.len()); + ret.push_str(&input[..offset]); + ret.push(' '); + CoalesceWhitespaceEscaped::RequiresMutation { + in_whitespace: true, + ret, + } + } + (CoalesceWhitespaceEscaped::NormalEscaping { escape_offset: _ }, _) => { + // We didn't escape the character so continue as normal. + CoalesceWhitespaceEscaped::Normal + } + + ( + CoalesceWhitespaceEscaped::RequiresMutation { + in_whitespace: _, + ret, + }, + c, + ) if c == escape_character => { + CoalesceWhitespaceEscaped::RequiresMutationEscaping { ret } + } + ( + CoalesceWhitespaceEscaped::RequiresMutation { + mut in_whitespace, + mut ret, + }, + ' ' | '\t' | '\r' | '\n', + ) => { + if !in_whitespace { + in_whitespace = true; + ret.push(' '); + } + CoalesceWhitespaceEscaped::RequiresMutation { in_whitespace, ret } + } + ( + CoalesceWhitespaceEscaped::RequiresMutation { + in_whitespace, + mut ret, + }, + _, + ) => { + ret.push(c); + CoalesceWhitespaceEscaped::RequiresMutation { + in_whitespace: false, + ret, + } + } + (CoalesceWhitespaceEscaped::RequiresMutationEscaping { mut ret }, c) + if escapable_characters(c) => + { + ret.push(c); + CoalesceWhitespaceEscaped::RequiresMutation { + in_whitespace: false, + ret, + } + } + ( + CoalesceWhitespaceEscaped::RequiresMutationEscaping { mut ret }, + ' ' | '\t' | '\r' | '\n', + ) => { + ret.push(' '); + CoalesceWhitespaceEscaped::RequiresMutation { + in_whitespace: true, + ret, + } + } + (CoalesceWhitespaceEscaped::RequiresMutationEscaping { mut ret }, c) => { + ret.push(escape_character); + ret.push(c); + // TODO + CoalesceWhitespaceEscaped::RequiresMutation { + in_whitespace: false, + ret, + } + } + } + } + // match state { + // CoalesceWhitespaceEscaped::Normal => Cow::Borrowed(input), + // CoalesceWhitespaceEscaped::RequiresMutation { + // in_whitespace: _, + // ret, + // } => Cow::Owned(ret), + // } + todo!() +} + +enum CoalesceWhitespaceEscaped { + Normal, + NormalEscaping { escape_offset: usize }, + RequiresMutation { in_whitespace: bool, ret: String }, + RequiresMutationEscaping { ret: String }, +} From 41aa0349a07e63540bd50bcaf1c5731e123d809c Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 8 Oct 2023 16:06:52 -0400 Subject: [PATCH 05/10] Add tests for coalesce_whitespace_escaped. --- src/types/util.rs | 122 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 101 insertions(+), 21 deletions(-) diff --git a/src/types/util.rs b/src/types/util.rs index e8980ea5..7eab912c 100644 --- a/src/types/util.rs +++ b/src/types/util.rs @@ -314,9 +314,10 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>( ret, }, c, - ) if c == escape_character => { - CoalesceWhitespaceEscaped::RequiresMutationEscaping { ret } - } + ) if c == escape_character => CoalesceWhitespaceEscaped::RequiresMutationEscaping { + ret, + matched_escape_character: c, + }, ( CoalesceWhitespaceEscaped::RequiresMutation { mut in_whitespace, @@ -332,7 +333,7 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>( } ( CoalesceWhitespaceEscaped::RequiresMutation { - in_whitespace, + in_whitespace: _, mut ret, }, _, @@ -343,9 +344,13 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>( ret, } } - (CoalesceWhitespaceEscaped::RequiresMutationEscaping { mut ret }, c) - if escapable_characters(c) => - { + ( + CoalesceWhitespaceEscaped::RequiresMutationEscaping { + mut ret, + matched_escape_character: _, + }, + c, + ) if escapable_characters(c) => { ret.push(c); CoalesceWhitespaceEscaped::RequiresMutation { in_whitespace: false, @@ -353,7 +358,10 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>( } } ( - CoalesceWhitespaceEscaped::RequiresMutationEscaping { mut ret }, + CoalesceWhitespaceEscaped::RequiresMutationEscaping { + mut ret, + matched_escape_character: _, + }, ' ' | '\t' | '\r' | '\n', ) => { ret.push(' '); @@ -362,8 +370,14 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>( ret, } } - (CoalesceWhitespaceEscaped::RequiresMutationEscaping { mut ret }, c) => { - ret.push(escape_character); + ( + CoalesceWhitespaceEscaped::RequiresMutationEscaping { + mut ret, + matched_escape_character, + }, + c, + ) => { + ret.push(matched_escape_character); ret.push(c); // TODO CoalesceWhitespaceEscaped::RequiresMutation { @@ -373,19 +387,85 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>( } } } - // match state { - // CoalesceWhitespaceEscaped::Normal => Cow::Borrowed(input), - // CoalesceWhitespaceEscaped::RequiresMutation { - // in_whitespace: _, - // ret, - // } => Cow::Owned(ret), - // } - todo!() + match state { + CoalesceWhitespaceEscaped::Normal => Cow::Borrowed(input), + CoalesceWhitespaceEscaped::NormalEscaping { escape_offset: _ } => Cow::Borrowed(input), + CoalesceWhitespaceEscaped::RequiresMutation { + in_whitespace: _, + ret, + } => Cow::Owned(ret), + CoalesceWhitespaceEscaped::RequiresMutationEscaping { + mut ret, + matched_escape_character, + } => { + ret.push(matched_escape_character); + Cow::Owned(ret) + } + } } enum CoalesceWhitespaceEscaped { Normal, - NormalEscaping { escape_offset: usize }, - RequiresMutation { in_whitespace: bool, ret: String }, - RequiresMutationEscaping { ret: String }, + NormalEscaping { + escape_offset: usize, + }, + RequiresMutation { + in_whitespace: bool, + ret: String, + }, + RequiresMutationEscaping { + ret: String, + matched_escape_character: char, + }, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn coalesce_whitespace_escaped_default() -> Result<(), Box> { + let input = "foobarbaz"; + let output = coalesce_whitespace_escaped('&', |c| "".contains(c))(input); + assert_eq!(output, "foobarbaz"); + assert!(matches!(output, Cow::Borrowed(_))); + Ok(()) + } + + #[test] + fn coalesce_whitespace_escaped_whitespace_single() -> Result<(), Box> { + let input = "foo bar baz"; + let output = coalesce_whitespace_escaped('&', |c| "".contains(c))(input); + assert_eq!(output, "foo bar baz"); + // TODO: Technically this should be a Borrowed but to keep the code simple for now we are treating all whitespace as causing ownership. + assert!(matches!(output, Cow::Owned(_))); + Ok(()) + } + + #[test] + fn coalesce_whitespace_escaped_whitespace_double() -> Result<(), Box> { + let input = "foo bar baz"; + let output = coalesce_whitespace_escaped('&', |c| "".contains(c))(input); + assert_eq!(output, "foo bar baz"); + assert!(matches!(output, Cow::Owned(_))); + Ok(()) + } + + #[test] + fn coalesce_whitespace_escaped_escape_match() -> Result<(), Box> { + let input = "foo &bar baz"; + let output = coalesce_whitespace_escaped('&', |c| "b".contains(c))(input); + assert_eq!(output, "foo bar baz"); + assert!(matches!(output, Cow::Owned(_))); + Ok(()) + } + + #[test] + fn coalesce_whitespace_escaped_escape_mismatch() -> Result<(), Box> { + let input = "foo b&ar baz"; + let output = coalesce_whitespace_escaped('&', |c| "b".contains(c))(input); + assert_eq!(output, "foo b&ar baz"); + assert!(matches!(output, Cow::Owned(_))); + Ok(()) + } } From 17c745ee71295c76bd595f0a6a5f90bba6754346 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 8 Oct 2023 16:15:49 -0400 Subject: [PATCH 06/10] Improve coalesce_whitespace_escaped to borrow when single spaces are used. --- src/types/util.rs | 62 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 11 deletions(-) diff --git a/src/types/util.rs b/src/types/util.rs index 7eab912c..6480ed74 100644 --- a/src/types/util.rs +++ b/src/types/util.rs @@ -259,15 +259,33 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>( escape_character: char, escapable_characters: C, ) -> Cow<'s, str> { - let mut state = CoalesceWhitespaceEscaped::Normal; + let mut state = CoalesceWhitespaceEscaped::Normal { + in_whitespace: false, + }; for (offset, c) in input.char_indices() { state = match (state, c) { - (CoalesceWhitespaceEscaped::Normal, c) if c == escape_character => { + (CoalesceWhitespaceEscaped::Normal { in_whitespace: _ }, c) + if c == escape_character => + { CoalesceWhitespaceEscaped::NormalEscaping { escape_offset: offset, } } - (CoalesceWhitespaceEscaped::Normal, ' ' | '\t' | '\r' | '\n') => { + (CoalesceWhitespaceEscaped::Normal { in_whitespace }, ' ') => { + if in_whitespace { + let mut ret = String::with_capacity(input.len()); + ret.push_str(&input[..offset]); + CoalesceWhitespaceEscaped::RequiresMutation { + in_whitespace: true, + ret, + } + } else { + CoalesceWhitespaceEscaped::Normal { + in_whitespace: true, + } + } + } + (CoalesceWhitespaceEscaped::Normal { in_whitespace: _ }, '\t' | '\r' | '\n') => { let mut ret = String::with_capacity(input.len()); ret.push_str(&input[..offset]); ret.push(' '); @@ -276,7 +294,11 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>( ret, } } - (CoalesceWhitespaceEscaped::Normal, _) => CoalesceWhitespaceEscaped::Normal, + (CoalesceWhitespaceEscaped::Normal { in_whitespace: _ }, _) => { + CoalesceWhitespaceEscaped::Normal { + in_whitespace: false, + } + } (CoalesceWhitespaceEscaped::NormalEscaping { escape_offset }, c) if escapable_characters(c) => { @@ -290,9 +312,15 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>( } } + (CoalesceWhitespaceEscaped::NormalEscaping { escape_offset: _ }, ' ') => { + // We didn't escape the character so continue as normal. + CoalesceWhitespaceEscaped::Normal { + in_whitespace: true, + } + } ( CoalesceWhitespaceEscaped::NormalEscaping { escape_offset: _ }, - ' ' | '\t' | '\r' | '\n', + '\t' | '\r' | '\n', ) => { // We didn't escape the character but we hit whitespace anyway. let mut ret = String::with_capacity(input.len()); @@ -305,7 +333,9 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>( } (CoalesceWhitespaceEscaped::NormalEscaping { escape_offset: _ }, _) => { // We didn't escape the character so continue as normal. - CoalesceWhitespaceEscaped::Normal + CoalesceWhitespaceEscaped::Normal { + in_whitespace: false, + } } ( @@ -379,7 +409,6 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>( ) => { ret.push(matched_escape_character); ret.push(c); - // TODO CoalesceWhitespaceEscaped::RequiresMutation { in_whitespace: false, ret, @@ -388,7 +417,7 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>( } } match state { - CoalesceWhitespaceEscaped::Normal => Cow::Borrowed(input), + CoalesceWhitespaceEscaped::Normal { in_whitespace: _ } => Cow::Borrowed(input), CoalesceWhitespaceEscaped::NormalEscaping { escape_offset: _ } => Cow::Borrowed(input), CoalesceWhitespaceEscaped::RequiresMutation { in_whitespace: _, @@ -405,7 +434,9 @@ fn impl_coalesce_whitespace_escaped<'s, C: Fn(char) -> bool>( } enum CoalesceWhitespaceEscaped { - Normal, + Normal { + in_whitespace: bool, + }, NormalEscaping { escape_offset: usize, }, @@ -437,8 +468,7 @@ mod tests { let input = "foo bar baz"; let output = coalesce_whitespace_escaped('&', |c| "".contains(c))(input); assert_eq!(output, "foo bar baz"); - // TODO: Technically this should be a Borrowed but to keep the code simple for now we are treating all whitespace as causing ownership. - assert!(matches!(output, Cow::Owned(_))); + assert!(matches!(output, Cow::Borrowed(_))); Ok(()) } @@ -468,4 +498,14 @@ mod tests { assert!(matches!(output, Cow::Owned(_))); Ok(()) } + + #[test] + fn coalesce_whitespace_escaped_escape_mismatch_around_whitespace( + ) -> Result<(), Box> { + let input = "foo& bar &baz"; + let output = coalesce_whitespace_escaped('&', |c| "z".contains(c))(input); + assert_eq!(output, "foo& bar &baz"); + assert!(matches!(output, Cow::Borrowed(_))); + Ok(()) + } } From dd009498dddfc91596c048393a8a455355265179 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 8 Oct 2023 16:19:25 -0400 Subject: [PATCH 07/10] Switch to using coalesce_whitespace_escaped for macro args. --- src/types/object.rs | 6 ++++-- src/types/util.rs | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/types/object.rs b/src/types/object.rs index 1fd8539d..59198efa 100644 --- a/src/types/object.rs +++ b/src/types/object.rs @@ -1,7 +1,7 @@ use std::borrow::Borrow; use std::borrow::Cow; -use super::util::coalesce_whitespace; +use super::util::coalesce_whitespace_escaped; use super::util::coalesce_whitespace_if_line_break; use super::util::remove_line_break; use super::util::remove_whitespace_if_line_break; @@ -740,6 +740,8 @@ impl<'s> AngleLink<'s> { impl<'s> OrgMacro<'s> { pub fn get_macro_args<'b>(&'b self) -> impl Iterator> + 'b { - self.macro_args.iter().map(|arg| coalesce_whitespace(*arg)) + self.macro_args + .iter() + .map(|arg| coalesce_whitespace_escaped('\\', |c| ",".contains(c))(*arg)) } } diff --git a/src/types/util.rs b/src/types/util.rs index 6480ed74..d7e998d7 100644 --- a/src/types/util.rs +++ b/src/types/util.rs @@ -201,6 +201,7 @@ enum CoalesceWhitespaceIfLineBreakState { /// Removes all whitespace from a string. /// /// Example: "foo bar" => "foobar" and "foo \n bar" => "foobar". +#[allow(dead_code)] pub(crate) fn coalesce_whitespace<'s>(input: &'s str) -> Cow<'s, str> { let mut state = CoalesceWhitespace::Normal; for (offset, c) in input.char_indices() { From b9ead09dded0a0c366729855fb68673aebd241e0 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 8 Oct 2023 16:34:28 -0400 Subject: [PATCH 08/10] Add test showing we are not handling trailing whitespace in macro args properly. --- org_mode_samples/object/macro/whitespace_in_args.org | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/org_mode_samples/object/macro/whitespace_in_args.org b/org_mode_samples/object/macro/whitespace_in_args.org index 1b105031..518d4d8b 100644 --- a/org_mode_samples/object/macro/whitespace_in_args.org +++ b/org_mode_samples/object/macro/whitespace_in_args.org @@ -2,3 +2,9 @@ {{{foo(bar baz)}}} + +{{{foo(foo )}}} + +{{{foo(foo , bar )}}} + +{{{foo(foo , bar , baz )}}} From 9bdec391f143f1b808092add0a2eba3992bf4f3a Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 8 Oct 2023 16:39:22 -0400 Subject: [PATCH 09/10] Do not capture trailing whitespace on the final macro arg. --- src/parser/org_macro.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/parser/org_macro.rs b/src/parser/org_macro.rs index 4cc6e045..dcc15661 100644 --- a/src/parser/org_macro.rs +++ b/src/parser/org_macro.rs @@ -1,11 +1,13 @@ use nom::bytes::complete::tag; use nom::character::complete::anychar; +use nom::character::complete::space0; use nom::combinator::not; use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::verify; use nom::multi::many0; use nom::multi::separated_list0; +use nom::sequence::tuple; use super::org_source::OrgSource; use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting; @@ -67,7 +69,7 @@ fn org_macro_args<'b, 'g, 'r, 's>( let (remaining, _) = tag("(")(input)?; let (remaining, args) = separated_list0(tag(","), parser_with_context!(org_macro_arg)(context))(remaining)?; - let (remaining, _) = tag(")")(remaining)?; + let (remaining, _) = tuple((space0, tag(")")))(remaining)?; Ok((remaining, args)) } @@ -82,6 +84,10 @@ fn org_macro_arg<'b, 'g, 'r, 's>( loop { not(parser_with_context!(exit_matcher_parser)(context))(remaining)?; not(peek(tag("}}}")))(remaining)?; + if peek(tuple((space0::, CustomError<_>>, tag(")"))))(remaining).is_ok() { + break; + } + let (new_remaining, next_char) = anychar(remaining)?; if escaping { remaining = new_remaining; From f07d041eb9d4a972ec7035aab4bdb8384430ef29 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Sun, 8 Oct 2023 16:51:44 -0400 Subject: [PATCH 10/10] Fix handling capitalization in macro names. --- org_mode_samples/object/macro/capitalize.org | 1 + src/compare/diff.rs | 8 +++---- src/parser/org_macro.rs | 6 ++--- src/types/object.rs | 25 ++++++++++++++------ src/types/util.rs | 8 +++++++ 5 files changed, 34 insertions(+), 14 deletions(-) create mode 100644 org_mode_samples/object/macro/capitalize.org diff --git a/org_mode_samples/object/macro/capitalize.org b/org_mode_samples/object/macro/capitalize.org new file mode 100644 index 00000000..8bbd61d7 --- /dev/null +++ b/org_mode_samples/object/macro/capitalize.org @@ -0,0 +1 @@ +{{{Foo(Bar,Baz)}}} diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 137441e1..5afb6e08 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -3077,20 +3077,20 @@ fn compare_org_macro<'b, 's>( rust, ( EmacsField::Required(":key"), - |r| Some(r.macro_name), + |r| Some(r.get_key()), compare_property_quoted_string ), ( EmacsField::Required(":value"), - |r| Some(r.macro_value), + |r| Some(r.value), compare_property_quoted_string ), ( EmacsField::Required(":args"), - |r| if r.macro_args.is_empty() { + |r| if r.args.is_empty() { None } else { - Some(r.get_macro_args()) + Some(r.get_args()) }, compare_property_list_of_quoted_string ) diff --git a/src/parser/org_macro.rs b/src/parser/org_macro.rs index dcc15661..9ead92ec 100644 --- a/src/parser/org_macro.rs +++ b/src/parser/org_macro.rs @@ -37,13 +37,13 @@ pub(crate) fn org_macro<'b, 'g, 'r, 's>( remaining, OrgMacro { source: source.into(), - macro_name: macro_name.into(), - macro_args: macro_args + key: macro_name.into(), + args: macro_args .unwrap_or_else(|| Vec::with_capacity(0)) .into_iter() .map(|arg| arg.into()) .collect(), - macro_value: Into::<&str>::into(macro_value), + value: Into::<&str>::into(macro_value), }, )) } diff --git a/src/types/object.rs b/src/types/object.rs index 59198efa..22420433 100644 --- a/src/types/object.rs +++ b/src/types/object.rs @@ -5,6 +5,7 @@ use super::util::coalesce_whitespace_escaped; use super::util::coalesce_whitespace_if_line_break; use super::util::remove_line_break; use super::util::remove_whitespace_if_line_break; +use super::util::to_lowercase; use super::GetStandardProperties; use super::StandardProperties; @@ -149,12 +150,18 @@ pub struct AngleLink<'s> { #[derive(Debug, PartialEq)] pub struct OrgMacro<'s> { pub source: &'s str, - pub macro_name: &'s str, - /// The macro args from the source. + + /// The key from the source. /// - /// This does not take into account the post-processing that you would get from the upstream emacs org-mode AST. Use `get_macro_args` for an equivalent value. - pub macro_args: Vec<&'s str>, - pub macro_value: &'s str, + /// This does not take into account the post-processing that you would get from the upstream emacs org-mode AST. Use `get_key` for an equivalent value. + pub key: &'s str, + + /// The args from the source. + /// + /// This does not take into account the post-processing that you would get from the upstream emacs org-mode AST. Use `get_args` for an equivalent value. + pub args: Vec<&'s str>, + + pub value: &'s str, } #[derive(Debug, PartialEq)] @@ -739,8 +746,12 @@ impl<'s> AngleLink<'s> { } impl<'s> OrgMacro<'s> { - pub fn get_macro_args<'b>(&'b self) -> impl Iterator> + 'b { - self.macro_args + pub fn get_key<'b>(&'b self) -> Cow<'s, str> { + to_lowercase(self.key) + } + + pub fn get_args<'b>(&'b self) -> impl Iterator> + 'b { + self.args .iter() .map(|arg| coalesce_whitespace_escaped('\\', |c| ",".contains(c))(*arg)) } diff --git a/src/types/util.rs b/src/types/util.rs index d7e998d7..99cf5ed8 100644 --- a/src/types/util.rs +++ b/src/types/util.rs @@ -451,6 +451,14 @@ enum CoalesceWhitespaceEscaped { }, } +pub(crate) fn to_lowercase<'s>(input: &'s str) -> Cow<'s, str> { + if input.chars().any(|c| !c.is_lowercase()) { + Cow::Owned(input.to_lowercase()) + } else { + Cow::Borrowed(input) + } +} + #[cfg(test)] mod tests { use super::*;