From d126488891a4b9d10d5a47670e429c0858969db8 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Fri, 6 Oct 2023 18:30:08 -0400 Subject: [PATCH] Handle orgifying text in regular link path and raw-link. --- .../object/regular_link/code_ref_link.org | 3 + .../object/regular_link/custom_id_link.org | 3 + .../object/regular_link/file_link.org | 3 + .../object/regular_link/fuzzy_link.org | 3 + .../object/regular_link/id_link.org | 3 + .../object/regular_link/protocol_link.org | 3 + src/compare/compare_field.rs | 66 ++++++++++++++++++- src/compare/diff.rs | 20 +++--- src/parser/regular_link.rs | 15 +++++ src/types/object.rs | 42 ++++++++++++ 10 files changed, 151 insertions(+), 10 deletions(-) diff --git a/org_mode_samples/object/regular_link/code_ref_link.org b/org_mode_samples/object/regular_link/code_ref_link.org index 6e99b14f..6b924e69 100644 --- a/org_mode_samples/object/regular_link/code_ref_link.org +++ b/org_mode_samples/object/regular_link/code_ref_link.org @@ -4,6 +4,9 @@ [[((baz)]] +[[(lo +rem)]] + # These become fuzzy [[(foo) ]] [[ (foo)]] diff --git a/org_mode_samples/object/regular_link/custom_id_link.org b/org_mode_samples/object/regular_link/custom_id_link.org index ba1c67ef..a721579e 100644 --- a/org_mode_samples/object/regular_link/custom_id_link.org +++ b/org_mode_samples/object/regular_link/custom_id_link.org @@ -1 +1,4 @@ [[#foo]] + +[[#fo +o]] diff --git a/org_mode_samples/object/regular_link/file_link.org b/org_mode_samples/object/regular_link/file_link.org index 23b996a4..2364f57b 100644 --- a/org_mode_samples/object/regular_link/file_link.org +++ b/org_mode_samples/object/regular_link/file_link.org @@ -1 +1,4 @@ [[file:simple.org]] + +[[file:simp +le.org]] diff --git a/org_mode_samples/object/regular_link/fuzzy_link.org b/org_mode_samples/object/regular_link/fuzzy_link.org index d9512a71..64fffaa9 100644 --- a/org_mode_samples/object/regular_link/fuzzy_link.org +++ b/org_mode_samples/object/regular_link/fuzzy_link.org @@ -1 +1,4 @@ [[elisp.org]] + +[[eli +sp.org]] diff --git a/org_mode_samples/object/regular_link/id_link.org b/org_mode_samples/object/regular_link/id_link.org index 04bbe50b..65bf4c5e 100644 --- a/org_mode_samples/object/regular_link/id_link.org +++ b/org_mode_samples/object/regular_link/id_link.org @@ -1 +1,4 @@ [[id:83986bdf-987c-465d-8851-44cb4c02a86c]] + +[[id:83986bdf-987c-465d +-8851-44cb4c02a86c]] diff --git a/org_mode_samples/object/regular_link/protocol_link.org b/org_mode_samples/object/regular_link/protocol_link.org index 9bb79955..bd9bbad1 100644 --- a/org_mode_samples/object/regular_link/protocol_link.org +++ b/org_mode_samples/object/regular_link/protocol_link.org @@ -1 +1,4 @@ [[shell:foo]] + +[[shell:fo +o]] diff --git a/src/compare/compare_field.rs b/src/compare/compare_field.rs index 5a263237..56cc6d81 100644 --- a/src/compare/compare_field.rs +++ b/src/compare/compare_field.rs @@ -2,7 +2,9 @@ use std::fmt::Debug; use super::diff::DiffStatus; use super::sexp::Token; +use super::util::get_property; use super::util::get_property_quoted_string; +use super::util::get_property_unquoted_atom; #[derive(Debug)] pub(crate) enum EmacsField<'s> { @@ -32,6 +34,28 @@ pub(crate) fn compare_identity() -> () { () } +/// Assert that the emacs value is always nil or absent. +/// +/// This is usually used for fields which, in my testing, are always nil. Using this compare function instead of simply doing a compare_noop will enable us to be alerted when we finally come across an org-mode document that has a value other than nil for the property. +pub(crate) fn compare_property_always_nil<'b, 's, 'x, R, RG>( + emacs: &'b Token<'s>, + _rust_node: R, + emacs_field: &'x str, + _rust_value_getter: RG, +) -> Result)>, Box> { + let value = get_property(emacs, emacs_field)?; + if value.is_some() { + let this_status = DiffStatus::Bad; + let message = Some(format!( + "{} was expected to always be nil: {:?}", + emacs_field, value + )); + Ok(Some((this_status, message))) + } else { + Ok(None) + } +} + pub(crate) fn compare_property_quoted_string<'b, 's, 'x, R, RG: Fn(R) -> Option<&'s str>>( emacs: &'b Token<'s>, rust_node: R, @@ -40,7 +64,47 @@ pub(crate) fn compare_property_quoted_string<'b, 's, 'x, R, RG: Fn(R) -> Option< ) -> Result)>, Box> { let value = get_property_quoted_string(emacs, emacs_field)?; let rust_value = rust_value_getter(rust_node); - if !rust_value.eq(&value.as_ref().map(String::as_str)) { + if rust_value != value.as_ref().map(String::as_str) { + let this_status = DiffStatus::Bad; + let message = Some(format!( + "{} mismatch (emacs != rust) {:?} != {:?}", + emacs_field, value, rust_value + )); + Ok(Some((this_status, message))) + } else { + Ok(None) + } +} + +pub(crate) fn compare_property_quoted_string_owned<'b, 's, 'x, R, RG: Fn(R) -> Option>( + emacs: &'b Token<'s>, + rust_node: R, + emacs_field: &'x str, + rust_value_getter: RG, +) -> Result)>, Box> { + let value = get_property_quoted_string(emacs, emacs_field)?; + let rust_value = rust_value_getter(rust_node); + if rust_value != value { + let this_status = DiffStatus::Bad; + let message = Some(format!( + "{} mismatch (emacs != rust) {:?} != {:?}", + emacs_field, value, rust_value + )); + Ok(Some((this_status, message))) + } else { + Ok(None) + } +} + +pub(crate) fn compare_property_unquoted_atom<'b, 's, 'x, R, RG: Fn(R) -> Option<&'s str>>( + emacs: &'b Token<'s>, + rust_node: R, + emacs_field: &'x str, + rust_value_getter: RG, +) -> Result)>, Box> { + let value = get_property_unquoted_atom(emacs, emacs_field)?; + let rust_value = rust_value_getter(rust_node); + if rust_value != value { let this_status = DiffStatus::Bad; let message = Some(format!( "{} mismatch (emacs != rust) {:?} != {:?}", diff --git a/src/compare/diff.rs b/src/compare/diff.rs index c43b4d0d..5178e906 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -4,8 +4,10 @@ use std::collections::BTreeSet; use std::collections::HashSet; use super::compare_field::compare_identity; -use super::compare_field::compare_noop; +use super::compare_field::compare_property_always_nil; use super::compare_field::compare_property_quoted_string; +use super::compare_field::compare_property_quoted_string_owned; +use super::compare_field::compare_property_unquoted_atom; use super::elisp_fact::ElispFact; use super::elisp_fact::GetElispFact; use super::sexp::unquote; @@ -2783,28 +2785,28 @@ fn compare_regular_link<'b, 's>( ), ( EmacsField::Required(":path"), - |r| Some(r.path), - compare_property_quoted_string + |r| Some(r.get_path()), + compare_property_quoted_string_owned ), ( EmacsField::Required(":format"), - compare_identity, - compare_noop + |_| Some("bracket"), + compare_property_unquoted_atom ), ( EmacsField::Required(":raw-link"), - |r| Some(r.raw_link), - compare_property_quoted_string + |r| Some(r.get_raw_link()), + compare_property_quoted_string_owned ), ( EmacsField::Required(":application"), compare_identity, - compare_noop + compare_property_always_nil ), ( EmacsField::Required(":search-option"), compare_identity, - compare_noop + compare_property_always_nil ) )? { this_status = new_status; diff --git a/src/parser/regular_link.rs b/src/parser/regular_link.rs index 414be404..dcf99a26 100644 --- a/src/parser/regular_link.rs +++ b/src/parser/regular_link.rs @@ -2,6 +2,7 @@ use nom::branch::alt; use nom::bytes::complete::escaped; use nom::bytes::complete::tag; use nom::bytes::complete::take_till1; +use nom::bytes::complete::take_until; use nom::character::complete::anychar; use nom::combinator::consumed; use nom::combinator::eof; @@ -116,6 +117,7 @@ fn parse_path_reg<'s>(input: OrgSource<'s>) -> Res, PathReg<'s>> { id_path_reg, custom_id_path_reg, code_ref_path_reg, + protocol_path_reg, fuzzy_path_reg, ))(input) } @@ -172,6 +174,19 @@ fn code_ref_path_reg<'s>(input: OrgSource<'s>) -> Res, PathReg<'s> )) } +fn protocol_path_reg<'s>(input: OrgSource<'s>) -> Res, PathReg<'s>> { + let (remaining, (raw_link, (protocol, _, path))) = + consumed(tuple((take_until(":"), tag(":"), rest)))(input)?; + Ok(( + remaining, + PathReg { + link_type: LinkType::Protocol(protocol.into()), + path: path.into(), + raw_link: raw_link.into(), + }, + )) +} + fn fuzzy_path_reg<'s>(input: OrgSource<'s>) -> Res, PathReg<'s>> { let (remaining, body) = rest(input)?; Ok(( diff --git a/src/types/object.rs b/src/types/object.rs index bd0af47c..f4831116 100644 --- a/src/types/object.rs +++ b/src/types/object.rs @@ -648,3 +648,45 @@ pub enum LinkType<'s> { CodeRef, Fuzzy, } + +#[derive(Debug)] +enum ParserState { + Normal, + InWhitespace, +} + +/// Org-mode treats multiple consecutive whitespace characters as a single space. This function performs that transformation. +/// +/// Example: `orgify_text("foo \t\n bar") == "foo bar"` +pub(crate) fn orgify_text<'s>(raw_text: &'s str) -> String { + let mut ret = String::with_capacity(raw_text.len()); + let mut state = ParserState::Normal; + for c in raw_text.chars() { + state = match (&state, c) { + (ParserState::Normal, _) if " \t\r\n".contains(c) => { + ret.push(' '); + ParserState::InWhitespace + } + (ParserState::InWhitespace, _) if " \t\r\n".contains(c) => ParserState::InWhitespace, + (ParserState::Normal, _) => { + ret.push(c); + ParserState::Normal + } + (ParserState::InWhitespace, _) => { + ret.push(c); + ParserState::Normal + } + }; + } + ret +} + +impl<'s> RegularLink<'s> { + pub fn get_raw_link(&self) -> String { + orgify_text(self.raw_link) + } + + pub fn get_path(&self) -> String { + orgify_text(self.path) + } +}