diff --git a/org_mode_samples/object/angle_link/application.org b/org_mode_samples/object/angle_link/application.org new file mode 100644 index 0000000..2463473 --- /dev/null +++ b/org_mode_samples/object/angle_link/application.org @@ -0,0 +1,2 @@ + + diff --git a/org_mode_samples/object/angle_link/code_ref_link.org b/org_mode_samples/object/angle_link/code_ref_link.org new file mode 100644 index 0000000..0de621d --- /dev/null +++ b/org_mode_samples/object/angle_link/code_ref_link.org @@ -0,0 +1,13 @@ +<(foo)> + +<((bar))> + +<((baz)> + +<(lo +rem)> + +# These become fuzzy +<(foo) > +< (foo)> +<(foo)::3> diff --git a/org_mode_samples/object/angle_link/custom_id_link.org b/org_mode_samples/object/angle_link/custom_id_link.org new file mode 100644 index 0000000..89c9511 --- /dev/null +++ b/org_mode_samples/object/angle_link/custom_id_link.org @@ -0,0 +1,6 @@ +<#foo> + +<#fo +o> + +<#foo::3> diff --git a/org_mode_samples/object/angle_link/elisp.org b/org_mode_samples/object/angle_link/elisp.org new file mode 100644 index 0000000..3797082 --- /dev/null +++ b/org_mode_samples/object/angle_link/elisp.org @@ -0,0 +1 @@ + diff --git a/org_mode_samples/object/angle_link/file_link.org b/org_mode_samples/object/angle_link/file_link.org new file mode 100644 index 0000000..c2e8bb8 --- /dev/null +++ b/org_mode_samples/object/angle_link/file_link.org @@ -0,0 +1,21 @@ +<./simple.org> +<../simple.org> + + + + + + + + + + + + + + + diff --git a/org_mode_samples/object/angle_link/fuzzy_link.org b/org_mode_samples/object/angle_link/fuzzy_link.org new file mode 100644 index 0000000..ab7844f --- /dev/null +++ b/org_mode_samples/object/angle_link/fuzzy_link.org @@ -0,0 +1,6 @@ + + + + + diff --git a/org_mode_samples/object/angle_link/id_link.org b/org_mode_samples/object/angle_link/id_link.org new file mode 100644 index 0000000..cc17de4 --- /dev/null +++ b/org_mode_samples/object/angle_link/id_link.org @@ -0,0 +1,6 @@ + + + + + diff --git 
a/org_mode_samples/object/angle_link/multiple_slashes.org b/org_mode_samples/object/angle_link/multiple_slashes.org new file mode 100644 index 0000000..9ddc6b3 --- /dev/null +++ b/org_mode_samples/object/angle_link/multiple_slashes.org @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/org_mode_samples/object/angle_link/protocol_link.org b/org_mode_samples/object/angle_link/protocol_link.org new file mode 100644 index 0000000..bb1ff75 --- /dev/null +++ b/org_mode_samples/object/angle_link/protocol_link.org @@ -0,0 +1,6 @@ + + + + + diff --git a/org_mode_samples/object/angle_link/search_option.org b/org_mode_samples/object/angle_link/search_option.org new file mode 100644 index 0000000..d0d07ed --- /dev/null +++ b/org_mode_samples/object/angle_link/search_option.org @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + diff --git a/org_mode_samples/object/angle_link/simple.org b/org_mode_samples/object/angle_link/simple.org new file mode 100644 index 0000000..4e2e8b8 --- /dev/null +++ b/org_mode_samples/object/angle_link/simple.org @@ -0,0 +1 @@ + diff --git a/org_mode_samples/object/angle_link/template.org b/org_mode_samples/object/angle_link/template.org new file mode 100644 index 0000000..b04d5b4 --- /dev/null +++ b/org_mode_samples/object/angle_link/template.org @@ -0,0 +1,6 @@ +#+LINK: foo https://foo.bar/baz#%s + + + +#+LINK: cat dog%s + diff --git a/org_mode_samples/object/angle_link/with_parenthesis.org b/org_mode_samples/object/angle_link/with_parenthesis.org new file mode 100644 index 0000000..f1e0401 --- /dev/null +++ b/org_mode_samples/object/angle_link/with_parenthesis.org @@ -0,0 +1 @@ + diff --git a/org_mode_samples/object/plain_link/search_option.org b/org_mode_samples/object/plain_link/search_option.org index d9e1631..7c39568 100644 --- a/org_mode_samples/object/plain_link/search_option.org +++ b/org_mode_samples/object/plain_link/search_option.org @@ -7,8 +7,7 @@ bar file:simple.org::foo::bar file:simple.org::/foo/ -# Does not become a 
search option because it is inside parenthesis. -https://en.wikipedia.org/wiki/Shebang_(Uni::x) +file://en.wikipedia.org/wiki/Shebang_(Uni::x) file:simple.org::* foo diff --git a/org_mode_samples/object/regular_link/all_default_links.org b/org_mode_samples/object/regular_link/all_default_links.org new file mode 100644 index 0000000..e09deb5 --- /dev/null +++ b/org_mode_samples/object/regular_link/all_default_links.org @@ -0,0 +1,25 @@ +non-link text +[[eww://foo]] +[[rmail://foo]] +[[mhe://foo]] +[[irc://foo]] +[[info://foo]] +[[gnus://foo]] +[[docview://foo]] +[[bibtex://foo]] +[[bbdb://foo]] +[[w3m://foo]] +[[doi://foo]] +[[file+sys://foo]] +[[file+emacs://foo]] +[[shell://foo]] +[[news://foo]] +[[mailto://foo]] +[[https://foo]] +[[http://foo]] +[[ftp://foo]] +[[help://foo]] +[[file://foo]] +[[elisp://foo]] +[[randomfakeprotocl://foo]] +non-link text diff --git a/org_mode_samples/object/regular_link/search_option.org b/org_mode_samples/object/regular_link/search_option.org index aba6af0..c273457 100644 --- a/org_mode_samples/object/regular_link/search_option.org +++ b/org_mode_samples/object/regular_link/search_option.org @@ -1,2 +1,16 @@ -# Does not become a search option because it is inside parenthesis. 
-[[https://en.wikipedia.org/wiki/Shebang_(Uni::x)]] +[[file:simple.org::foo]] + +[[file:simple.org::#foo]] +[[file:simple.org::foo bar]] +[[file:simple.org::foo +bar]] +[[file:simple.org::foo::bar]] +[[file:simple.org::/foo/]] + +[[file://en.wikipedia.org/wiki/Shebang_(Uni::x)]] + + +[[file:simple.org::*]] +[[file:simple.org::* foo]] +[[file:simple.org::*bar]] +[[file:simple.org::b*az]] diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 04ee74c..2f26f22 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -3002,10 +3002,55 @@ fn compare_angle_link<'b, 's>( emacs: &'b Token<'s>, rust: &'b AngleLink<'s>, ) -> Result, Box> { - let this_status = DiffStatus::Good; - let message = None; + let mut this_status = DiffStatus::Good; + let mut message = None; - // TODO: Compare :type :path :format :raw-link :application :search-option + if let Some((new_status, new_message)) = compare_properties!( + emacs, + rust, + ( + EmacsField::Required(":type"), + |r| { + match &r.link_type { + LinkType::File => Some(Cow::Borrowed("file")), + LinkType::Protocol(protocol) => Some(protocol.clone()), + LinkType::Id => Some(Cow::Borrowed("id")), + LinkType::CustomId => Some(Cow::Borrowed("custom-id")), + LinkType::CodeRef => Some(Cow::Borrowed("coderef")), + LinkType::Fuzzy => Some(Cow::Borrowed("fuzzy")), + } + }, + compare_property_quoted_string + ), + ( + EmacsField::Required(":path"), + |r| Some(r.get_path()), + compare_property_quoted_string + ), + ( + EmacsField::Required(":format"), + |_| Some("angle"), + compare_property_unquoted_atom + ), + ( + EmacsField::Required(":raw-link"), + |r| Some(r.raw_link), + compare_property_quoted_string + ), + ( + EmacsField::Required(":application"), + |r| r.application, + compare_property_quoted_string + ), + ( + EmacsField::Required(":search-option"), + |r| r.get_search_option(), + compare_property_quoted_string + ) + )? 
{ + this_status = new_status; + message = new_message; + } Ok(DiffResult { status: this_status, diff --git a/src/parser/angle_link.rs b/src/parser/angle_link.rs index 113b622..cffa533 100644 --- a/src/parser/angle_link.rs +++ b/src/parser/angle_link.rs @@ -1,21 +1,33 @@ +use nom::branch::alt; use nom::bytes::complete::tag; -use nom::character::complete::anychar; +use nom::bytes::complete::take; +use nom::bytes::complete::take_until; +use nom::combinator::consumed; +use nom::combinator::flat_map; +use nom::combinator::map; +use nom::combinator::map_parser; +use nom::combinator::opt; use nom::combinator::peek; use nom::combinator::recognize; -use nom::multi::many_till; +use nom::combinator::rest; +use nom::combinator::verify; +use nom::multi::many1_count; +use nom::sequence::tuple; use super::org_source::OrgSource; use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting; +use super::util::text_until_exit; use crate::context::parser_with_context; use crate::context::ContextElement; use crate::context::ExitClass; use crate::context::ExitMatcherNode; use crate::context::RefContext; use crate::error::Res; +use crate::parser::plain_link::parse_file_and_application; use crate::parser::plain_link::protocol; -use crate::parser::util::exit_matcher_parser; use crate::parser::util::get_consumed; use crate::types::AngleLink; +use crate::types::LinkType; #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] pub(crate) fn angle_link<'b, 'g, 'r, 's>( @@ -23,9 +35,14 @@ pub(crate) fn angle_link<'b, 'g, 'r, 's>( input: OrgSource<'s>, ) -> Res, AngleLink<'s>> { let (remaining, _) = tag("<")(input)?; - let (remaining, proto) = protocol(context, remaining)?; - let (remaining, _separator) = tag(":")(remaining)?; - let (remaining, path) = path_angle(context, remaining)?; + let (remaining, (raw_link, parsed_link)) = consumed(map_parser( + recognize(tuple(( + parser_with_context!(protocol)(context), + tag(":"), + 
parser_with_context!(path_angle)(context), + ))), + parser_with_context!(parse_angle_link)(context), + ))(remaining)?; let (remaining, _) = tag(">")(remaining)?; let (remaining, _trailing_whitespace) = maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?; @@ -34,12 +51,23 @@ pub(crate) fn angle_link<'b, 'g, 'r, 's>( remaining, AngleLink { source: source.into(), - link_type: proto.into(), - path: path.into(), + link_type: parsed_link.link_type, + path: parsed_link.path, + raw_link: raw_link.into(), + search_option: parsed_link.search_option, + application: parsed_link.application, }, )) } +#[derive(Debug)] +struct PathAngle<'s> { + link_type: LinkType<'s>, + path: &'s str, + search_option: Option<&'s str>, + application: Option<&'s str>, +} + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn path_angle<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, @@ -51,9 +79,7 @@ fn path_angle<'b, 'g, 'r, 's>( }); let parser_context = context.with_additional_node(&parser_context); - let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context); - - let (remaining, path) = recognize(many_till(anychar, peek(exit_matcher)))(input)?; + let (remaining, path) = text_until_exit(&parser_context, input)?; Ok((remaining, path)) } @@ -64,3 +90,70 @@ fn path_angle_end<'b, 'g, 'r, 's>( ) -> Res, OrgSource<'s>> { tag(">")(input) } + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn parse_angle_link<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, PathAngle<'s>> { + alt(( + parser_with_context!(parse_file_angle_link)(context), + parser_with_context!(parse_protocol_angle_link)(context), + ))(input) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn parse_file_angle_link<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, PathAngle<'s>> { + let (remaining, application) = map( + 
tuple(( + peek(tag("file")), + map_parser( + parser_with_context!(protocol)(context), + parse_file_and_application, + ), + tag(":"), + )), + |(_, application, _)| application, + )(input)?; + let (remaining, _) = opt(flat_map( + peek(map(verify(many1_count(tag("/")), |c| *c >= 3), |c| c - 1)), + take, + ))(remaining)?; + let (remaining, path) = alt((take_until("::"), rest))(remaining)?; + let (remaining, search_option) = opt(map(tuple((tag("::"), rest)), |(_, search_option)| { + search_option + }))(remaining)?; + Ok(( + remaining, + PathAngle { + link_type: LinkType::File, + path: path.into(), + search_option: search_option.map(Into::<&str>::into), + application: application.map(Into::<&str>::into), + }, + )) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +fn parse_protocol_angle_link<'b, 'g, 'r, 's>( + context: RefContext<'b, 'g, 'r, 's>, + input: OrgSource<'s>, +) -> Res, PathAngle<'s>> { + let (remaining, link_type) = map( + tuple((parser_with_context!(protocol)(context), tag(":"))), + |(protocol, _)| LinkType::Protocol(protocol.into()), + )(input)?; + let (remaining, path) = rest(remaining)?; + Ok(( + remaining, + PathAngle { + link_type, + path: path.into(), + search_option: None, + application: None, + }, + )) +} diff --git a/src/parser/plain_link.rs b/src/parser/plain_link.rs index a226209..aa4f1a2 100644 --- a/src/parser/plain_link.rs +++ b/src/parser/plain_link.rs @@ -3,9 +3,11 @@ use nom::bytes::complete::is_not; use nom::bytes::complete::tag; use nom::bytes::complete::tag_no_case; use nom::bytes::complete::take; +use nom::bytes::complete::take_until; use nom::character::complete::anychar; use nom::character::complete::none_of; use nom::character::complete::one_of; +use nom::combinator::all_consuming; use nom::combinator::consumed; use nom::combinator::eof; use nom::combinator::flat_map; @@ -127,6 +129,44 @@ pub(crate) fn parse_file_and_application<'s>( Ok((remaining, application)) } +#[cfg_attr(feature = "tracing", 
tracing::instrument(ret, level = "debug"))] +pub(crate) fn parse_path_and_search_option<'s>( + input: OrgSource<'s>, +) -> Res, (OrgSource<'s>, Option>)> { + alt(( + all_consuming(parse_path_and_search_option_with_search_option), + all_consuming(parse_path_and_search_option_without_search_option), + ))(input) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub(crate) fn parse_path_and_search_option_with_search_option<'s>( + input: OrgSource<'s>, +) -> Res, (OrgSource<'s>, Option>)> { + let (remaining, path) = take_until("::")(input)?; + let (remaining, search_option) = opt(map( + tuple(( + tag("::"), + verify(is_not(" \t\r\n"), |search_option| { + Into::<&str>::into(search_option) + .chars() + .any(char::is_alphanumeric) + }), + )), + |(_, search_option)| search_option, + ))(remaining)?; + // Assert we consumed the entire input. + not(anychar)(remaining)?; + Ok((remaining, (path, search_option))) +} + +#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] +pub(crate) fn parse_path_and_search_option_without_search_option<'s>( + input: OrgSource<'s>, +) -> Res, (OrgSource<'s>, Option>)> { + map(rest, |path| (path, None))(input) +} + #[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))] fn file_path_plain<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, @@ -138,30 +178,23 @@ fn file_path_plain<'b, 'g, 'r, 's>( exit_matcher: &path_plain_end, }); let parser_context = context.with_additional_node(&parser_context); - let (remaining, (raw_link, (_, application, _, _, path, search_option))) = consumed(tuple(( - peek(tag("file")), - map_parser( - parser_with_context!(protocol)(&parser_context), - parse_file_and_application, - ), - tag(":"), - opt(flat_map( - peek(map(verify(many1_count(tag("/")), |c| *c >= 3), |c| c - 1)), - take, - )), - parser_with_context!(path_plain)(&parser_context), - opt(map( - tuple(( - tag("::"), - verify(is_not(" \t\r\n"), |search_option| { - 
Into::<&str>::into(search_option) - .chars() - .any(char::is_alphanumeric) - }), + let (remaining, (raw_link, (_, application, _, _, (path, search_option)))) = + consumed(tuple(( + peek(tag("file")), + map_parser( + parser_with_context!(protocol)(&parser_context), + parse_file_and_application, + ), + tag(":"), + opt(flat_map( + peek(map(verify(many1_count(tag("/")), |c| *c >= 3), |c| c - 1)), + take, )), - |(_, search_option)| search_option, - )), - )))(input)?; + map_parser( + parser_with_context!(path_plain)(&parser_context), + parse_path_and_search_option, + ), + )))(input)?; Ok(( remaining, PathPlain { @@ -256,15 +289,9 @@ fn impl_path_plain_end<'b, 'g, 'r, 's>( context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, starting_parenthesis_depth: BracketDepth, - enable_search_option: bool, + _enable_search_option: bool, ) -> Res, OrgSource<'s>> { let current_depth = input.get_parenthesis_depth() - starting_parenthesis_depth; - if enable_search_option && current_depth == 0 { - let search_option = peek(tag("::"))(input); - if search_option.is_ok() { - return search_option; - } - } let (remaining, _leading_punctuation) = many0(verify(anychar, |c| { !" 
\t\r\n[]<>()/".contains(*c) && c.is_ascii_punctuation() diff --git a/src/parser/regular_link.rs b/src/parser/regular_link.rs index 0946ca2..199829c 100644 --- a/src/parser/regular_link.rs +++ b/src/parser/regular_link.rs @@ -285,17 +285,9 @@ fn file_path_reg<'b, 'g, 'r, 's>( take, )), parser_with_context!(text_until_exit)(&parser_context), - opt(map( - tuple(( - tag("::"), - verify(rest, |search_option| { - Into::<&str>::into(search_option) - .chars() - .any(char::is_alphanumeric) - }), - )), - |(_, search_option)| search_option, - )), + opt(map(tuple((tag("::"), rest)), |(_, search_option)| { + search_option + })), )))(input)?; Ok(( @@ -447,11 +439,10 @@ fn path_reg_end( fn impl_path_reg_end<'b, 'g, 'r, 's>( _context: RefContext<'b, 'g, 'r, 's>, input: OrgSource<'s>, - starting_parenthesis_depth: BracketDepth, + _starting_parenthesis_depth: BracketDepth, enable_search_option: bool, ) -> Res, OrgSource<'s>> { - let current_depth = input.get_parenthesis_depth() - starting_parenthesis_depth; - if enable_search_option && current_depth == 0 { + if enable_search_option { let search_option = peek(tag("::"))(input); if search_option.is_ok() { return search_option; diff --git a/src/types/mod.rs b/src/types/mod.rs index 63eb4ae..8148c0d 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -8,6 +8,7 @@ mod macros; mod object; mod source; mod standard_properties; +mod util; pub(crate) use ast_node::AstNode; pub use document::Document; pub use document::DocumentElement; diff --git a/src/types/object.rs b/src/types/object.rs index 16e69df..01435d4 100644 --- a/src/types/object.rs +++ b/src/types/object.rs @@ -1,6 +1,9 @@ use std::borrow::Borrow; use std::borrow::Cow; +use super::util::coalesce_whitespace_if_line_break; +use super::util::remove_line_break; +use super::util::remove_whitespace_if_line_break; use super::GetStandardProperties; use super::StandardProperties; @@ -81,9 +84,21 @@ pub struct PlainText<'s> { pub struct RegularLink<'s> { pub source: &'s str, pub 
link_type: LinkType<'s>, + /// The path after templates have been applied. + /// + /// This does not take into account the post-processing that you would get from the upstream emacs org-mode AST. Use `get_path` for an equivalent value. pub path: Cow<'s, str>, + + /// The raw link after templates have been applied. + /// + /// This does not take into account the post-processing that you would get from the upstream emacs org-mode AST. Use `get_raw_link` for an equivalent value. pub raw_link: Cow<'s, str>, + + /// The search_option after templates have been applied. + /// + /// This does not take into account the post-processing that you would get from the upstream emacs org-mode AST. Use `get_search_option` for an equivalent value. pub search_option: Option>, + pub children: Vec>, pub application: Option>, } @@ -115,8 +130,19 @@ pub struct PlainLink<'s> { #[derive(Debug, PartialEq)] pub struct AngleLink<'s> { pub source: &'s str, - pub link_type: &'s str, + pub link_type: LinkType<'s>, + + /// The path from the source. + /// + /// This does not take into account the post-processing that you would get from the upstream emacs org-mode AST. Use `get_path` for an equivalent value. pub path: &'s str, + pub raw_link: &'s str, + + /// The search_option from the source. + /// + /// This does not take into account the post-processing that you would get from the upstream emacs org-mode AST. Use `get_search_option` for an equivalent value. + pub search_option: Option<&'s str>, + pub application: Option<&'s str>, } #[derive(Debug, PartialEq)] @@ -660,67 +686,28 @@ pub enum LinkType<'s> { Fuzzy, } -#[derive(Debug)] -enum ParserState { - Normal, - InWhitespace, -} - -/// Org-mode treats multiple consecutive whitespace characters as a single space. This function performs that transformation. 
-/// -/// Example: `orgify_text("foo \t\n bar") == "foo bar"` -pub(crate) fn orgify_text>(raw_text: T) -> String { - let raw_text = raw_text.as_ref(); - let mut ret = String::with_capacity(raw_text.len()); - let mut state = ParserState::Normal; - for c in raw_text.chars() { - state = match (&state, c) { - (ParserState::Normal, _) if " \t\r\n".contains(c) => { - ret.push(' '); - ParserState::InWhitespace - } - (ParserState::InWhitespace, _) if " \t\r\n".contains(c) => ParserState::InWhitespace, - (ParserState::Normal, _) => { - ret.push(c); - ParserState::Normal - } - (ParserState::InWhitespace, _) => { - ret.push(c); - ParserState::Normal - } - }; - } - ret -} - impl<'s> RegularLink<'s> { - /// Orgify the raw_link if it contains line breaks. - pub fn get_raw_link(&self) -> String { - if self.raw_link.contains('\n') { - orgify_text(Borrow::::borrow(&self.raw_link)) - } else { - self.raw_link.clone().into_owned() - } + /// Coalesce whitespace if the raw_link contains line breaks. + /// + /// This corresponds to the output you would get from the upstream emacs org-mode AST. + pub fn get_raw_link<'b>(&'b self) -> Cow<'b, str> { + coalesce_whitespace_if_line_break(&self.raw_link) } - /// Orgify the path if it contains line breaks. - pub fn get_path(&self) -> String { - if self.path.contains('\n') { - orgify_text(Borrow::::borrow(&self.path)) - } else { - self.path.clone().into_owned() - } + /// Coalesce whitespace if the path contains line breaks. + /// + /// This corresponds to the output you would get from the upstream emacs org-mode AST. + pub fn get_path<'b>(&'b self) -> Cow<'b, str> { + coalesce_whitespace_if_line_break(&self.path) } - /// Orgify the search_option if it contains line breaks. 
- pub fn get_search_option(&self) -> Option { - self.search_option.as_ref().map(|search_option| { - if search_option.contains('\n') { - orgify_text(search_option) - } else { - search_option.clone().into_owned() - } - }) + /// Coalesce whitespace if the search_option contains line breaks. + /// + /// This corresponds to the output you would get from the upstream emacs org-mode AST. + pub fn get_search_option<'b>(&'b self) -> Option> { + self.search_option + .as_ref() + .map(|search_option| coalesce_whitespace_if_line_break(search_option.borrow())) } } @@ -729,3 +716,19 @@ impl<'s> RadioLink<'s> { self.path } } + +impl<'s> AngleLink<'s> { + /// Remove line breaks but preserve multiple consecutive spaces. + /// + /// This corresponds to the output you would get from the upstream emacs org-mode AST. + pub fn get_path(&self) -> Cow<'s, str> { + remove_line_break(self.path) + } + + /// Remove all whitespace but only if search_option contains a line break. + /// + /// This corresponds to the output you would get from the upstream emacs org-mode AST. + pub fn get_search_option(&self) -> Option> { + self.search_option.map(remove_whitespace_if_line_break) + } +} diff --git a/src/types/util.rs b/src/types/util.rs new file mode 100644 index 0000000..c336474 --- /dev/null +++ b/src/types/util.rs @@ -0,0 +1,199 @@ +use std::borrow::Cow; + +/// Removes all whitespace from a string if any line breaks are present. +/// +/// Example: "foo bar" => "foo bar" but "foo \n bar" => "foobar". 
+pub(crate) fn remove_whitespace_if_line_break<'s>(input: &'s str) -> Cow<'s, str> { + let mut state = RemoveWhitespaceIfLineBreakState::Normal; + for (offset, c) in input.char_indices() { + match (&mut state, c) { + (RemoveWhitespaceIfLineBreakState::Normal, '\n') => { + let mut ret = String::with_capacity(input.len()); + ret.push_str(&input[..offset]); + state = RemoveWhitespaceIfLineBreakState::HasLineBreak(ret); + } + (RemoveWhitespaceIfLineBreakState::Normal, ' ' | '\t') => { + state = RemoveWhitespaceIfLineBreakState::HasWhitespace(offset); + } + (RemoveWhitespaceIfLineBreakState::Normal, _) => {} + (RemoveWhitespaceIfLineBreakState::HasWhitespace(first_whitespace_offset), '\n') => { + let mut ret = String::with_capacity(input.len()); + ret.push_str(&input[..*first_whitespace_offset]); + for c in input[*first_whitespace_offset..offset].chars() { + if !c.is_ascii_whitespace() { + ret.push(c); + } + } + state = RemoveWhitespaceIfLineBreakState::HasLineBreak(ret); + } + (RemoveWhitespaceIfLineBreakState::HasWhitespace(_), _) => {} + (RemoveWhitespaceIfLineBreakState::HasLineBreak(_), ' ' | '\t' | '\r' | '\n') => {} + (RemoveWhitespaceIfLineBreakState::HasLineBreak(ret), _) => { + ret.push(c); + } + } + } + match state { + RemoveWhitespaceIfLineBreakState::Normal => Cow::Borrowed(input), + RemoveWhitespaceIfLineBreakState::HasWhitespace(_) => Cow::Borrowed(input), + RemoveWhitespaceIfLineBreakState::HasLineBreak(ret) => Cow::Owned(ret), + } +} + +enum RemoveWhitespaceIfLineBreakState { + Normal, + HasWhitespace(usize), + HasLineBreak(String), +} + +/// Removes all line breaks from a string +/// +/// Example: "foo bar" => "foo bar" but "foo \n bar" => "foo bar". 
+pub(crate) fn remove_line_break<'s>(input: &'s str) -> Cow<'s, str> { + let mut state = RemoveLineBreakState::Normal; + for (offset, c) in input.char_indices() { + match (&mut state, c) { + (RemoveLineBreakState::Normal, '\n') => { + let mut ret = String::with_capacity(input.len()); + ret.push_str(&input[..offset]); + state = RemoveLineBreakState::HasLineBreak(ret); + } + (RemoveLineBreakState::Normal, _) => {} + (RemoveLineBreakState::HasLineBreak(_), '\n') => {} + (RemoveLineBreakState::HasLineBreak(ret), _) => { + ret.push(c); + } + } + } + match state { + RemoveLineBreakState::Normal => Cow::Borrowed(input), + RemoveLineBreakState::HasLineBreak(ret) => Cow::Owned(ret), + } +} + +enum RemoveLineBreakState { + Normal, + HasLineBreak(String), +} + +/// Coalesces each run of whitespace into a single space if any line breaks are present. +/// +/// Example: "foo \t bar" => "foo \t bar" (unchanged) but "foo \n bar" => "foo bar". +pub(crate) fn coalesce_whitespace_if_line_break<'s>(input: &'s str) -> Cow<'s, str> { + let mut state = CoalesceWhitespaceIfLineBreakState::Normal; + for (offset, c) in input.char_indices() { + match (&mut state, c) { + (CoalesceWhitespaceIfLineBreakState::Normal, '\n') => { + // Hit line break without any preceding whitespace + let mut ret = String::with_capacity(input.len()); + ret.push_str(&input[..offset]); + ret.push(' '); + state = CoalesceWhitespaceIfLineBreakState::HasLineBreak { + in_whitespace: true, + ret, + }; + } + (CoalesceWhitespaceIfLineBreakState::Normal, ' ' | '\t') => { + state = CoalesceWhitespaceIfLineBreakState::HasWhitespace { + in_whitespace: true, + first_whitespace_offset: offset, + }; + } + (CoalesceWhitespaceIfLineBreakState::Normal, _) => {} + + ( + CoalesceWhitespaceIfLineBreakState::HasWhitespace { + in_whitespace, + first_whitespace_offset, + }, + '\n', + ) => { + // Hit line break with preceding whitespace so we add all the text up to the first whitespace and then process the remaining text coalescing the whitespace. 
+ let mut ret = String::with_capacity(input.len()); + ret.push_str(&input[..*first_whitespace_offset]); + let mut sub_loop_in_whitespace = false; + for c in input[*first_whitespace_offset..offset].chars() { + if sub_loop_in_whitespace { + if !c.is_ascii_whitespace() { + // Preceding character was whitespace but this is not. + sub_loop_in_whitespace = false; + ret.push(c); + } + // Do nothing if preceding character was whitespace and this character also is whitespace. + } else { + if c.is_ascii_whitespace() { + // Preceding character was not whitespace but this is. + sub_loop_in_whitespace = true; + ret.push(' '); + } else { + // Preceding character was not whitespace and this is not either. + ret.push(c); + } + } + } + if !*in_whitespace { + // If this line break was the start of whitespace then we need to inject a space character for it. + ret.push(' '); + } + state = CoalesceWhitespaceIfLineBreakState::HasLineBreak { + in_whitespace: true, // This was triggered by a line break which is whitespace. 
+ ret, + }; + } + ( + CoalesceWhitespaceIfLineBreakState::HasWhitespace { + in_whitespace, + first_whitespace_offset: _, + }, + ' ' | '\t', + ) => { + *in_whitespace = true; + } + ( + CoalesceWhitespaceIfLineBreakState::HasWhitespace { + in_whitespace, + first_whitespace_offset: _, + }, + _, + ) => { + *in_whitespace = false; + } + ( + CoalesceWhitespaceIfLineBreakState::HasLineBreak { in_whitespace, ret }, + ' ' | '\t' | '\r' | '\n', + ) => { + if !*in_whitespace { + ret.push(' '); + } + *in_whitespace = true; + } + (CoalesceWhitespaceIfLineBreakState::HasLineBreak { in_whitespace, ret }, _) => { + *in_whitespace = false; + ret.push(c); + } + } + } + match state { + CoalesceWhitespaceIfLineBreakState::Normal => Cow::Borrowed(input), + CoalesceWhitespaceIfLineBreakState::HasWhitespace { + in_whitespace: _, + first_whitespace_offset: _, + } => Cow::Borrowed(input), + CoalesceWhitespaceIfLineBreakState::HasLineBreak { + in_whitespace: _, + ret, + } => Cow::Owned(ret), + } +} + +enum CoalesceWhitespaceIfLineBreakState { + Normal, + HasWhitespace { + in_whitespace: bool, + first_whitespace_offset: usize, + }, + HasLineBreak { + in_whitespace: bool, + ret: String, + }, +}