Merge branch 'angle_link_properties'
All checks were successful
rustfmt Build rustfmt has succeeded
rust-build Build rust-build has succeeded
rust-test Build rust-test has succeeded
rust-foreign-document-test Build rust-foreign-document-test has succeeded

This commit is contained in:
Tom Alexander 2023-10-08 14:18:25 -04:00
commit 3f707149e3
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
23 changed files with 621 additions and 119 deletions

View File

@ -0,0 +1,2 @@
<file+sys://foo>
<file+emacs://foo>

View File

@ -0,0 +1,13 @@
<(foo)>
<((bar))>
<((baz)>
<(lo
rem)>
# These become fuzzy
<(foo) >
< (foo)>
<(foo)::3>

View File

@ -0,0 +1,6 @@
<#foo>
<#fo
o>
<#foo::3>

View File

@ -0,0 +1 @@
<elisp:(local-set-key "\M-\x" 'foo-bar-baz)>

View File

@ -0,0 +1,21 @@
<./simple.org>
<../simple.org>
</simple.org>
<file:simple.org>
<file:sim ple.org>
<file:simp
le.org>
<file:simple.org::3>
<file:simple.org::foo>
<file:simple.org::#foo>
<file:simple.org::foo bar>
<file:simple.org::foo
bar>
<file:simple.org::foo
bar>
<file:simple.org::foo
bar>
<file:simple.org::foo::bar>
<file:simple.org::/foo/>

View File

@ -0,0 +1,6 @@
<elisp.org>
<eli
sp.org>
<elisp.org::3>

View File

@ -0,0 +1,6 @@
<id:83986bdf-987c-465d-8851-44cb4c02a86c>
<id:83986bdf-987c-465d
-8851-44cb4c02a86c>
<id:83986bdf-987c-465d-8851-44cb4c02a86c::foo>

View File

@ -0,0 +1,20 @@
<file:foo>
<file:/bar>
<file://baz>
<file:///lorem>
<file:////ipsum>
<file://///dolar>
<foo>
</bar>
<//baz>
<///lorem>
<////ipsum>
</////dolar>
<https:foo>
<https:/bar>
<https://baz>
<https:///lorem>
<https:////ipsum>
<https://///dolar>

View File

@ -0,0 +1,6 @@
<shell:foo>
<shell:fo
o>
<shell:foo::3>

View File

@ -0,0 +1,16 @@
<file:simple.org::foo>
<file:simple.org::#foo>
<file:simple.org::foo bar>
<file:simple.org::foo
bar>
<file:simple.org::foo::bar>
<file:simple.org::/foo/>
<file://en.wikipedia.org/wiki/Shebang_(Uni::x)>
<file:simple.org::*>
<file:simple.org::* foo>
<file:simple.org::*bar>
<file:simple.org::b*az>

View File

@ -0,0 +1 @@
<https://fizz.buzz/>

View File

@ -0,0 +1,6 @@
#+LINK: foo https://foo.bar/baz#%s
<foo::lorem>
<cat::bat>
#+LINK: cat dog%s
<cat:bat>

View File

@ -0,0 +1 @@
<https://en.wikipedia.org/wiki/Shebang_(Unix)>

View File

@ -7,8 +7,7 @@ bar
file:simple.org::foo::bar
file:simple.org::/foo/
# Does not become a search option because it is inside parenthesis.
https://en.wikipedia.org/wiki/Shebang_(Uni::x)
file://en.wikipedia.org/wiki/Shebang_(Uni::x)
file:simple.org::* foo

View File

@ -0,0 +1,25 @@
non-link text
[[eww://foo]]
[[rmail://foo]]
[[mhe://foo]]
[[irc://foo]]
[[info://foo]]
[[gnus://foo]]
[[docview://foo]]
[[bibtex://foo]]
[[bbdb://foo]]
[[w3m://foo]]
[[doi://foo]]
[[file+sys://foo]]
[[file+emacs://foo]]
[[shell://foo]]
[[news://foo]]
[[mailto://foo]]
[[https://foo]]
[[http://foo]]
[[ftp://foo]]
[[help://foo]]
[[file://foo]]
[[elisp://foo]]
[[randomfakeprotocl://foo]]
non-link text

View File

@ -1,2 +1,16 @@
# Does not become a search option because it is inside parenthesis.
[[https://en.wikipedia.org/wiki/Shebang_(Uni::x)]]
[[file:simple.org::foo]]
[[file:simple.org::#foo]]
[[file:simple.org::foo bar]]
[[file:simple.org::foo
bar]]
[[file:simple.org::foo::bar]]
[[file:simple.org::/foo/]]
[[file://en.wikipedia.org/wiki/Shebang_(Uni::x)]]
[[file:simple.org::*]]
[[file:simple.org::* foo]]
[[file:simple.org::*bar]]
[[file:simple.org::b*az]]

View File

@ -3002,10 +3002,55 @@ fn compare_angle_link<'b, 's>(
emacs: &'b Token<'s>,
rust: &'b AngleLink<'s>,
) -> Result<DiffEntry<'b, 's>, Box<dyn std::error::Error>> {
let this_status = DiffStatus::Good;
let message = None;
let mut this_status = DiffStatus::Good;
let mut message = None;
// TODO: Compare :type :path :format :raw-link :application :search-option
if let Some((new_status, new_message)) = compare_properties!(
emacs,
rust,
(
EmacsField::Required(":type"),
|r| {
match &r.link_type {
LinkType::File => Some(Cow::Borrowed("file")),
LinkType::Protocol(protocol) => Some(protocol.clone()),
LinkType::Id => Some(Cow::Borrowed("id")),
LinkType::CustomId => Some(Cow::Borrowed("custom-id")),
LinkType::CodeRef => Some(Cow::Borrowed("coderef")),
LinkType::Fuzzy => Some(Cow::Borrowed("fuzzy")),
}
},
compare_property_quoted_string
),
(
EmacsField::Required(":path"),
|r| Some(r.get_path()),
compare_property_quoted_string
),
(
EmacsField::Required(":format"),
|_| Some("angle"),
compare_property_unquoted_atom
),
(
EmacsField::Required(":raw-link"),
|r| Some(r.raw_link),
compare_property_quoted_string
),
(
EmacsField::Required(":application"),
|r| r.application,
compare_property_quoted_string
),
(
EmacsField::Required(":search-option"),
|r| r.get_search_option(),
compare_property_quoted_string
)
)? {
this_status = new_status;
message = new_message;
}
Ok(DiffResult {
status: this_status,

View File

@ -1,21 +1,33 @@
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::anychar;
use nom::bytes::complete::take;
use nom::bytes::complete::take_until;
use nom::combinator::consumed;
use nom::combinator::flat_map;
use nom::combinator::map;
use nom::combinator::map_parser;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::multi::many_till;
use nom::combinator::rest;
use nom::combinator::verify;
use nom::multi::many1_count;
use nom::sequence::tuple;
use super::org_source::OrgSource;
use super::util::maybe_consume_object_trailing_whitespace_if_not_exiting;
use super::util::text_until_exit;
use crate::context::parser_with_context;
use crate::context::ContextElement;
use crate::context::ExitClass;
use crate::context::ExitMatcherNode;
use crate::context::RefContext;
use crate::error::Res;
use crate::parser::plain_link::parse_file_and_application;
use crate::parser::plain_link::protocol;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::types::AngleLink;
use crate::types::LinkType;
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub(crate) fn angle_link<'b, 'g, 'r, 's>(
@ -23,9 +35,14 @@ pub(crate) fn angle_link<'b, 'g, 'r, 's>(
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, AngleLink<'s>> {
let (remaining, _) = tag("<")(input)?;
let (remaining, proto) = protocol(context, remaining)?;
let (remaining, _separator) = tag(":")(remaining)?;
let (remaining, path) = path_angle(context, remaining)?;
let (remaining, (raw_link, parsed_link)) = consumed(map_parser(
recognize(tuple((
parser_with_context!(protocol)(context),
tag(":"),
parser_with_context!(path_angle)(context),
))),
parser_with_context!(parse_angle_link)(context),
))(remaining)?;
let (remaining, _) = tag(">")(remaining)?;
let (remaining, _trailing_whitespace) =
maybe_consume_object_trailing_whitespace_if_not_exiting(context, remaining)?;
@ -34,12 +51,23 @@ pub(crate) fn angle_link<'b, 'g, 'r, 's>(
remaining,
AngleLink {
source: source.into(),
link_type: proto.into(),
path: path.into(),
link_type: parsed_link.link_type,
path: parsed_link.path,
raw_link: raw_link.into(),
search_option: parsed_link.search_option,
application: parsed_link.application,
},
))
}
/// Intermediate result produced by `parse_angle_link` for the text between the `<` and `>` of an angle link.
///
/// `angle_link` copies these fields into the final `AngleLink`.
#[derive(Debug)]
struct PathAngle<'s> {
/// The kind of link: `LinkType::File` for `file` links, `LinkType::Protocol` for everything else parsed here.
link_type: LinkType<'s>,
/// The path portion of the link, exactly as it appears in the source.
path: &'s str,
/// The text following `::` in a file link. Always `None` for protocol links.
search_option: Option<&'s str>,
/// The application reported by `parse_file_and_application` for file links (presumably the `sys` in `file+sys:` — confirm against that parser). Always `None` for protocol links.
application: Option<&'s str>,
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn path_angle<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
@ -51,9 +79,7 @@ fn path_angle<'b, 'g, 'r, 's>(
});
let parser_context = context.with_additional_node(&parser_context);
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
let (remaining, path) = recognize(many_till(anychar, peek(exit_matcher)))(input)?;
let (remaining, path) = text_until_exit(&parser_context, input)?;
Ok((remaining, path))
}
@ -64,3 +90,70 @@ fn path_angle_end<'b, 'g, 'r, 's>(
) -> Res<OrgSource<'s>, OrgSource<'s>> {
tag(">")(input)
}
/// Parse the already-recognized contents of an angle link (everything between `<` and `>`).
///
/// File links are attempted first; any input they reject falls through to the generic protocol link parser.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn parse_angle_link<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, PathAngle<'s>> {
alt((
// Order matters: `file` is also a valid protocol, so the file parser must get the first attempt.
parser_with_context!(parse_file_angle_link)(context),
parser_with_context!(parse_protocol_angle_link)(context),
))(input)
}
/// Parse the contents of an angle link whose protocol starts with `file`.
///
/// Produces a `PathAngle` with `LinkType::File`, the path up to the first `::` (or the whole remainder when there is no `::`), and everything after that `::` as the search option.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn parse_file_angle_link<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, PathAngle<'s>> {
// Require the input to start with "file" (without consuming it), then parse the protocol and hand it to
// `parse_file_and_application` to extract the optional application, followed by the ":" separator.
let (remaining, application) = map(
tuple((
peek(tag("file")),
map_parser(
parser_with_context!(protocol)(context),
parse_file_and_application,
),
tag(":"),
)),
|(_, application, _)| application,
)(input)?;
// When the path starts with three or more slashes, consume all but the last one so exactly one leading
// slash stays in the path. Fewer than three slashes are left untouched.
let (remaining, _) = opt(flat_map(
peek(map(verify(many1_count(tag("/")), |c| *c >= 3), |c| c - 1)),
take,
))(remaining)?;
// The path runs up to the first "::" or, if there is none, to the end of the input.
let (remaining, path) = alt((take_until("::"), rest))(remaining)?;
// Everything after the first "::" is the search option, taken verbatim (later "::" included).
let (remaining, search_option) = opt(map(tuple((tag("::"), rest)), |(_, search_option)| {
search_option
}))(remaining)?;
Ok((
remaining,
PathAngle {
link_type: LinkType::File,
path: path.into(),
search_option: search_option.map(Into::<&str>::into),
application: application.map(Into::<&str>::into),
},
))
}
/// Parse the contents of an angle link with an arbitrary protocol (for example `https:`).
///
/// The protocol becomes `LinkType::Protocol` and everything after the `:` is the path. These links never carry a search option or an application.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn parse_protocol_angle_link<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, PathAngle<'s>> {
// Protocol followed by the ":" separator; the separator itself is discarded.
let (remaining, link_type) = map(
tuple((parser_with_context!(protocol)(context), tag(":"))),
|(protocol, _)| LinkType::Protocol(protocol.into()),
)(input)?;
// The rest of the input, whatever it contains, is the path.
let (remaining, path) = rest(remaining)?;
Ok((
remaining,
PathAngle {
link_type,
path: path.into(),
search_option: None,
application: None,
},
))
}

View File

@ -3,9 +3,11 @@ use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::bytes::complete::take;
use nom::bytes::complete::take_until;
use nom::character::complete::anychar;
use nom::character::complete::none_of;
use nom::character::complete::one_of;
use nom::combinator::all_consuming;
use nom::combinator::consumed;
use nom::combinator::eof;
use nom::combinator::flat_map;
@ -127,6 +129,44 @@ pub(crate) fn parse_file_and_application<'s>(
Ok((remaining, application))
}
/// Split an already-recognized path into `(path, search_option)`.
///
/// First tries to interpret the input as `path::search_option`; if that fails, the whole input is returned as the path with no search option. Either way the entire input must be consumed.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub(crate) fn parse_path_and_search_option<'s>(
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, (OrgSource<'s>, Option<OrgSource<'s>>)> {
alt((
all_consuming(parse_path_and_search_option_with_search_option),
all_consuming(parse_path_and_search_option_without_search_option),
))(input)
}
/// Parse `path::search_option`, where the search option contains no whitespace and at least one alphanumeric character.
///
/// Errors when the input has no `::`, or when anything is left over after the optional search option (for example a `::` followed by text that does not qualify as a search option).
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub(crate) fn parse_path_and_search_option_with_search_option<'s>(
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, (OrgSource<'s>, Option<OrgSource<'s>>)> {
// Everything before the first "::" is the path.
let (remaining, path) = take_until("::")(input)?;
// The search option is the run of non-whitespace characters after "::"; it is only accepted when it
// contains at least one alphanumeric character.
let (remaining, search_option) = opt(map(
tuple((
tag("::"),
verify(is_not(" \t\r\n"), |search_option| {
Into::<&str>::into(search_option)
.chars()
.any(char::is_alphanumeric)
}),
)),
|(_, search_option)| search_option,
))(remaining)?;
// Assert we consumed the entire input.
not(anychar)(remaining)?;
Ok((remaining, (path, search_option)))
}
/// Fallback for `parse_path_and_search_option`: treat the whole input as the path and report no search option.
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub(crate) fn parse_path_and_search_option_without_search_option<'s>(
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, (OrgSource<'s>, Option<OrgSource<'s>>)> {
map(rest, |path| (path, None))(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn file_path_plain<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
@ -138,30 +178,23 @@ fn file_path_plain<'b, 'g, 'r, 's>(
exit_matcher: &path_plain_end,
});
let parser_context = context.with_additional_node(&parser_context);
let (remaining, (raw_link, (_, application, _, _, path, search_option))) = consumed(tuple((
peek(tag("file")),
map_parser(
parser_with_context!(protocol)(&parser_context),
parse_file_and_application,
),
tag(":"),
opt(flat_map(
peek(map(verify(many1_count(tag("/")), |c| *c >= 3), |c| c - 1)),
take,
)),
parser_with_context!(path_plain)(&parser_context),
opt(map(
tuple((
tag("::"),
verify(is_not(" \t\r\n"), |search_option| {
Into::<&str>::into(search_option)
.chars()
.any(char::is_alphanumeric)
}),
let (remaining, (raw_link, (_, application, _, _, (path, search_option)))) =
consumed(tuple((
peek(tag("file")),
map_parser(
parser_with_context!(protocol)(&parser_context),
parse_file_and_application,
),
tag(":"),
opt(flat_map(
peek(map(verify(many1_count(tag("/")), |c| *c >= 3), |c| c - 1)),
take,
)),
|(_, search_option)| search_option,
)),
)))(input)?;
map_parser(
parser_with_context!(path_plain)(&parser_context),
parse_path_and_search_option,
),
)))(input)?;
Ok((
remaining,
PathPlain {
@ -256,15 +289,9 @@ fn impl_path_plain_end<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
starting_parenthesis_depth: BracketDepth,
enable_search_option: bool,
_enable_search_option: bool,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
let current_depth = input.get_parenthesis_depth() - starting_parenthesis_depth;
if enable_search_option && current_depth == 0 {
let search_option = peek(tag("::"))(input);
if search_option.is_ok() {
return search_option;
}
}
let (remaining, _leading_punctuation) = many0(verify(anychar, |c| {
!" \t\r\n[]<>()/".contains(*c) && c.is_ascii_punctuation()

View File

@ -285,17 +285,9 @@ fn file_path_reg<'b, 'g, 'r, 's>(
take,
)),
parser_with_context!(text_until_exit)(&parser_context),
opt(map(
tuple((
tag("::"),
verify(rest, |search_option| {
Into::<&str>::into(search_option)
.chars()
.any(char::is_alphanumeric)
}),
)),
|(_, search_option)| search_option,
)),
opt(map(tuple((tag("::"), rest)), |(_, search_option)| {
search_option
})),
)))(input)?;
Ok((
@ -447,11 +439,10 @@ fn path_reg_end(
fn impl_path_reg_end<'b, 'g, 'r, 's>(
_context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
starting_parenthesis_depth: BracketDepth,
_starting_parenthesis_depth: BracketDepth,
enable_search_option: bool,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
let current_depth = input.get_parenthesis_depth() - starting_parenthesis_depth;
if enable_search_option && current_depth == 0 {
if enable_search_option {
let search_option = peek(tag("::"))(input);
if search_option.is_ok() {
return search_option;

View File

@ -8,6 +8,7 @@ mod macros;
mod object;
mod source;
mod standard_properties;
mod util;
pub(crate) use ast_node::AstNode;
pub use document::Document;
pub use document::DocumentElement;

View File

@ -1,6 +1,9 @@
use std::borrow::Borrow;
use std::borrow::Cow;
use super::util::coalesce_whitespace_if_line_break;
use super::util::remove_line_break;
use super::util::remove_whitespace_if_line_break;
use super::GetStandardProperties;
use super::StandardProperties;
@ -81,9 +84,21 @@ pub struct PlainText<'s> {
pub struct RegularLink<'s> {
pub source: &'s str,
pub link_type: LinkType<'s>,
/// The path after templates have been applied.
///
/// This does not take into account the post-processing that you would get from the upstream emacs org-mode AST. Use `get_raw_link` for an equivalent value.
pub path: Cow<'s, str>,
/// The raw link after templates have been applied.
///
/// This does not take into account the post-processing that you would get from the upstream emacs org-mode AST. Use `get_raw_link` for an equivalent value.
pub raw_link: Cow<'s, str>,
/// The search_option after templates have been applied.
///
/// This does not take into account the post-processing that you would get from the upstream emacs org-mode AST. Use `get_search_option` for an equivalent value.
pub search_option: Option<Cow<'s, str>>,
pub children: Vec<Object<'s>>,
pub application: Option<Cow<'s, str>>,
}
@ -115,8 +130,19 @@ pub struct PlainLink<'s> {
#[derive(Debug, PartialEq)]
pub struct AngleLink<'s> {
pub source: &'s str,
pub link_type: &'s str,
pub link_type: LinkType<'s>,
/// The path from the source.
///
/// This does not take into account the post-processing that you would get from the upstream emacs org-mode AST. Use `get_raw_link` for an equivalent value.
pub path: &'s str,
pub raw_link: &'s str,
/// The search_option from the source.
///
/// This does not take into account the post-processing that you would get from the upstream emacs org-mode AST. Use `get_search_option` for an equivalent value.
pub search_option: Option<&'s str>,
pub application: Option<&'s str>,
}
#[derive(Debug, PartialEq)]
@ -660,67 +686,28 @@ pub enum LinkType<'s> {
Fuzzy,
}
/// Scanner state for [`orgify_text`]: whether the previous character was whitespace.
#[derive(Debug)]
enum ParserState {
    Normal,
    InWhitespace,
}

/// Collapse every run of consecutive whitespace (space, tab, carriage return, line feed) into a single space, the way org-mode does.
///
/// Example: `orgify_text("foo \t\n bar") == "foo bar"`
pub(crate) fn orgify_text<T: AsRef<str>>(raw_text: T) -> String {
    let source = raw_text.as_ref();
    let mut collapsed = String::with_capacity(source.len());
    let mut state = ParserState::Normal;
    for current in source.chars() {
        let is_whitespace = matches!(current, ' ' | '\t' | '\r' | '\n');
        state = if is_whitespace {
            // Only the first character of a whitespace run emits a space.
            if matches!(state, ParserState::Normal) {
                collapsed.push(' ');
            }
            ParserState::InWhitespace
        } else {
            collapsed.push(current);
            ParserState::Normal
        };
    }
    collapsed
}
impl<'s> RegularLink<'s> {
/// Orgify the raw_link if it contains line breaks.
pub fn get_raw_link(&self) -> String {
if self.raw_link.contains('\n') {
orgify_text(Borrow::<str>::borrow(&self.raw_link))
} else {
self.raw_link.clone().into_owned()
}
/// Coalesce whitespace if the raw_link contains line breaks.
///
/// This corresponds to the output you would get from the upstream emacs org-mode AST.
pub fn get_raw_link<'b>(&'b self) -> Cow<'b, str> {
coalesce_whitespace_if_line_break(&self.raw_link)
}
/// Orgify the path if it contains line breaks.
pub fn get_path(&self) -> String {
if self.path.contains('\n') {
orgify_text(Borrow::<str>::borrow(&self.path))
} else {
self.path.clone().into_owned()
}
/// Coalesce whitespace if the path contains line breaks.
///
/// This corresponds to the output you would get from the upstream emacs org-mode AST.
pub fn get_path<'b>(&'b self) -> Cow<'b, str> {
coalesce_whitespace_if_line_break(&self.path)
}
/// Orgify the search_option if it contains line breaks.
pub fn get_search_option(&self) -> Option<String> {
self.search_option.as_ref().map(|search_option| {
if search_option.contains('\n') {
orgify_text(search_option)
} else {
search_option.clone().into_owned()
}
})
/// Coalesce whitespace if the search_option contains line breaks.
///
/// This corresponds to the output you would get from the upstream emacs org-mode AST.
pub fn get_search_option<'b>(&'b self) -> Option<Cow<'b, str>> {
self.search_option
.as_ref()
.map(|search_option| coalesce_whitespace_if_line_break(search_option.borrow()))
}
}
@ -729,3 +716,19 @@ impl<'s> RadioLink<'s> {
self.path
}
}
// Accessors that post-process the raw source slices stored on the link. Both return a `Cow` so the
// original slice can be handed back when the text contains no line break.
impl<'s> AngleLink<'s> {
/// Remove line breaks but preserve multiple consecutive spaces.
///
/// This corresponds to the output you would get from the upstream emacs org-mode AST.
pub fn get_path(&self) -> Cow<'s, str> {
remove_line_break(self.path)
}
/// Remove all whitespace but only if search_option contains a line break.
///
/// Returns `None` when the link has no search option.
///
/// This corresponds to the output you would get from the upstream emacs org-mode AST.
pub fn get_search_option(&self) -> Option<Cow<'s, str>> {
self.search_option.map(remove_whitespace_if_line_break)
}
}

199
src/types/util.rs Normal file
View File

@ -0,0 +1,199 @@
use std::borrow::Cow;
/// Removes all whitespace from a string if any line breaks are present.
///
/// Whitespace means space, tab, carriage return and line feed. When the input contains no line
/// feed it is returned unchanged as a borrowed slice, so no allocation happens in the common case.
/// Carriage returns are always stripped together with the rest of the whitespace, so CRLF line
/// endings behave the same as LF line endings.
///
/// Example: "foo bar" => "foo bar" but "foo \n bar" => "foobar".
pub(crate) fn remove_whitespace_if_line_break<'s>(input: &'s str) -> Cow<'s, str> {
    if !input.contains('\n') {
        // No line break: hand the original slice back untouched.
        return Cow::Borrowed(input);
    }
    // The result can only shrink, so the input length is a safe upper bound for the capacity.
    let mut ret = String::with_capacity(input.len());
    ret.extend(
        input
            .chars()
            .filter(|c| !matches!(c, ' ' | '\t' | '\r' | '\n')),
    );
    Cow::Owned(ret)
}
/// Strips every line feed (`\n`) from a string, leaving all other characters untouched.
///
/// Spaces and tabs around the line break are preserved, and so is a carriage return. When the
/// input contains no line feed the original slice is returned without allocating.
///
/// Example: "foo bar" => "foo bar" but "foo \n bar" => "foo  bar" (both spaces remain).
pub(crate) fn remove_line_break<'s>(input: &'s str) -> Cow<'s, str> {
    match input.find('\n') {
        None => Cow::Borrowed(input),
        Some(first_line_break) => {
            // Everything before the first line break is copied verbatim; only the tail needs filtering.
            let (head, tail) = input.split_at(first_line_break);
            let mut joined = String::with_capacity(input.len());
            joined.push_str(head);
            joined.extend(tail.chars().filter(|&c| c != '\n'));
            Cow::Owned(joined)
        }
    }
}
/// Coalesces every run of consecutive whitespace into a single space if any line breaks are present.
///
/// Whitespace means space, tab, carriage return and line feed. When the input contains no line
/// feed it is returned unchanged as a borrowed slice (repeated spaces included), so no allocation
/// happens in the common case. Carriage returns count as whitespace everywhere, so CRLF line
/// endings collapse to exactly one space just like LF line endings.
///
/// Example: "foo  bar" => "foo  bar" but "foo \n bar" => "foo bar".
pub(crate) fn coalesce_whitespace_if_line_break<'s>(input: &'s str) -> Cow<'s, str> {
    if !input.contains('\n') {
        // No line break: hand the original slice back untouched.
        return Cow::Borrowed(input);
    }
    // The result can only shrink, so the input length is a safe upper bound for the capacity.
    let mut ret = String::with_capacity(input.len());
    let mut in_whitespace = false;
    for c in input.chars() {
        if matches!(c, ' ' | '\t' | '\r' | '\n') {
            // Only the first character of a whitespace run emits a space.
            if !in_whitespace {
                ret.push(' ');
            }
            in_whitespace = true;
        } else {
            ret.push(c);
            in_whitespace = false;
        }
    }
    Cow::Owned(ret)
}