diff --git a/org_mode_samples/plain_link/all_default_links.org b/org_mode_samples/plain_link/all_default_links.org
new file mode 100644
index 0000000..8ceca6f
--- /dev/null
+++ b/org_mode_samples/plain_link/all_default_links.org
@@ -0,0 +1,26 @@
+non-link text
+id://foo
+eww://foo
+rmail://foo
+mhe://foo
+irc://foo
+info://foo
+gnus://foo
+docview://foo
+bibtex://foo
+bbdb://foo
+w3m://foo
+doi://foo
+file+sys://foo
+file+emacs://foo
+shell://foo
+news://foo
+mailto://foo
+https://foo
+http://foo
+ftp://foo
+help://foo
+file://foo
+elisp://foo
+randomfakeprotocl://foo
+non-link text
diff --git a/src/compare/diff.rs b/src/compare/diff.rs
index 2a95673..91a896b 100644
--- a/src/compare/diff.rs
+++ b/src/compare/diff.rs
@@ -24,6 +24,7 @@ use crate::parser::Keyword;
 use crate::parser::LatexEnvironment;
 use crate::parser::Object;
 use crate::parser::Paragraph;
+use crate::parser::PlainLink;
 use crate::parser::PlainList;
 use crate::parser::PlainListItem;
 use crate::parser::PlainText;
@@ -148,6 +149,7 @@ fn compare_object<'s>(
         Object::RegularLink(obj) => compare_regular_link(source, emacs, obj),
         Object::RadioLink(obj) => compare_radio_link(source, emacs, obj),
         Object::RadioTarget(obj) => compare_radio_target(source, emacs, obj),
+        Object::PlainLink(obj) => compare_plain_link(source, emacs, obj),
     }
 }
 
@@ -1163,3 +1165,30 @@ fn compare_radio_target<'s>(
         children: Vec::new(),
     })
 }
+
+/// Compare a parsed plain link against the corresponding Emacs token.
+///
+/// The status is `DiffStatus::Bad` if the token is not named `link` or if the bounds check
+/// fails; both mismatches are reported through the returned `DiffResult`, not as an `Err`.
+fn compare_plain_link<'s>(
+    source: &'s str,
+    emacs: &'s Token<'s>,
+    rust: &'s PlainLink<'s>,
+) -> Result<DiffResult, Box<dyn std::error::Error>> {
+    let mut this_status = DiffStatus::Good;
+    let emacs_name = "link";
+    if assert_name(emacs, emacs_name).is_err() {
+        this_status = DiffStatus::Bad;
+    }
+
+    if assert_bounds(source, emacs, rust).is_err() {
+        this_status = DiffStatus::Bad;
+    }
+
+    Ok(DiffResult {
+        status: this_status,
+        name: emacs_name.to_owned(),
+        message: None,
+        children: Vec::new(),
+    })
+}
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index e89ceb4..89f3a75 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -22,6 +22,7 @@ mod object_parser;
 mod paragraph;
 mod parser_context;
 mod parser_with_context;
+mod plain_link;
 mod plain_list;
 mod plain_text;
 mod planning;
@@ -67,6 +68,7 @@ pub use object::Bold;
 pub use object::Code;
 pub use object::Italic;
 pub use object::Object;
+pub use object::PlainLink;
 pub use object::PlainText;
 pub use object::RadioLink;
 pub use object::RadioTarget;
diff --git a/src/parser/object.rs b/src/parser/object.rs
index 29075b9..cef816c 100644
--- a/src/parser/object.rs
+++ b/src/parser/object.rs
@@ -5,6 +5,7 @@ pub enum Object<'s> {
     RegularLink(RegularLink<'s>),
     RadioLink(RadioLink<'s>),
     RadioTarget(RadioTarget<'s>),
+    PlainLink(PlainLink<'s>),
     Bold(Bold<'s>),
     Italic(Italic<'s>),
     Underline(Underline<'s>),
@@ -72,6 +73,17 @@ pub struct RadioLink<'s> {
     pub children: Vec<Object<'s>>,
 }
 
+/// A plain link: a bare `protocol:path` written without surrounding brackets.
+#[derive(Debug, PartialEq)]
+pub struct PlainLink<'s> {
+    /// The text consumed by the parser for this link.
+    pub source: &'s str,
+    /// The protocol portion, before the `:`.
+    pub link_type: &'s str,
+    /// The path portion, after the `:`.
+    pub path: &'s str,
+}
+
 impl<'s> Source<'s> for Object<'s> {
     fn get_source(&'s self) -> &'s str {
         match self {
@@ -85,6 +97,7 @@ impl<'s> Source<'s> for Object<'s> {
             Object::RegularLink(obj) => obj.source,
             Object::RadioLink(obj) => obj.source,
             Object::RadioTarget(obj) => obj.source,
+            Object::PlainLink(obj) => obj.source,
         }
     }
 }
@@ -142,3 +155,9 @@ impl<'s> Source<'s> for RadioTarget<'s> {
         self.source
     }
 }
+
+impl<'s> Source<'s> for PlainLink<'s> {
+    fn get_source(&'s self) -> &'s str {
+        self.source
+    }
+}
diff --git a/src/parser/object_parser.rs b/src/parser/object_parser.rs
index d4fca5f..3b29a6c 100644
--- a/src/parser/object_parser.rs
+++ b/src/parser/object_parser.rs
@@ -8,6 +8,7 @@ use super::regular_link::regular_link;
 use super::Context;
 use crate::error::Res;
 use crate::parser::object::Object;
+use crate::parser::plain_link::plain_link;
 use crate::parser::radio_link::radio_link;
 use crate::parser::radio_link::radio_target;
 use crate::parser::text_markup::text_markup;
@@ -31,6 +32,7 @@ pub fn standard_set_object<'r, 's>(
             parser_with_context!(regular_link)(context),
             Object::RegularLink,
         ),
+        map(parser_with_context!(plain_link)(context), Object::PlainLink),
         map(parser_with_context!(plain_text)(context), Object::PlainText),
     ))(input)
 }
@@ -66,6 +68,7 @@ pub fn any_object_except_plain_text<'r, 's>(
             parser_with_context!(regular_link)(context),
             Object::RegularLink,
         ),
+        map(parser_with_context!(plain_link)(context), Object::PlainLink),
     ))(input)
 }
 
diff --git a/src/parser/plain_link.rs b/src/parser/plain_link.rs
new file mode 100644
index 0000000..c102ea6
--- /dev/null
+++ b/src/parser/plain_link.rs
@@ -0,0 +1,115 @@
+use nom::branch::alt;
+use nom::bytes::complete::tag;
+use nom::bytes::complete::tag_no_case;
+use nom::bytes::complete::take_while1;
+use nom::character::complete::none_of;
+use nom::combinator::eof;
+use nom::combinator::peek;
+use nom::combinator::recognize;
+
+use super::Context;
+use crate::error::CustomError;
+use crate::error::MyError;
+use crate::error::Res;
+use crate::parser::object::PlainLink;
+use crate::parser::parser_with_context::parser_with_context;
+use crate::parser::util::get_consumed;
+use crate::parser::util::get_one_before;
+use crate::parser::util::WORD_CONSTITUENT_CHARACTERS;
+
+/// Parse a plain link such as `https://foo`.
+///
+/// A plain link is a known protocol, a `:`, and a non-empty path. It must not directly follow a
+/// word-constituent character, and it must be followed by a non-word-constituent character or the
+/// end of input. That following character is only peeked at, never consumed.
+#[tracing::instrument(ret, level = "debug")]
+pub fn plain_link<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, PlainLink<'s>> {
+    let (remaining, _) = pre(context, input)?;
+    let (remaining, proto) = protocol(context, remaining)?;
+    let (remaining, _separator) = tag(":")(remaining)?;
+    let (remaining, path) = path_plain(context, remaining)?;
+    peek(parser_with_context!(post)(context))(remaining)?;
+    let source = get_consumed(input, remaining);
+    Ok((remaining, PlainLink { source, link_type: proto, path }))
+}
+
+/// Check that the character before `input` allows a plain link to start here.
+///
+/// Succeeds, consuming nothing, when there is no preceding character or when the preceding
+/// character is not a word constituent.
+#[tracing::instrument(ret, level = "debug")]
+pub fn pre<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
+    let document_root = context.get_document_root().unwrap();
+    let preceding_character =
+        get_one_before(document_root, input).and_then(|slice| slice.chars().next());
+    match preceding_character {
+        // If None, we are at the start of the file which is fine
+        None => {}
+        Some(x) if !WORD_CONSTITUENT_CHARACTERS.contains(x) => {}
+        Some(_) => {
+            // Preceded by a word-constituent character, so a plain link cannot start here.
+            return Err(nom::Err::Error(CustomError::MyError(MyError(
+                "Not a valid pre character for plain link.",
+            ))));
+        }
+    };
+    Ok((input, ()))
+}
+
+/// Check that a plain link may end at `input`: end of input or a non-word-constituent character.
+///
+/// A matched character is consumed, so callers that only want the check should wrap
+/// this parser in `peek`.
+#[tracing::instrument(ret, level = "debug")]
+pub fn post<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, ()> {
+    let (remaining, _) = alt((eof, recognize(none_of(WORD_CONSTITUENT_CHARACTERS))))(input)?;
+    Ok((remaining, ()))
+}
+
+/// Match one of the default link protocols, case-insensitively, and return the matched text.
+///
+/// `alt` takes the first alternative that matches, so longer names must precede their prefixes
+/// (`https` before `http`, `file+sys` and `file+emacs` before `file`). The list is split across
+/// two nested `alt` calls because nom implements `alt` only for tuples of up to 21 parsers.
+#[tracing::instrument(ret, level = "debug")]
+pub fn protocol<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
+    let (remaining, proto) = alt((
+        alt((
+            tag_no_case("id"),
+            tag_no_case("eww"),
+            tag_no_case("rmail"),
+            tag_no_case("mhe"),
+            tag_no_case("irc"),
+            tag_no_case("info"),
+            tag_no_case("gnus"),
+            tag_no_case("docview"),
+            tag_no_case("bibtex"),
+            tag_no_case("bbdb"),
+            tag_no_case("w3m"),
+        )),
+        alt((
+            tag_no_case("doi"),
+            tag_no_case("file+sys"),
+            tag_no_case("file+emacs"),
+            tag_no_case("shell"),
+            tag_no_case("news"),
+            tag_no_case("mailto"),
+            tag_no_case("https"),
+            tag_no_case("http"),
+            tag_no_case("ftp"),
+            tag_no_case("help"),
+            tag_no_case("file"),
+            tag_no_case("elisp"),
+        )),
+    ))(input)?;
+    Ok((remaining, proto))
+}
+
+/// Match the path of a plain link: one or more characters, none of which is
+/// whitespace, a parenthesis, a square bracket, or an angle bracket.
+#[tracing::instrument(ret, level = "debug")]
+pub fn path_plain<'r, 's>(context: Context<'r, 's>, input: &'s str) -> Res<&'s str, &'s str> {
+    // TODO: "optionally containing parenthesis-wrapped non-whitespace non-bracket substrings up to a depth of two. The string must end with either a non-punctuation non-whitespace character, a forwards slash, or a parenthesis-wrapped substring"
+    // `take_while1`, not `take_while`: an empty path (a bare "http:") must not parse as a link.
+    take_while1(|c| !" \t\r\n()[]<>".contains(c))(input)
+}