organic/src/parser/plain_link.rs
Tom Alexander 720afa5d32
Update getting the previous character and previous line.
This can be done a lot more efficiently now that we are keeping track of this information in the wrapped input type instead of having to fetch to the original document out of the context tree.
2023-08-24 16:56:07 -04:00

133 lines
4.7 KiB
Rust

use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::character::complete::anychar;
use nom::character::complete::none_of;
use nom::character::complete::one_of;
use nom::combinator::eof;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::multi::many_till;
use super::org_source::OrgSource;
use super::Context;
use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
use crate::parser::exiting::ExitClass;
use crate::parser::object::PlainLink;
use crate::parser::parser_context::ContextElement;
use crate::parser::parser_context::ExitMatcherNode;
use crate::parser::parser_with_context::parser_with_context;
use crate::parser::util::exit_matcher_parser;
use crate::parser::util::get_consumed;
use crate::parser::util::WORD_CONSTITUENT_CHARACTERS;
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn plain_link<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, PlainLink<'s>> {
let (remaining, _) = pre(context, input)?;
let (remaining, proto) = protocol(context, remaining)?;
let (remaining, _separator) = tag(":")(remaining)?;
let (remaining, path) = path_plain(context, remaining)?;
peek(parser_with_context!(post)(context))(remaining)?;
let source = get_consumed(input, remaining);
Ok((
remaining,
PlainLink {
source: source.into(),
link_type: proto.into(),
path: path.into(),
},
))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn pre<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
let preceding_character = input.get_preceding_character();
match preceding_character {
// If None, we are at the start of the file which is fine
None => {}
Some(x) if !WORD_CONSTITUENT_CHARACTERS.contains(x) => {}
Some(_) => {
// Not at start of line, cannot be a heading
return Err(nom::Err::Error(CustomError::MyError(MyError(
"Not a valid pre character for plain link.".into(),
))));
}
};
Ok((input, ()))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn post<'r, 's>(context: Context<'r, 's>, input: OrgSource<'s>) -> Res<OrgSource<'s>, ()> {
let (remaining, _) = alt((eof, recognize(none_of(WORD_CONSTITUENT_CHARACTERS))))(input)?;
Ok((remaining, ()))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub fn protocol<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
// TODO: This should be defined by org-link-parameters
let (remaining, proto) = alt((
alt((
tag_no_case("id"),
tag_no_case("eww"),
tag_no_case("rmail"),
tag_no_case("mhe"),
tag_no_case("irc"),
tag_no_case("info"),
tag_no_case("gnus"),
tag_no_case("docview"),
tag_no_case("bibtex"),
tag_no_case("bbdb"),
tag_no_case("w3m"),
)),
alt((
tag_no_case("doi"),
tag_no_case("file+sys"),
tag_no_case("file+emacs"),
tag_no_case("shell"),
tag_no_case("news"),
tag_no_case("mailto"),
tag_no_case("https"),
tag_no_case("http"),
tag_no_case("ftp"),
tag_no_case("help"),
tag_no_case("file"),
tag_no_case("elisp"),
)),
))(input)?;
Ok((remaining, proto))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn path_plain<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
// TODO: "optionally containing parenthesis-wrapped non-whitespace non-bracket substrings up to a depth of two. The string must end with either a non-punctation non-whitespace character, a forwards slash, or a parenthesis-wrapped substring"
let parser_context =
context.with_additional_node(ContextElement::ExitMatcherNode(ExitMatcherNode {
class: ExitClass::Beta,
exit_matcher: &path_plain_end,
}));
let exit_matcher = parser_with_context!(exit_matcher_parser)(&parser_context);
let (remaining, path) = recognize(many_till(anychar, peek(exit_matcher)))(input)?;
Ok((remaining, path))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn path_plain_end<'r, 's>(
context: Context<'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
recognize(one_of(" \t\r\n()[]<>"))(input)
}