organic/src/parser/keyword.rs
Tom Alexander 758e224e6d
Move consuming trailing element whitespace inside the parsers.
This ensures the parsers can take into account the affiliated keywords when setting their source without needing the SetSource trait.
2023-10-06 12:02:14 -04:00

236 lines
8.5 KiB
Rust

use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::bytes::complete::take_while1;
use nom::character::complete::anychar;
use nom::character::complete::line_ending;
use nom::character::complete::one_of;
use nom::character::complete::space0;
use nom::combinator::consumed;
use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::peek;
use nom::combinator::recognize;
use nom::combinator::verify;
use nom::multi::many0;
use nom::multi::many_till;
use nom::sequence::tuple;
use super::org_source::BracketDepth;
use super::org_source::OrgSource;
use super::util::get_consumed;
use super::util::get_name;
use super::util::maybe_consume_trailing_whitespace_if_not_exiting;
use crate::context::Matcher;
use crate::context::RefContext;
use crate::error::CustomError;
use crate::error::MyError;
use crate::error::Res;
use crate::parser::util::start_of_line;
use crate::types::Keyword;
const ORG_ELEMENT_AFFILIATED_KEYWORDS: [&'static str; 13] = [
"caption", "data", "headers", "header", "label", "name", "plot", "resname", "results",
"result", "source", "srcname", "tblname",
];
const ORG_ELEMENT_DUAL_KEYWORDS: [&'static str; 2] = ["caption", "results"];
pub(crate) fn filtered_keyword<F: Matcher>(
key_parser: F,
) -> impl for<'s> Fn(OrgSource<'s>) -> Res<OrgSource<'s>, Keyword<'s>> {
move |input| _filtered_keyword(&key_parser, input)
}
#[cfg_attr(
feature = "tracing",
tracing::instrument(ret, level = "debug", skip(key_parser))
)]
fn _filtered_keyword<'s, F: Matcher>(
key_parser: F,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Keyword<'s>> {
start_of_line(input)?;
// TODO: When key is a member of org-element-parsed-keywords, value can contain the standard set objects, excluding footnote references.
let (remaining, (consumed_input, (_, _, parsed_key, _))) =
consumed(tuple((space0, tag("#+"), key_parser, tag(":"))))(input)?;
match tuple((
space0::<OrgSource<'_>, CustomError<OrgSource<'_>>>,
alt((line_ending, eof)),
))(remaining)
{
Ok((remaining, _)) => {
return Ok((
remaining,
Keyword {
source: consumed_input.into(),
name: None, // To be populated by the caller if this keyword is in a context to support affiliated keywords.
key: parsed_key.into(),
value: "".into(),
},
));
}
Err(_) => {}
};
let (remaining, _ws) = space0(remaining)?;
let (remaining, parsed_value) = recognize(many_till(
anychar,
peek(tuple((space0, alt((line_ending, eof))))),
))(remaining)?;
let (remaining, _ws) = tuple((space0, alt((line_ending, eof))))(remaining)?;
Ok((
remaining,
Keyword {
source: consumed_input.into(),
name: None, // To be populated by the caller if this keyword is in a context to support affiliated keywords.
key: parsed_key.into(),
value: parsed_value.into(),
},
))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub(crate) fn keyword<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Keyword<'s>> {
let (remaining, affiliated_keywords) = many0(affiliated_keyword)(input)?;
let (remaining, mut kw) = filtered_keyword(regular_keyword_key)(remaining)?;
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
let source = get_consumed(input, remaining);
kw.name = get_name(&affiliated_keywords);
kw.source = Into::<&str>::into(source);
Ok((remaining, kw))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub(crate) fn affiliated_keyword_as_regular_keyword<'b, 'g, 'r, 's>(
context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Keyword<'s>> {
let (remaining, mut kw) = affiliated_keyword(input)?;
let (remaining, _trailing_ws) =
maybe_consume_trailing_whitespace_if_not_exiting(context, remaining)?;
let source = get_consumed(input, remaining);
kw.source = Into::<&str>::into(source);
Ok((remaining, kw))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub(crate) fn affiliated_keyword<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, Keyword<'s>> {
filtered_keyword(affiliated_key)(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
pub(crate) fn table_formula_keyword<'b, 'g, 'r, 's>(
_context: RefContext<'b, 'g, 'r, 's>,
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Keyword<'s>> {
verify(filtered_keyword(table_formula_key), |kw| {
!kw.value.is_empty()
})(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn table_formula_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
tag_no_case("tblfm")(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn regular_keyword_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
not(peek(alt((tag_no_case("call"), tag_no_case("begin")))))(input)?;
recognize(many_till(
anychar,
peek(alt((
recognize(one_of(" \t\r\n")), // Give up if we hit whitespace
recognize(tuple((tag(":"), one_of(" \t\r\n")))), // Stop if we see a colon followed by whitespace
recognize(tuple((tag(":"), is_not(" \t\r\n:"), not(tag(":"))))), // Stop if we see a colon that is the last colon before whitespace. This is for keywords like "#+foo:bar:baz: lorem: ipsum" which would have the key "foo:bar:baz".
))),
))(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn affiliated_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
alt((
recognize(tuple((dual_affiliated_key, tag("["), optval, tag("]")))),
plain_affiliated_key,
export_keyword,
))(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn plain_affiliated_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
for keyword in ORG_ELEMENT_AFFILIATED_KEYWORDS {
let result = tag_no_case::<_, _, CustomError<_>>(keyword)(input);
match result {
Ok((remaining, ent)) => {
return Ok((remaining, ent));
}
Err(_) => {}
}
}
Err(nom::Err::Error(CustomError::MyError(MyError(
"NoKeywordKey".into(),
))))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn dual_affiliated_key<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
for keyword in ORG_ELEMENT_DUAL_KEYWORDS {
let result = tag_no_case::<_, _, CustomError<_>>(keyword)(input);
match result {
Ok((remaining, ent)) => {
return Ok((remaining, ent));
}
Err(_) => {}
}
}
Err(nom::Err::Error(CustomError::MyError(MyError(
"NoKeywordKey".into(),
))))
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn optval<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
recognize(many_till(
anychar,
peek(optval_end(input.get_bracket_depth())),
))(input)
}
const fn optval_end(
starting_bracket_depth: BracketDepth,
) -> impl for<'s> Fn(OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
move |input: OrgSource<'_>| _optval_end(input, starting_bracket_depth)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn _optval_end<'s>(
input: OrgSource<'s>,
starting_bracket_depth: BracketDepth,
) -> Res<OrgSource<'s>, OrgSource<'s>> {
let current_depth = input.get_bracket_depth() - starting_bracket_depth;
if current_depth < 0 {
// This shouldn't be possible because if depth is 0 then a closing bracket should end the opval.
unreachable!("Exceeded optval bracket depth.")
}
if current_depth == 0 {
let close_bracket = tag::<_, _, CustomError<_>>("]")(input);
if close_bracket.is_ok() {
return close_bracket;
}
}
tag("\n")(input)
}
#[cfg_attr(feature = "tracing", tracing::instrument(ret, level = "debug"))]
fn export_keyword<'s>(input: OrgSource<'s>) -> Res<OrgSource<'s>, OrgSource<'s>> {
recognize(tuple((
tag_no_case("attr_"),
take_while1(|c: char| c.is_alphanumeric() || "-_".contains(c)),
)))(input)
}