organic/src/parser/affiliated_keyword.rs

197 lines
7.2 KiB
Rust
Raw Normal View History

use std::collections::BTreeMap;
2023-10-11 18:29:07 -04:00
use nom::bytes::complete::tag;
use nom::bytes::complete::take_until;
use nom::character::complete::anychar;
2023-10-11 12:06:05 -04:00
use nom::combinator::all_consuming;
2023-10-11 18:29:07 -04:00
use nom::combinator::eof;
use nom::combinator::map;
use nom::combinator::map_parser;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
2023-10-11 12:06:05 -04:00
use nom::multi::many0;
2023-10-11 18:29:07 -04:00
use nom::multi::many_till;
use nom::sequence::tuple;
2023-10-11 12:06:05 -04:00
2023-10-17 11:10:18 -04:00
use super::keyword::affiliated_keyword;
2023-10-11 12:06:05 -04:00
use super::object_parser::standard_set_object;
2023-10-11 19:07:47 -04:00
use super::util::confine_context;
2023-10-17 11:10:18 -04:00
use super::OrgSource;
2023-10-16 18:29:21 -04:00
use crate::context::bind_context;
use crate::context::constants::ORG_ELEMENT_DUAL_KEYWORDS;
use crate::context::constants::ORG_ELEMENT_KEYWORD_TRANSLATION_ALIST;
use crate::context::constants::ORG_ELEMENT_PARSED_KEYWORDS;
2023-10-11 12:06:05 -04:00
use crate::context::Context;
use crate::context::ContextElement;
use crate::context::GlobalSettings;
2023-10-11 12:06:05 -04:00
use crate::context::List;
2023-10-17 11:10:18 -04:00
use crate::error::Res;
use crate::types::AffiliatedKeywordValue;
use crate::types::AffiliatedKeywords;
2023-10-11 11:17:01 -04:00
use crate::types::Keyword;
2023-10-17 11:10:18 -04:00
#[cfg_attr(
feature = "tracing",
tracing::instrument(ret, level = "debug", skip(context))
)]
pub(crate) fn affiliated_keywords<'s>(
2023-10-17 11:10:18 -04:00
input: OrgSource<'s>,
) -> Res<OrgSource<'s>, Vec<Keyword<'s>>> {
let mut ret = Vec::new();
let mut remaining = input;
loop {
let result = affiliated_keyword(remaining);
2023-10-17 11:10:18 -04:00
match result {
Ok((remain, kw)) => {
remaining = remain;
ret.push(kw);
}
Err(_) => {
break;
}
}
}
Ok((remaining, ret))
}
pub(crate) fn parse_affiliated_keywords<'g, 's, AK>(
global_settings: &'g GlobalSettings<'g, 's>,
input: AK,
) -> AffiliatedKeywords<'s>
where
AK: IntoIterator<Item = Keyword<'s>>,
{
let mut ret = BTreeMap::new();
for kw in input {
let translated_name = translate_name(kw.key);
let keyword_type = identify_keyword_type(translated_name.as_str());
match keyword_type {
AffiliatedKeywordType::SingleString => {
ret.insert(
translated_name,
AffiliatedKeywordValue::SingleString(kw.value),
);
}
AffiliatedKeywordType::ListOfStrings => {
let list_of_strings = ret.entry(translated_name).or_insert_with(|| {
AffiliatedKeywordValue::ListOfStrings(Vec::with_capacity(1))
});
match list_of_strings {
AffiliatedKeywordValue::ListOfStrings(list_of_strings) => {
list_of_strings.push(kw.value);
}
_ => panic!("Invalid AffiliatedKeywordValue type."),
}
}
AffiliatedKeywordType::OptionalPair => {
let (_remaining, optional_string) = opt(all_consuming(map(
tuple((
take_until::<_, &str, nom::error::Error<_>>("["),
tag("["),
recognize(many_till(anychar, peek(tuple((tag("]"), eof))))),
tag("]"),
eof,
)),
|(_, _, objects, _, _)| objects,
2023-10-16 18:29:21 -04:00
)))(kw.key)
.expect("Parser should always succeed.");
ret.insert(
translated_name,
AffiliatedKeywordValue::OptionalPair {
optval: optional_string,
val: kw.value,
},
);
}
AffiliatedKeywordType::ObjectTree => {
let initial_context = ContextElement::document_context();
let initial_context = Context::new(global_settings, List::new(&initial_context));
2023-10-11 12:06:05 -04:00
let (_remaining, optional_objects) = opt(all_consuming(map(
tuple((
take_until("["),
tag("["),
map_parser(
recognize(many_till(anychar, peek(tuple((tag("]"), eof))))),
confine_context(|i| {
2023-10-16 18:29:21 -04:00
all_consuming(many0(bind_context!(
standard_set_object,
&initial_context
)))(i)
}),
),
tag("]"),
eof,
)),
|(_, _, objects, _, _)| objects,
)))(kw.key.into())
.expect("Object parser should always succeed.");
2023-10-11 18:29:07 -04:00
// TODO: This should be omitting footnote references
2023-10-16 18:29:21 -04:00
let (_remaining, objects) = all_consuming(many0(bind_context!(
standard_set_object,
&initial_context
)))(kw.value.into())
.expect("Object parser should always succeed.");
let entry_per_keyword_list = ret
.entry(translated_name)
.or_insert_with(|| AffiliatedKeywordValue::ObjectTree(Vec::with_capacity(1)));
match entry_per_keyword_list {
AffiliatedKeywordValue::ObjectTree(entry_per_keyword_list) => {
entry_per_keyword_list.push((optional_objects, objects));
}
_ => panic!("Invalid AffiliatedKeywordValue type."),
}
}
};
}
AffiliatedKeywords { keywords: ret }
}
fn translate_name<'g, 's>(name: &'s str) -> String {
2023-10-11 18:50:22 -04:00
let name_until_optval = name
2023-10-16 18:29:21 -04:00
.split_once('[')
2023-10-11 18:50:22 -04:00
.map(|(before, _after)| before)
.unwrap_or(name);
for (src, dst) in ORG_ELEMENT_KEYWORD_TRANSLATION_ALIST {
2023-10-11 18:50:22 -04:00
if name_until_optval.eq_ignore_ascii_case(src) {
2023-10-11 13:00:21 -04:00
return dst.to_lowercase();
2023-10-11 12:06:05 -04:00
}
}
2023-10-11 18:50:22 -04:00
name_until_optval.to_lowercase()
2023-10-11 12:06:05 -04:00
}
2023-10-11 11:17:57 -04:00
/// Storage category of an affiliated keyword, as chosen by `identify_keyword_type`.
enum AffiliatedKeywordType {
/// Neither repeatable, parsed, nor able to carry an optional value.
SingleString,
/// Repeatable, but neither parsed nor able to carry an optional value.
ListOfStrings,
/// Able to carry an optional value, but neither repeatable nor parsed.
OptionalPair,
/// Repeatable, parsed, and able to carry an optional value.
ObjectTree,
}
/// Decide how the affiliated keyword called `name` must be stored.
///
/// Three properties are derived from `name`:
///
/// * repeatable: the name equals `CAPTION` or `HEADER` (ignoring ASCII case),
///   or its lowercased form starts with `attr_`;
/// * parsed: the name is listed in `ORG_ELEMENT_PARSED_KEYWORDS`
///   (ignoring ASCII case);
/// * dual: the name is listed in `ORG_ELEMENT_DUAL_KEYWORDS`
///   (ignoring ASCII case).
///
/// # Panics
///
/// Panics via `unreachable!` for any combination of the three properties other
/// than the four mapped to a variant below.
fn identify_keyword_type<'g, 's>(name: &'s str) -> AffiliatedKeywordType {
    let named = |candidate: &str| name.eq_ignore_ascii_case(candidate);

    let repeatable =
        named("CAPTION") || named("HEADER") || name.to_lowercase().starts_with("attr_");
    let parsed = ORG_ELEMENT_PARSED_KEYWORDS
        .iter()
        .any(|candidate| named(candidate));
    let dual = ORG_ELEMENT_DUAL_KEYWORDS
        .iter()
        .any(|candidate| named(candidate));

    match (repeatable, parsed, dual) {
        (true, true, true) => AffiliatedKeywordType::ObjectTree,
        (true, false, false) => AffiliatedKeywordType::ListOfStrings,
        (false, false, true) => AffiliatedKeywordType::OptionalPair,
        (false, false, false) => AffiliatedKeywordType::SingleString,
        (true, true, false) | (true, false, true) | (false, true, true) | (false, true, false) => {
            unreachable!("Nothing like this exists in upstream org-mode.")
        }
    }
}