organic/src/parser/affiliated_keyword.rs

158 lines
5.7 KiB
Rust
Raw Normal View History

use std::collections::BTreeMap;
2023-10-11 18:29:07 -04:00
use nom::bytes::complete::tag;
use nom::bytes::complete::take_until;
use nom::character::complete::anychar;
2023-10-11 12:06:05 -04:00
use nom::combinator::all_consuming;
2023-10-11 18:29:07 -04:00
use nom::combinator::eof;
use nom::combinator::map;
use nom::combinator::map_parser;
use nom::combinator::opt;
use nom::combinator::peek;
use nom::combinator::recognize;
2023-10-11 12:06:05 -04:00
use nom::multi::many0;
2023-10-11 18:29:07 -04:00
use nom::multi::many_till;
use nom::sequence::tuple;
2023-10-11 12:06:05 -04:00
use super::object_parser::standard_set_object;
2023-10-11 19:07:47 -04:00
use super::util::confine_context;
2023-10-11 12:06:05 -04:00
use crate::context::parser_with_context;
use crate::context::Context;
use crate::context::ContextElement;
use crate::context::GlobalSettings;
2023-10-11 12:06:05 -04:00
use crate::context::List;
use crate::types::AffiliatedKeywordValue;
use crate::types::AffiliatedKeywords;
2023-10-11 11:17:01 -04:00
use crate::types::Keyword;
pub(crate) fn parse_affiliated_keywords<'g, 's>(
global_settings: &'g GlobalSettings<'g, 's>,
input: Vec<Keyword<'s>>,
) -> AffiliatedKeywords<'s> {
let mut ret = BTreeMap::new();
for kw in input.into_iter() {
2023-10-11 12:06:05 -04:00
let translated_name = translate_name(global_settings, kw.key);
2023-10-11 13:00:21 -04:00
if is_single_string_keyword(global_settings, translated_name.as_str()) {
2023-10-11 12:06:05 -04:00
ret.insert(
translated_name,
AffiliatedKeywordValue::SingleString(kw.value),
);
2023-10-11 13:55:43 -04:00
} else if is_list_of_single_string_keyword(global_settings, translated_name.as_str()) {
let list_of_strings = ret
.entry(translated_name)
.or_insert_with(|| AffiliatedKeywordValue::ListOfStrings(Vec::with_capacity(1)));
match list_of_strings {
AffiliatedKeywordValue::ListOfStrings(list_of_strings)
if list_of_strings.is_empty() =>
{
list_of_strings.push(kw.value);
}
AffiliatedKeywordValue::ListOfStrings(list_of_strings) => {
list_of_strings.clear();
list_of_strings.push(kw.value);
}
_ => panic!("Invalid AffiliatedKeywordValue type."),
}
2023-10-11 13:00:21 -04:00
} else if is_list_of_objects_keyword(global_settings, translated_name.as_str()) {
2023-10-11 12:06:05 -04:00
let initial_context = ContextElement::document_context();
let initial_context = Context::new(global_settings, List::new(&initial_context));
2023-10-11 18:29:07 -04:00
let (_remaining, optional_objects) = opt(all_consuming(map(
tuple((
take_until("["),
tag("["),
map_parser(
recognize(many_till(anychar, peek(tuple((tag("]"), eof))))),
2023-10-11 19:07:47 -04:00
confine_context(|i| {
all_consuming(many0(parser_with_context!(standard_set_object)(
&initial_context,
)))(i)
}),
2023-10-11 18:29:07 -04:00
),
tag("]"),
eof,
)),
|(_, _, objects, _, _)| objects,
)))(kw.key.into())
.expect("Object parser should always succeed.");
2023-10-11 12:06:05 -04:00
// TODO: This should be omitting footnote references
let (_remaining, objects) = all_consuming(many0(parser_with_context!(
standard_set_object
)(&initial_context)))(kw.value.into())
.expect("Object parser should always succeed.");
let list_of_lists = ret.entry(translated_name).or_insert_with(|| {
AffiliatedKeywordValue::ListOfListsOfObjects(Vec::with_capacity(1))
});
match list_of_lists {
AffiliatedKeywordValue::ListOfListsOfObjects(list_of_lists) => {
2023-10-11 18:29:07 -04:00
list_of_lists.push((optional_objects, objects));
2023-10-11 12:06:05 -04:00
}
_ => panic!("Invalid AffiliatedKeywordValue type."),
}
} else {
let list_of_strings = ret
.entry(translated_name)
.or_insert_with(|| AffiliatedKeywordValue::ListOfStrings(Vec::with_capacity(1)));
match list_of_strings {
AffiliatedKeywordValue::ListOfStrings(list_of_strings) => {
list_of_strings.push(kw.value);
}
_ => panic!("Invalid AffiliatedKeywordValue type."),
}
}
}
AffiliatedKeywords { keywords: ret }
}
2023-10-11 13:00:21 -04:00
fn translate_name<'g, 's>(global_settings: &'g GlobalSettings<'g, 's>, name: &'s str) -> String {
2023-10-11 18:50:22 -04:00
let name_until_optval = name
.split_once("[")
.map(|(before, _after)| before)
.unwrap_or(name);
2023-10-11 12:06:05 -04:00
for (src, dst) in global_settings.element_keyword_translation_alist {
2023-10-11 18:50:22 -04:00
if name_until_optval.eq_ignore_ascii_case(src) {
2023-10-11 13:00:21 -04:00
return dst.to_lowercase();
2023-10-11 12:06:05 -04:00
}
}
2023-10-11 18:50:22 -04:00
name_until_optval.to_lowercase()
2023-10-11 12:06:05 -04:00
}
2023-10-11 11:17:57 -04:00
2023-10-11 12:06:05 -04:00
fn is_single_string_keyword<'g, 's>(
_global_settings: &'g GlobalSettings<'g, 's>,
name: &'s str,
) -> bool {
2023-10-11 13:55:43 -04:00
// TODO: Is this defined by an elisp variable?
for single_string_name in ["plot", "name"] {
if name.eq_ignore_ascii_case(single_string_name) {
return true;
}
}
false
}
fn is_list_of_single_string_keyword<'g, 's>(
_global_settings: &'g GlobalSettings<'g, 's>,
2023-10-11 13:55:43 -04:00
name: &'s str,
) -> bool {
// TODO: Is this defined by an elisp variable?
2023-10-11 13:55:43 -04:00
for single_string_name in ["results"] {
if name.eq_ignore_ascii_case(single_string_name) {
return true;
}
}
false
2023-10-11 12:06:05 -04:00
}
2023-10-11 11:17:57 -04:00
2023-10-11 12:06:05 -04:00
/// Report whether `name` appears in
/// `global_settings.element_parsed_keywords`, comparing without regard to
/// ASCII case.
fn is_list_of_objects_keyword<'g, 's>(
    global_settings: &'g GlobalSettings<'g, 's>,
    name: &'s str,
) -> bool {
    global_settings
        .element_parsed_keywords
        .iter()
        .any(|candidate| name.eq_ignore_ascii_case(candidate))
}