diff --git a/notes/affiliated_keyword_investigation/analysis.org b/notes/affiliated_keyword_investigation/analysis.org new file mode 100644 index 0000000..32cc1c7 --- /dev/null +++ b/notes/affiliated_keyword_investigation/analysis.org @@ -0,0 +1,42 @@ +* Elisp Structure +| Keyword | Single | Double | Single Optval | Double Optval | +|---------+---------------+---------------+---------------+---------------| +| CAPTION | objtree | objtree | objtree | objtree | +| DATA | quoted(:name) | quoted(:name) | - | - | +| HEADER | list(quoted) | list(quoted) | - | - | +| NAME | quoted(:name) | quoted(:name) | - | - | +| PLOT | quoted(:plot) | quoted(:plot) | - | - | +| RESULTS | optional pair | optional pair | optional pair | optional pair | +* types +** objtree +Outer list: 1 per keyword +next list: first entry = list of objects for value. remaining entries = optval +** list(quoted) +List of quoted strings, 1 per keyword +** quoted(NAME) +Quoted string under the NAME property (for example quoted(:name)) +** optional pair +When optval is supplied this is an alist with the field value being the real value and the 3nd value being the optval. +#+begin_src elisp + ("*f*" . "*bar*") +#+end_src + +When optval is not supplied this is a list containing a single string of the last occurrence of this keyword. +#+begin_src elisp + ("*c*") +#+end_src +* Default settings +#+begin_src text + org-element-dual-keywords ("CAPTION" "RESULTS") + org-element-parsed-keywords ("CAPTION") + org-element-multiple-keywords ("CAPTION" "HEADER") + org-babel-results-keyword "RESULTS" +#+end_src +* Analysis +We don't have an example of a parsed non-dual keyword + +Looks like multiple triggers list 1 per keyword + +dual triggers support for optval + +parsed triggers objects diff --git a/notes/affiliated_keyword_investigation/run_test.bash b/notes/affiliated_keyword_investigation/run_test.bash new file mode 100755 index 0000000..641dc70 --- /dev/null +++ b/notes/affiliated_keyword_investigation/run_test.bash @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# +set -euo pipefail +IFS=$'\n\t' +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +file_path="${DIR}/test_document.org" + +for TARGET_VARIABLE in RESULTS CAPTION HEADER DATA NAME PLOT; do + INIT_SCRIPT=$(cat < "${DIR}/${TARGET_VARIABLE}" +done + +# exec docker run --init --rm -i -t --mount type=tmpfs,destination=/tmp -v "/:/input:ro" -w /input --entrypoint "" organic-test emacs -q --no-site-file --no-splash --eval "$INIT_SCRIPT" + +# org-element-dual-keywords ("CAPTION" "RESULTS") +# org-element-parsed-keywords ("CAPTION") +# org-element-multiple-keywords ("CAPTION" "HEADER") +# org-babel-results-keyword "RESULTS" diff --git a/notes/affiliated_keyword_investigation/test_document.org b/notes/affiliated_keyword_investigation/test_document.org new file mode 100644 index 0000000..58f36d2 --- /dev/null +++ b/notes/affiliated_keyword_investigation/test_document.org @@ -0,0 +1,25 @@ +# Single instance +#+foo: *a* +#+begin_example + +#+end_example + +# Two instances +#+foo: *b* +#+foo: *c* +#+begin_example + +#+end_example + +# Single with optval +#+foo[*bar*]: *d* +#+begin_example + +#+end_example + +# Two with optval +#+foo[*bar*]: *e* +#+foo[*bar*]: *f* +#+begin_example + +#+end_example diff --git a/src/compare/compare_field.rs b/src/compare/compare_field.rs index 01896aa..47e6c9e 100644 --- a/src/compare/compare_field.rs +++ b/src/compare/compare_field.rs @@ -16,7 +16,6 @@ use super::util::get_property_unquoted_atom; use crate::types::AstNode; use crate::types::CharOffsetInLine; use crate::types::LineNumber; -use crate::types::Object; use crate::types::RetainLabels; use crate::types::SwitchNumberLines; @@ -288,6 +287,107 @@ pub(crate) fn compare_property_set_of_quoted_string< Ok(ComparePropertiesResult::NoChange) } +pub(crate) fn compare_property_optional_pair< + 'b, + 's, + 'x, + R, + RV: AsRef + std::fmt::Debug, + ROV: AsRef + std::fmt::Debug, + RG: Fn(R) -> Option<(Option, RV)>, +>( + _source: &'s str, + emacs: &'b Token<'s>, + rust_node: R, + emacs_field: &'x str, + rust_value_getter: RG, +) -> Result, Box> { + let value = get_property(emacs, emacs_field)? + .map(Token::as_list) + .map_or(Ok(None), |r| r.map(Some))?; + let rust_value = rust_value_getter(rust_node); + match (value, &rust_value) { + (None, None) => {} + (None, Some(_)) | (Some(_), None) => { + let this_status = DiffStatus::Bad; + let message = Some(format!( + "{} mismatch (emacs != rust) {:?} != {:?}", + emacs_field, value, rust_value + )); + return Ok(ComparePropertiesResult::SelfChange(this_status, message)); + } + (Some(el), Some((Some(_), _))) if el.len() != 3 => { + let this_status = DiffStatus::Bad; + let message = Some(format!( + "{} mismatch (emacs != rust) {:?} != {:?}", + emacs_field, value, rust_value + )); + return Ok(ComparePropertiesResult::SelfChange(this_status, message)); + } + (Some(el), Some((None, _))) if el.len() != 1 => { + let this_status = DiffStatus::Bad; + let message = Some(format!( + "{} mismatch (emacs != rust) {:?} != {:?}", + emacs_field, value, rust_value + )); + return Ok(ComparePropertiesResult::SelfChange(this_status, message)); + } + (Some(el), Some((Some(orl), rl))) => { + let e = el + .first() + .map(Token::as_atom) + .map_or(Ok(None), |r| r.map(Some))? + .map(unquote) + .map_or(Ok(None), |r| r.map(Some))? + .expect("Above match proved length to be 3."); + let oe = el + .get(2) + .map(Token::as_atom) + .map_or(Ok(None), |r| r.map(Some))? + .map(unquote) + .map_or(Ok(None), |r| r.map(Some))? + .expect("Above match proved length to be 3."); + let r = rl.as_ref(); + let or = orl.as_ref(); + if e != r { + let this_status = DiffStatus::Bad; + let message = Some(format!( + "{} mismatch (emacs != rust) {:?} != {:?}. Full list: {:?} != {:?}", + emacs_field, e, r, value, rust_value + )); + return Ok(ComparePropertiesResult::SelfChange(this_status, message)); + } + if oe != or { + let this_status = DiffStatus::Bad; + let message = Some(format!( + "{} mismatch (emacs != rust) {:?} != {:?}. Full list: {:?} != {:?}", + emacs_field, e, r, value, rust_value + )); + return Ok(ComparePropertiesResult::SelfChange(this_status, message)); + } + } + (Some(el), Some((None, rl))) => { + let e = el + .first() + .map(Token::as_atom) + .map_or(Ok(None), |r| r.map(Some))? + .map(unquote) + .map_or(Ok(None), |r| r.map(Some))? + .expect("Above match proved length to be 1."); + let r = rl.as_ref(); + if e != r { + let this_status = DiffStatus::Bad; + let message = Some(format!( + "{} mismatch (emacs != rust) {:?} != {:?}. Full list: {:?} != {:?}", + emacs_field, e, r, value, rust_value + )); + return Ok(ComparePropertiesResult::SelfChange(this_status, message)); + } + } + } + Ok(ComparePropertiesResult::NoChange) +} + pub(crate) fn compare_property_boolean<'b, 's, 'x, R, RG: Fn(R) -> bool>( _source: &'s str, emacs: &'b Token<'s>, @@ -412,118 +512,133 @@ where Ok(ComparePropertiesResult::NoChange) } -/// Special compare used for affiliate keywords that are parsed as objects. -/// -/// Org-mode seems to store these as a 3-deep list: -/// - Outer list with 1 element per #+caption keyword (or other parsed keyword). -/// - Middle list which has: -/// - first element is a list of objects representing the value after the colon. -/// - every additional element is a list of objects from inside the square brackets (the optional value). -pub(crate) fn compare_property_list_of_list_of_list_of_ast_nodes< +pub(crate) fn compare_property_object_tree< 'b, 's, 'x, R, - RG: Fn(R) -> Option<&'b Vec<(Option>>, Vec>)>>, + RV: std::fmt::Debug + 'b, + ROV: std::fmt::Debug + 'b, + RI: Iterator>, Vec)> + ExactSizeIterator + std::fmt::Debug, + RG: Fn(R) -> Option, >( source: &'s str, emacs: &'b Token<'s>, rust_node: R, emacs_field: &'x str, rust_value_getter: RG, -) -> Result, Box> { - // TODO: Replace Object<'s> with generics. I hard-coded Object in to make lifetimes easier. - let rust_value = rust_value_getter(rust_node); +) -> Result, Box> +where + AstNode<'b, 's>: From<&'b RV>, + AstNode<'b, 's>: From<&'b ROV>, +{ let value = get_property(emacs, emacs_field)? .map(Token::as_list) .map_or(Ok(None), |r| r.map(Some))?; - let (value, rust_value) = match (value, rust_value) { + let rust_value = rust_value_getter(rust_node); + let (outer_emacs_list, outer_rust_list) = match (value, rust_value) { (None, None) => { return Ok(ComparePropertiesResult::NoChange); } - (None, Some(_)) | (Some(_), None) => { + (None, rv @ Some(_)) | (Some(_), rv @ None) => { let this_status = DiffStatus::Bad; let message = Some(format!( "{} mismatch (emacs != rust) {:?} != {:?}", - emacs_field, value, rust_value + emacs_field, value, rv )); return Ok(ComparePropertiesResult::SelfChange(this_status, message)); } - (Some(value), Some(rust_value)) if value.len() != rust_value.len() => { + (Some(el), Some(rl)) if el.len() != rl.len() => { let this_status = DiffStatus::Bad; let message = Some(format!( "{} mismatch (emacs != rust) {:?} != {:?}", - emacs_field, value, rust_value + emacs_field, el, rl )); return Ok(ComparePropertiesResult::SelfChange(this_status, message)); } - (Some(value), Some(rust_value)) => (value, rust_value), + (Some(el), Some(rl)) => (el, rl), }; + let mut full_status: Vec> = Vec::with_capacity(outer_rust_list.len()); - let mut full_status: Vec> = Vec::with_capacity(rust_value.len()); - - // Iterate the outer lists - for (value, (rust_optional, rust_value)) in value.iter().zip(rust_value.iter()) { - let mut middle_value = value.as_list()?.iter(); - // First element of middle list is the mandatory value (the value past the colon). - let mandatory_value = middle_value.next(); - let mandatory_value = match mandatory_value { - Some(mandatory_value) => mandatory_value, - None => { + for (kw_e, kw_r) in outer_emacs_list.into_iter().zip(outer_rust_list) { + let kw_e = kw_e.as_list()?; + let child_status_length = kw_r.1.len() + kw_r.0.as_ref().map(|opt| opt.len()).unwrap_or(0); + let mut child_status: Vec> = Vec::with_capacity(child_status_length); + if let Some(or) = &kw_r.0 { + // if optional value + let mut kw_e = kw_e.into_iter(); + // First element is a list representing the mandatory value. + if let Some(val_e) = kw_e.next() { + let el = val_e.as_list()?; + if el.len() != kw_r.1.len() { + let this_status = DiffStatus::Bad; + let message = Some(format!( + "{} mismatch (emacs != rust) {:?} != {:?}", + emacs_field, kw_e, kw_r + )); + return Ok(ComparePropertiesResult::SelfChange(this_status, message)); + } + for (e, r) in el.into_iter().zip(kw_r.1.iter()) { + child_status.push(compare_ast_node(source, e, r.into())?); + } + } else { let this_status = DiffStatus::Bad; let message = Some(format!( "{} mismatch (emacs != rust) {:?} != {:?}", - emacs_field, value, rust_value + emacs_field, kw_e, kw_r )); return Ok(ComparePropertiesResult::SelfChange(this_status, message)); } - }; - - // Compare optional value - if let Some(rust_optional) = rust_optional { - let mut child_status: Vec> = Vec::with_capacity(rust_value.len()); - if rust_optional.len() != middle_value.len() { + // Remaining elements are the optional value. + if kw_e.len() != or.len() { let this_status = DiffStatus::Bad; let message = Some(format!( - "{} optional value length mismatch (emacs != rust) {} != {} | {:?}", - emacs_field, - middle_value.len(), - rust_optional.len(), - rust_optional + "{} mismatch (emacs != rust) {:?} != {:?}", + emacs_field, kw_e, kw_r )); return Ok(ComparePropertiesResult::SelfChange(this_status, message)); } - for (e, r) in middle_value.zip(rust_optional) { + for (e, r) in kw_e.zip(or.iter()) { child_status.push(compare_ast_node(source, e, r.into())?); } - if !child_status.is_empty() { - let diff_scope = artificial_diff_scope("optional value", child_status)?; - full_status.push(diff_scope); + } else { + // if no optional value + if !kw_e.len() == 1 { + let this_status = DiffStatus::Bad; + let message = Some(format!( + "{} mismatch (emacs != rust) {:?} != {:?}", + emacs_field, kw_e, kw_r + )); + return Ok(ComparePropertiesResult::SelfChange(this_status, message)); } - } - // Compare mandatory value - let mut child_status: Vec> = Vec::with_capacity(rust_value.len()); - let mandatory_value = mandatory_value.as_list()?; - if rust_value.len() != mandatory_value.len() { - let this_status = DiffStatus::Bad; - let message = Some(format!( - "{} mandatory value length mismatch (emacs != rust) {} != {} | {:?}", - emacs_field, - mandatory_value.len(), - rust_value.len(), - rust_value - )); - return Ok(ComparePropertiesResult::SelfChange(this_status, message)); - } - for (e, r) in mandatory_value.iter().zip(rust_value) { - child_status.push(compare_ast_node(source, e, r.into())?); + let e = kw_e + .first() + .map(Token::as_list) + .map_or(Ok(None), |r| r.map(Some))? + .expect("The above if-statement proves this will be Some.") + .iter(); + let r = kw_r.1.iter(); + + if e.len() != r.len() { + let this_status = DiffStatus::Bad; + let message = Some(format!( + "{} mismatch (emacs != rust) {:?} != {:?}", + emacs_field, kw_e, kw_r + )); + return Ok(ComparePropertiesResult::SelfChange(this_status, message)); + } + + for (e, r) in e.zip(r) { + child_status.push(compare_ast_node(source, e, r.into())?); + } } if !child_status.is_empty() { let diff_scope = artificial_diff_scope("mandatory value", child_status)?; full_status.push(diff_scope); } } + if full_status.is_empty() { Ok(ComparePropertiesResult::NoChange) } else { diff --git a/src/compare/util.rs b/src/compare/util.rs index 5dcc0ed..c0a3760 100644 --- a/src/compare/util.rs +++ b/src/compare/util.rs @@ -1,7 +1,8 @@ use std::str::FromStr; -use super::compare_field::compare_property_list_of_list_of_list_of_ast_nodes; use super::compare_field::compare_property_list_of_quoted_string; +use super::compare_field::compare_property_object_tree; +use super::compare_field::compare_property_optional_pair; use super::compare_field::compare_property_quoted_string; use super::compare_field::ComparePropertiesResult; use super::diff::DiffEntry; @@ -376,13 +377,23 @@ where )?; ret.push(diff); } - AffiliatedKeywordValue::ListOfListsOfObjects(rust_value) => { - let diff = compare_property_list_of_list_of_list_of_ast_nodes( + AffiliatedKeywordValue::OptionalPair { optval, val } => { + let diff = compare_property_optional_pair( source, emacs, rust, emacs_property_name.as_str(), - |_| Some(rust_value), + |_| Some((*optval, *val)), + )?; + ret.push(diff); + } + AffiliatedKeywordValue::ObjectTree(rust_value) => { + let diff = compare_property_object_tree( + source, + emacs, + rust, + emacs_property_name.as_str(), + |_| Some(rust_value.iter()), )?; ret.push(diff); } diff --git a/src/parser/affiliated_keyword.rs b/src/parser/affiliated_keyword.rs index 92b59fc..485fadc 100644 --- a/src/parser/affiliated_keyword.rs +++ b/src/parser/affiliated_keyword.rs @@ -35,75 +35,86 @@ where let mut ret = BTreeMap::new(); for kw in input { let translated_name = translate_name(global_settings, kw.key); - if is_single_string_keyword(global_settings, translated_name.as_str()) { - ret.insert( - translated_name, - AffiliatedKeywordValue::SingleString(kw.value), - ); - } else if is_list_of_single_string_keyword(global_settings, translated_name.as_str()) { - let list_of_strings = ret - .entry(translated_name) - .or_insert_with(|| AffiliatedKeywordValue::ListOfStrings(Vec::with_capacity(1))); - match list_of_strings { - AffiliatedKeywordValue::ListOfStrings(list_of_strings) - if list_of_strings.is_empty() => - { - list_of_strings.push(kw.value); - } - AffiliatedKeywordValue::ListOfStrings(list_of_strings) => { - list_of_strings.clear(); - list_of_strings.push(kw.value); - } - _ => panic!("Invalid AffiliatedKeywordValue type."), + let keyword_type = identify_keyword_type(global_settings, translated_name.as_str()); + match keyword_type { + AffiliatedKeywordType::SingleString => { + ret.insert( + translated_name, + AffiliatedKeywordValue::SingleString(kw.value), + ); } - } else if is_list_of_objects_keyword(global_settings, translated_name.as_str()) { - let initial_context = ContextElement::document_context(); - let initial_context = Context::new(global_settings, List::new(&initial_context)); - - let (_remaining, optional_objects) = opt(all_consuming(map( - tuple(( - take_until("["), - tag("["), - map_parser( + AffiliatedKeywordType::ListOfStrings => { + let list_of_strings = ret.entry(translated_name).or_insert_with(|| { + AffiliatedKeywordValue::ListOfStrings(Vec::with_capacity(1)) + }); + match list_of_strings { + AffiliatedKeywordValue::ListOfStrings(list_of_strings) => { + list_of_strings.push(kw.value); + } + _ => panic!("Invalid AffiliatedKeywordValue type."), + } + } + AffiliatedKeywordType::OptionalPair => { + let (_remaining, optional_string) = opt(all_consuming(map( + tuple(( + take_until::<_, &str, nom::error::Error<_>>("["), + tag("["), recognize(many_till(anychar, peek(tuple((tag("]"), eof))))), - confine_context(|i| { - all_consuming(many0(parser_with_context!(standard_set_object)( - &initial_context, - )))(i) - }), - ), - tag("]"), - eof, - )), - |(_, _, objects, _, _)| objects, - )))(kw.key.into()) - .expect("Object parser should always succeed."); + tag("]"), + eof, + )), + |(_, _, objects, _, _)| objects, + )))(kw.key.into()) + .expect("Parser should always succeed."); + ret.insert( + translated_name, + AffiliatedKeywordValue::OptionalPair { + optval: optional_string, + val: kw.value, + }, + ); + } + AffiliatedKeywordType::ObjectTree => { + let initial_context = ContextElement::document_context(); + let initial_context = Context::new(global_settings, List::new(&initial_context)); - // TODO: This should be omitting footnote references - let (_remaining, objects) = all_consuming(many0(parser_with_context!( - standard_set_object - )(&initial_context)))(kw.value.into()) - .expect("Object parser should always succeed."); - let list_of_lists = ret.entry(translated_name).or_insert_with(|| { - AffiliatedKeywordValue::ListOfListsOfObjects(Vec::with_capacity(1)) - }); - match list_of_lists { - AffiliatedKeywordValue::ListOfListsOfObjects(list_of_lists) => { - list_of_lists.push((optional_objects, objects)); + let (_remaining, optional_objects) = opt(all_consuming(map( + tuple(( + take_until("["), + tag("["), + map_parser( + recognize(many_till(anychar, peek(tuple((tag("]"), eof))))), + confine_context(|i| { + all_consuming(many0(parser_with_context!(standard_set_object)( + &initial_context, + )))(i) + }), + ), + tag("]"), + eof, + )), + |(_, _, objects, _, _)| objects, + )))(kw.key.into()) + .expect("Object parser should always succeed."); + + // TODO: This should be omitting footnote references + let (_remaining, objects) = + all_consuming(many0(parser_with_context!(standard_set_object)( + &initial_context, + )))(kw.value.into()) + .expect("Object parser should always succeed."); + + let entry_per_keyword_list = ret + .entry(translated_name) + .or_insert_with(|| AffiliatedKeywordValue::ObjectTree(Vec::with_capacity(1))); + match entry_per_keyword_list { + AffiliatedKeywordValue::ObjectTree(entry_per_keyword_list) => { + entry_per_keyword_list.push((optional_objects, objects)); + } + _ => panic!("Invalid AffiliatedKeywordValue type."), } - _ => panic!("Invalid AffiliatedKeywordValue type."), } - } else { - let list_of_strings = ret - .entry(translated_name) - .or_insert_with(|| AffiliatedKeywordValue::ListOfStrings(Vec::with_capacity(1))); - match list_of_strings { - AffiliatedKeywordValue::ListOfStrings(list_of_strings) => { - list_of_strings.push(kw.value); - } - _ => panic!("Invalid AffiliatedKeywordValue type."), - } - } + }; } AffiliatedKeywords { keywords: ret } } @@ -121,40 +132,37 @@ fn translate_name<'g, 's>(global_settings: &'g GlobalSettings<'g, 's>, name: &'s name_until_optval.to_lowercase() } -fn is_single_string_keyword<'g, 's>( - _global_settings: &'g GlobalSettings<'g, 's>, - name: &'s str, -) -> bool { - // TODO: Is this defined by an elisp variable? - for single_string_name in ["plot", "name"] { - if name.eq_ignore_ascii_case(single_string_name) { - return true; - } - } - false +enum AffiliatedKeywordType { + SingleString, + ListOfStrings, + OptionalPair, + ObjectTree, } -fn is_list_of_single_string_keyword<'g, 's>( - _global_settings: &'g GlobalSettings<'g, 's>, - name: &'s str, -) -> bool { - // TODO: Is this defined by an elisp variable? - for single_string_name in ["results"] { - if name.eq_ignore_ascii_case(single_string_name) { - return true; - } - } - false -} - -fn is_list_of_objects_keyword<'g, 's>( +fn identify_keyword_type<'g, 's>( global_settings: &'g GlobalSettings<'g, 's>, name: &'s str, -) -> bool { - for parsed_keyword in global_settings.element_parsed_keywords { - if name.eq_ignore_ascii_case(parsed_keyword) { - return true; - } +) -> AffiliatedKeywordType { + let is_multiple = ["CAPTION", "HEADER"] + .into_iter() + .any(|candidate| name.eq_ignore_ascii_case(candidate)) + || name.to_lowercase().starts_with("attr_"); + let is_parsed = global_settings + .element_parsed_keywords + .iter() + .any(|candidate| name.eq_ignore_ascii_case(candidate)); + let can_have_optval = global_settings + .element_dual_keywords + .iter() + .any(|candidate| name.eq_ignore_ascii_case(candidate)); + match (is_multiple, is_parsed, can_have_optval) { + (true, true, true) => AffiliatedKeywordType::ObjectTree, + (true, true, false) => unreachable!("Nothing like this exists in upstream org-mode."), + (true, false, true) => unreachable!("Nothing like this exists in upstream org-mode."), + (true, false, false) => AffiliatedKeywordType::ListOfStrings, + (false, true, true) => unreachable!("Nothing like this exists in upstream org-mode."), + (false, true, false) => unreachable!("Nothing like this exists in upstream org-mode."), + (false, false, true) => AffiliatedKeywordType::OptionalPair, + (false, false, false) => AffiliatedKeywordType::SingleString, } - false } diff --git a/src/types/affiliated_keyword.rs b/src/types/affiliated_keyword.rs index 5831a4d..d5f2fd4 100644 --- a/src/types/affiliated_keyword.rs +++ b/src/types/affiliated_keyword.rs @@ -6,7 +6,11 @@ use super::Object; pub enum AffiliatedKeywordValue<'s> { SingleString(&'s str), ListOfStrings(Vec<&'s str>), - ListOfListsOfObjects(Vec<(Option>>, Vec>)>), + OptionalPair { + optval: Option<&'s str>, + val: &'s str, + }, + ObjectTree(Vec<(Option>>, Vec>)>), } #[derive(Debug)]