organic/src/compare/util.rs

387 lines
13 KiB
Rust
Raw Normal View History

use std::str::FromStr;
2023-10-11 13:00:21 -04:00
use super::compare_field::compare_property_quoted_string;
use super::compare_field::ComparePropertiesResult;
use super::diff::DiffEntry;
use super::diff::DiffStatus;
use super::elisp_fact::GetElispFact;
2023-09-11 15:31:48 -04:00
use super::sexp::Token;
use crate::compare::diff::compare_ast_node;
2023-09-29 17:28:50 -04:00
use crate::compare::sexp::unquote;
2023-10-11 13:00:21 -04:00
use crate::types::AffiliatedKeywordValue;
use crate::types::AstNode;
2023-10-11 12:42:42 -04:00
use crate::types::GetAffiliatedKeywords;
use crate::types::GetStandardProperties;
2023-09-23 17:59:13 -04:00
use crate::types::StandardProperties;
/// Check if the child string slice is a slice of the parent string slice.
///
/// This compares memory addresses only, not contents: `child` must lie
/// entirely within the bytes spanned by `parent`.
fn is_slice_of(parent: &str, child: &str) -> bool {
    let outer = parent.as_bytes().as_ptr_range();
    let inner = child.as_bytes().as_ptr_range();
    outer.start <= inner.start && inner.end <= outer.end
}
2023-09-23 17:59:13 -04:00
/// Get the byte offsets into the source document that the rust object spans.
///
/// Returns `(start, end)` where `end` is `start` plus the byte length of the
/// object's source. These offsets are zero-based unlike the elisp ones.
fn get_rust_byte_offsets<'b, 's, S: StandardProperties<'s> + ?Sized>(
    original_document: &'s str,
    rust_ast_node: &'b S,
) -> (usize, usize) {
    let node_source = rust_ast_node.get_source();
    // The pointer subtraction below is only meaningful when the node's source
    // is a sub-slice of the document.
    debug_assert!(is_slice_of(original_document, node_source));
    let start = node_source.as_ptr() as usize - original_document.as_ptr() as usize;
    (start, start + node_source.len())
}
/// Check that the emacs token has the name and the bounds the rust node expects.
///
/// The name check runs first; the bounds are only checked when the name
/// matches. The first failing check is returned as the error.
pub(crate) fn compare_standard_properties<
    'b,
    's,
    S: GetStandardProperties<'s> + GetElispFact<'s> + ?Sized,
>(
    original_document: &'s str,
    emacs: &'b Token<'s>,
    rust: &'b S,
) -> Result<(), Box<dyn std::error::Error>> {
    assert_name(emacs, rust.get_elisp_fact().get_elisp_name())?;
    assert_bounds(original_document, emacs, rust.get_standard_properties())
}
pub(crate) fn assert_name<'b, 's, S: AsRef<str>>(
emacs: &'b Token<'s>,
name: S,
2023-09-11 13:13:28 -04:00
) -> Result<(), Box<dyn std::error::Error>> {
let name = name.as_ref();
let children = emacs.as_list()?;
let first_child = children
.first()
.ok_or("Should have at least one child.")?
.as_atom()?;
if first_child != name {
Err(format!(
"AST node name mismatch. Expected a (rust) {expected} cell, but found a (emacs) {found} cell.",
expected = name,
found = first_child
))?;
}
Ok(())
}
2023-04-19 15:29:46 -04:00
2023-09-23 17:59:13 -04:00
/// Assert that the character ranges defined by upstream org-mode's :standard-properties match the slices in Organic's StandardProperties.
///
/// This does **not** handle plain text because plain text is a special case.
pub(crate) fn assert_bounds<'b, 's, S: StandardProperties<'s> + ?Sized>(
2023-09-23 17:59:13 -04:00
original_document: &'s str,
emacs: &'b Token<'s>,
rust: &'b S,
2023-04-19 15:29:46 -04:00
) -> Result<(), Box<dyn std::error::Error>> {
2023-09-23 17:59:13 -04:00
let standard_properties = get_emacs_standard_properties(emacs)?; // 1-based
2023-08-25 02:55:01 -04:00
let (begin, end) = (
standard_properties
.begin
.ok_or("Token should have a begin.")?,
standard_properties.end.ok_or("Token should have an end.")?,
2023-08-25 02:55:01 -04:00
);
2023-09-23 17:59:13 -04:00
let (rust_begin, rust_end) = get_rust_byte_offsets(original_document, rust); // 0-based
let rust_begin_char_offset = (&original_document[..rust_begin]).chars().count() + 1; // 1-based
let rust_end_char_offset =
2023-09-23 17:59:13 -04:00
rust_begin_char_offset + (&original_document[rust_begin..rust_end]).chars().count(); // 1-based
if rust_begin_char_offset != begin || rust_end_char_offset != end {
Err(format!("Rust bounds (in chars) ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin_char_offset, rust_end = rust_end_char_offset, emacs_begin=begin, emacs_end=end))?;
2023-08-25 02:55:01 -04:00
}
Ok(())
}
/// The standard properties emacs reports for an AST node.
///
/// Each field is `None` when emacs did not provide the property or reported
/// it as `nil`. The field order mirrors the order in which values are read
/// out of the `:standard-properties` vector.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
struct EmacsStandardProperties {
    /// Where the node begins (1-based, as reported by emacs).
    begin: Option<usize>,
    // Parsed for completeness but not read anywhere in this file yet.
    #[allow(dead_code)]
    post_affiliated: Option<usize>,
    // Parsed for completeness but not read anywhere in this file yet.
    #[allow(dead_code)]
    contents_begin: Option<usize>,
    // Parsed for completeness but not read anywhere in this file yet.
    #[allow(dead_code)]
    contents_end: Option<usize>,
    /// Where the node ends (1-based, as reported by emacs).
    end: Option<usize>,
    // Parsed for completeness but not read anywhere in this file yet.
    #[allow(dead_code)]
    post_blank: Option<usize>,
}
fn get_emacs_standard_properties<'b, 's>(
emacs: &'b Token<'s>,
) -> Result<EmacsStandardProperties, Box<dyn std::error::Error>> {
2023-04-19 15:29:46 -04:00
let children = emacs.as_list()?;
let attributes_child = children
.iter()
.nth(1)
.ok_or("Should have an attributes child.")?;
let attributes_map = attributes_child.as_map()?;
let standard_properties = attributes_map.get(":standard-properties");
2023-08-25 02:55:01 -04:00
Ok(if standard_properties.is_some() {
let mut std_props = standard_properties
.expect("if statement proves its Some")
2023-08-25 02:55:01 -04:00
.as_vector()?
.into_iter();
let begin = maybe_token_to_usize(std_props.next())?;
let post_affiliated = maybe_token_to_usize(std_props.next())?;
let contents_begin = maybe_token_to_usize(std_props.next())?;
let contents_end = maybe_token_to_usize(std_props.next())?;
let end = maybe_token_to_usize(std_props.next())?;
let post_blank = maybe_token_to_usize(std_props.next())?;
EmacsStandardProperties {
2023-08-25 02:55:01 -04:00
begin,
post_affiliated,
contents_begin,
contents_end,
end,
post_blank,
}
} else {
2023-08-25 02:55:01 -04:00
let begin = maybe_token_to_usize(attributes_map.get(":begin").map(|token| *token))?;
let end = maybe_token_to_usize(attributes_map.get(":end").map(|token| *token))?;
let contents_begin =
maybe_token_to_usize(attributes_map.get(":contents-begin").map(|token| *token))?;
let contents_end =
maybe_token_to_usize(attributes_map.get(":contents-end").map(|token| *token))?;
let post_blank =
maybe_token_to_usize(attributes_map.get(":post-blank").map(|token| *token))?;
let post_affiliated =
maybe_token_to_usize(attributes_map.get(":post-affiliated").map(|token| *token))?;
EmacsStandardProperties {
2023-08-25 02:55:01 -04:00
begin,
post_affiliated,
contents_begin,
contents_end,
end,
post_blank,
}
})
}
2023-04-19 15:29:46 -04:00
2023-08-25 02:55:01 -04:00
fn maybe_token_to_usize(
token: Option<&Token<'_>>,
) -> Result<Option<usize>, Box<dyn std::error::Error>> {
Ok(token
.map(|token| token.as_atom())
.map_or(Ok(None), |r| r.map(Some))?
.map(|val| {
if val == "nil" {
None
} else {
Some(val.parse::<usize>())
}
})
.flatten() // Outer option is whether or not the param exists, inner option is whether or not it is nil
.map_or(Ok(None), |r| r.map(Some))?)
2023-04-19 15:29:46 -04:00
}
2023-08-29 22:07:23 -04:00
/// Get a named property from the emacs token.
///
/// The property is looked up in the attributes map, which is the second
/// element of the token's list.
///
/// Returns Ok(None) if value is nil or absent.
pub(crate) fn get_property<'b, 's, 'x>(
    emacs: &'b Token<'s>,
    key: &'x str,
) -> Result<Option<&'b Token<'s>>, Box<dyn std::error::Error>> {
    let attributes_map = emacs
        .as_list()?
        .get(1)
        .ok_or("Should have an attributes child.")?
        .as_map()?;
    // Only the atom `nil` is filtered out; non-atom values are passed through.
    Ok(attributes_map
        .get(key)
        .copied()
        .filter(|token| !matches!(token.as_atom(), Ok("nil"))))
}
2023-09-29 13:03:01 -04:00
/// Get a named property containing an unquoted atom from the emacs token.
///
/// Returns None if the key is not found or its value is nil, and an error if
/// the value is not an atom.
pub(crate) fn get_property_unquoted_atom<'b, 's, 'x>(
    emacs: &'b Token<'s>,
    key: &'x str,
) -> Result<Option<&'s str>, Box<dyn std::error::Error>> {
    let atom = get_property(emacs, key)?
        .map(Token::as_atom)
        .transpose()?;
    Ok(atom)
}
2023-09-29 17:28:50 -04:00
/// Get a named property containing an quoted string from the emacs token.
///
/// Returns None if key is not found.
pub(crate) fn get_property_quoted_string<'b, 's, 'x>(
emacs: &'b Token<'s>,
2023-09-29 17:28:50 -04:00
key: &'x str,
) -> Result<Option<String>, Box<dyn std::error::Error>> {
Ok(get_property(emacs, key)?
.map(Token::as_atom)
.map_or(Ok(None), |r| r.map(Some))?
.map(unquote)
.map_or(Ok(None), |r| r.map(Some))?)
}
2023-10-02 10:48:34 -04:00
/// Get a named property containing an unquoted numeric value.
///
/// Returns None if key is not found.
pub(crate) fn get_property_numeric<'b, 's, 'x, N: FromStr>(
emacs: &'b Token<'s>,
key: &'x str,
) -> Result<Option<N>, Box<dyn std::error::Error + 's>>
where
<N as FromStr>::Err: std::error::Error,
<N as FromStr>::Err: 's,
{
2023-10-02 15:59:06 -04:00
let unparsed_string = get_property(emacs, key)?
.map(Token::as_atom)
.map_or(Ok(None), |r| r.map(Some))?;
2023-10-02 15:59:06 -04:00
let parsed_number = unparsed_string
.map(|val| val.parse::<N>())
.map_or(Ok(None), |r| r.map(Some))?;
2023-10-02 15:59:06 -04:00
Ok(parsed_number)
}
/// Compare the children of an emacs token against a list of rust children.
///
/// The first two elements of the emacs list (the node name and the
/// properties) are skipped; everything after them is a child. A length
/// mismatch marks `this_status` as bad and records a `message`, but the
/// children that can be paired up are still compared and appended to
/// `child_status`.
///
/// Returns an error if the token is not a list, if the list is too short to
/// contain the name and properties, or if comparing any pair of children
/// fails.
pub(crate) fn compare_children<'b, 's, 'x, RC>(
    source: &'s str,
    emacs: &'b Token<'s>,
    rust_children: &'x Vec<RC>,
    child_status: &mut Vec<DiffEntry<'b, 's>>,
    this_status: &mut DiffStatus,
    message: &mut Option<String>,
) -> Result<(), Box<dyn std::error::Error>>
where
    AstNode<'b, 's>: From<&'x RC>,
{
    let emacs_children = emacs.as_list()?;
    // checked_sub so a malformed (too short) list is reported as an error
    // instead of underflowing.
    let emacs_children_length = emacs_children
        .len()
        .checked_sub(2)
        .ok_or("Should have a name and an attributes child.")?;
    if emacs_children_length != rust_children.len() {
        *this_status = DiffStatus::Bad;
        *message = Some(format!(
            "Child length mismatch (emacs != rust) {:?} != {:?}",
            emacs_children_length,
            rust_children.len()
        ));
    }
    for (emacs_child, rust_child) in emacs_children.iter().skip(2).zip(rust_children.iter()) {
        child_status.push(compare_ast_node(source, emacs_child, rust_child.into())?);
    }
    Ok(())
}
/// Compare the children of an emacs token against an iterator of rust children.
///
/// The first two elements of the emacs list (the node name and the
/// properties) are skipped; everything after them is a child. A length
/// mismatch marks `this_status` as bad and records a `message`, but the
/// children that can be paired up are still compared and appended to
/// `child_status`.
///
/// Returns an error if the token is not a list, if the list is too short to
/// contain the name and properties, or if comparing any pair of children
/// fails.
pub(crate) fn compare_children_iter<'b, 's, RC, RI: Iterator<Item = RC> + ExactSizeIterator>(
    source: &'s str,
    emacs: &'b Token<'s>,
    rust_children: RI,
    child_status: &mut Vec<DiffEntry<'b, 's>>,
    this_status: &mut DiffStatus,
    message: &mut Option<String>,
) -> Result<(), Box<dyn std::error::Error>>
where
    AstNode<'b, 's>: From<RC>,
{
    let emacs_children = emacs.as_list()?;
    // checked_sub so a malformed (too short) list is reported as an error
    // instead of underflowing.
    let emacs_children_length = emacs_children
        .len()
        .checked_sub(2)
        .ok_or("Should have a name and an attributes child.")?;
    if emacs_children_length != rust_children.len() {
        *this_status = DiffStatus::Bad;
        *message = Some(format!(
            "Child length mismatch (emacs != rust) {:?} != {:?}",
            emacs_children_length,
            rust_children.len()
        ));
    }
    for (emacs_child, rust_child) in emacs_children.iter().skip(2).zip(rust_children) {
        child_status.push(compare_ast_node(source, emacs_child, rust_child.into())?);
    }
    Ok(())
}
/// Check that the emacs token has no children.
///
/// When children are present, `this_status` is marked as bad and `message`
/// records how many there were.
///
/// Returns an error if the token is not a list or if the list is too short to
/// contain the node name and the properties.
pub(crate) fn assert_no_children<'b, 's>(
    emacs: &'b Token<'s>,
    this_status: &mut DiffStatus,
    message: &mut Option<String>,
) -> Result<(), Box<dyn std::error::Error>> {
    // 2, one for the name of the node and one for the properties. Children would come after that.
    // checked_sub so a malformed (too short) list is reported as an error
    // instead of underflowing while building the message.
    let emacs_children_length = emacs
        .as_list()?
        .len()
        .checked_sub(2)
        .ok_or("Should have a name and an attributes child.")?;
    if emacs_children_length != 0 {
        *this_status = DiffStatus::Bad;
        *message = Some(format!(
            "Should have no children but emacs has {:?} children.",
            emacs_children_length,
        ));
    }
    Ok(())
}
/// Compare key/value pairs from the rust side against quoted-string properties on the emacs token.
///
/// Each rust key is looked up verbatim as an emacs property name. The first
/// pair whose emacs value is missing or different produces a `SelfChange`
/// result with a bad status; if every pair matches the result is `NoChange`.
pub(crate) fn compare_additional_properties<'b, 's, RK, RV, RI>(
    emacs: &'b Token<'s>,
    rust_children: RI,
) -> Result<ComparePropertiesResult<'b, 's>, Box<dyn std::error::Error>>
where
    RK: AsRef<str>,
    RV: AsRef<str>,
    RI: Iterator<Item = (RK, RV)> + ExactSizeIterator,
{
    for (key, value) in rust_children {
        let (rust_key, rust_value) = (key.as_ref(), value.as_ref());
        let emacs_value = get_property_quoted_string(emacs, rust_key)?;
        if emacs_value.as_deref() == Some(rust_value) {
            continue;
        }
        let message = format!(
            "{} mismatch (emacs != rust) {:?} != {:?}",
            rust_key, emacs_value, rust_value
        );
        return Ok(ComparePropertiesResult::SelfChange(
            DiffStatus::Bad,
            Some(message),
        ));
    }
    Ok(ComparePropertiesResult::NoChange)
}
2023-10-11 12:42:42 -04:00
pub(crate) fn compare_affiliated_keywords<'b, 's, GAK>(
2023-10-11 13:00:21 -04:00
source: &'s str,
2023-10-11 12:42:42 -04:00
emacs: &'b Token<'s>,
rust: &'b GAK,
2023-10-11 13:00:21 -04:00
) -> Result<Vec<ComparePropertiesResult<'b, 's>>, Box<dyn std::error::Error>>
2023-10-11 12:42:42 -04:00
where
GAK: GetAffiliatedKeywords<'s>,
{
2023-10-11 13:00:21 -04:00
let mut ret = Vec::new();
2023-10-11 12:42:42 -04:00
let affiliated_keywords = rust.get_affiliated_keywords();
for (rust_name, rust_value) in affiliated_keywords.keywords.iter() {
let emacs_property_name = format!(":{}", rust_name);
2023-10-11 13:00:21 -04:00
match rust_value {
AffiliatedKeywordValue::SingleString(rust_value) => {
let diff = compare_property_quoted_string(
source,
emacs,
rust,
emacs_property_name.as_str(),
|_| Some(*rust_value),
)?;
ret.push(diff);
}
AffiliatedKeywordValue::ListOfStrings(rust_value) => {
// foo
}
AffiliatedKeywordValue::ListOfListsOfObjects(rust_value) => {
// foo
}
};
2023-10-11 12:42:42 -04:00
}
2023-10-11 13:00:21 -04:00
Ok(ret)
2023-10-11 12:42:42 -04:00
}
/// Iterate over the affiliated keyword names of a rust node, each prefixed with `:`.
///
/// The prefixed form is how the keyword names are looked up as emacs
/// properties elsewhere in this file.
pub(crate) fn affiliated_keywords_names<'s, GAK>(rust: &'s GAK) -> impl Iterator<Item = String> + 's
where
    GAK: GetAffiliatedKeywords<'s>,
{
    let keywords = &rust.get_affiliated_keywords().keywords;
    keywords.keys().map(|name| format!(":{}", name))
}