Files
organic/src/compare/util.rs
Tom Alexander acfc5e5e68
All checks were successful
rust-build Build rust-build has succeeded
rust-foreign-document-test Build rust-foreign-document-test has succeeded
rust-test Build rust-test has succeeded
clippy Build clippy has succeeded
rustfmt Build rustfmt has succeeded
Only allocate memory when unquoting sexp string that contains escapes.
If the quoted string contains no escape sequences, then unquoting the string can be done by simply shaving off the leading and trailing quotation marks which can be a slice operation. By returning Cow, we can return either a borrowed slice or an owned String.
2023-10-20 12:53:27 -04:00

404 lines
14 KiB
Rust

use std::borrow::Cow;
use std::str::FromStr;
use super::compare_field::compare_property_list_of_quoted_string;
use super::compare_field::compare_property_object_tree;
use super::compare_field::compare_property_optional_pair;
use super::compare_field::compare_property_quoted_string;
use super::compare_field::ComparePropertiesResult;
use super::diff::DiffEntry;
use super::diff::DiffStatus;
use super::elisp_fact::GetElispFact;
use super::sexp::Token;
use crate::compare::diff::compare_ast_node;
use crate::compare::sexp::unquote;
use crate::types::AffiliatedKeywordValue;
use crate::types::AstNode;
use crate::types::GetAffiliatedKeywords;
use crate::types::GetStandardProperties;
use crate::types::StandardProperties;
/// Check if the child string slice is a slice of the parent string slice.
fn is_slice_of(parent: &str, child: &str) -> bool {
let parent_start = parent.as_ptr() as usize;
let parent_end = parent_start + parent.len();
let child_start = child.as_ptr() as usize;
let child_end = child_start + child.len();
child_start >= parent_start && child_end <= parent_end
}
/// Get the byte offset into source that the rust object exists at.
///
/// These offsets are zero-based unlike the elisp ones.
fn get_rust_byte_offsets<'b, 's, S: StandardProperties<'s> + ?Sized>(
original_document: &'s str,
rust_ast_node: &'b S,
) -> (usize, usize) {
let rust_object_source = rust_ast_node.get_source();
debug_assert!(is_slice_of(original_document, rust_object_source));
let offset = rust_object_source.as_ptr() as usize - original_document.as_ptr() as usize;
let end = offset + rust_object_source.len();
(offset, end)
}
pub(crate) fn compare_standard_properties<
'b,
's,
S: GetStandardProperties<'s> + GetElispFact<'s> + ?Sized,
>(
original_document: &'s str,
emacs: &'b Token<'s>,
rust: &'b S,
) -> Result<(), Box<dyn std::error::Error>> {
assert_name(emacs, rust.get_elisp_fact().get_elisp_name())?;
assert_bounds(original_document, emacs, rust.get_standard_properties())?;
Ok(())
}
pub(crate) fn assert_name<S: AsRef<str>>(
emacs: &Token<'_>,
name: S,
) -> Result<(), Box<dyn std::error::Error>> {
let name = name.as_ref();
let children = emacs.as_list()?;
let first_child = children
.first()
.ok_or("Should have at least one child.")?
.as_atom()?;
if first_child != name {
Err(format!(
"AST node name mismatch. Expected a (rust) {expected} cell, but found a (emacs) {found} cell.",
expected = name,
found = first_child
))?;
}
Ok(())
}
/// Assert that the character ranges defined by upstream org-mode's :standard-properties match the slices in Organic's StandardProperties.
///
/// This does **not** handle plain text because plain text is a special case.
pub(crate) fn assert_bounds<'b, 's, S: StandardProperties<'s> + ?Sized>(
original_document: &'s str,
emacs: &'b Token<'s>,
rust: &'b S,
) -> Result<(), Box<dyn std::error::Error>> {
let standard_properties = get_emacs_standard_properties(emacs)?; // 1-based
let (begin, end) = (
standard_properties
.begin
.ok_or("Token should have a begin.")?,
standard_properties.end.ok_or("Token should have an end.")?,
);
let (rust_begin, rust_end) = get_rust_byte_offsets(original_document, rust); // 0-based
let rust_begin_char_offset = original_document[..rust_begin].chars().count() + 1; // 1-based
let rust_end_char_offset =
rust_begin_char_offset + original_document[rust_begin..rust_end].chars().count(); // 1-based
if rust_begin_char_offset != begin || rust_end_char_offset != end {
Err(format!("Rust bounds (in chars) ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin_char_offset, rust_end = rust_end_char_offset, emacs_begin=begin, emacs_end=end))?;
}
Ok(())
}
struct EmacsStandardProperties {
begin: Option<usize>,
#[allow(dead_code)]
post_affiliated: Option<usize>,
#[allow(dead_code)]
contents_begin: Option<usize>,
#[allow(dead_code)]
contents_end: Option<usize>,
end: Option<usize>,
#[allow(dead_code)]
post_blank: Option<usize>,
}
fn get_emacs_standard_properties(
emacs: &Token<'_>,
) -> Result<EmacsStandardProperties, Box<dyn std::error::Error>> {
let children = emacs.as_list()?;
let attributes_child = children.get(1).ok_or("Should have an attributes child.")?;
let attributes_map = attributes_child.as_map()?;
let standard_properties = attributes_map.get(":standard-properties");
Ok(if standard_properties.is_some() {
let mut std_props = standard_properties
.expect("if statement proves its Some")
.as_vector()?
.iter();
let begin = maybe_token_to_usize(std_props.next())?;
let post_affiliated = maybe_token_to_usize(std_props.next())?;
let contents_begin = maybe_token_to_usize(std_props.next())?;
let contents_end = maybe_token_to_usize(std_props.next())?;
let end = maybe_token_to_usize(std_props.next())?;
let post_blank = maybe_token_to_usize(std_props.next())?;
EmacsStandardProperties {
begin,
post_affiliated,
contents_begin,
contents_end,
end,
post_blank,
}
} else {
let begin = maybe_token_to_usize(attributes_map.get(":begin").copied())?;
let end = maybe_token_to_usize(attributes_map.get(":end").copied())?;
let contents_begin = maybe_token_to_usize(attributes_map.get(":contents-begin").copied())?;
let contents_end = maybe_token_to_usize(attributes_map.get(":contents-end").copied())?;
let post_blank = maybe_token_to_usize(attributes_map.get(":post-blank").copied())?;
let post_affiliated =
maybe_token_to_usize(attributes_map.get(":post-affiliated").copied())?;
EmacsStandardProperties {
begin,
post_affiliated,
contents_begin,
contents_end,
end,
post_blank,
}
})
}
fn maybe_token_to_usize(
token: Option<&Token<'_>>,
) -> Result<Option<usize>, Box<dyn std::error::Error>> {
Ok(token
.map(|token| token.as_atom())
.map_or(Ok(None), |r| r.map(Some))?
.and_then(|val| {
if val == "nil" {
None
} else {
Some(val.parse::<usize>())
}
})
.map_or(Ok(None), |r| r.map(Some))?)
}
/// Get a named property from the emacs token.
///
/// Returns Ok(None) if value is nil or absent.
pub(crate) fn get_property<'b, 's>(
emacs: &'b Token<'s>,
key: &str,
) -> Result<Option<&'b Token<'s>>, Box<dyn std::error::Error>> {
let children = emacs.as_list()?;
let attributes_child = children.get(1).ok_or("Should have an attributes child.")?;
let attributes_map = attributes_child.as_map()?;
let prop = attributes_map.get(key).copied();
if let Some(Ok("nil")) = prop.map(Token::as_atom) {
return Ok(None);
}
Ok(prop)
}
/// Get a named property containing an unquoted atom from the emacs token.
///
/// Returns None if key is not found.
pub(crate) fn get_property_unquoted_atom<'s>(
emacs: &Token<'s>,
key: &str,
) -> Result<Option<&'s str>, Box<dyn std::error::Error>> {
get_property(emacs, key)?
.map(Token::as_atom)
.map_or(Ok(None), |r| r.map(Some))
}
/// Get a named property containing an quoted string from the emacs token.
///
/// Returns None if key is not found.
pub(crate) fn get_property_quoted_string<'s>(
emacs: &Token<'s>,
key: &str,
) -> Result<Option<Cow<'s, str>>, Box<dyn std::error::Error>> {
get_property(emacs, key)?
.map(Token::as_atom)
.map_or(Ok(None), |r| r.map(Some))?
.map(unquote)
.map_or(Ok(None), |r| r.map(Some))
}
/// Get a named property containing an unquoted numeric value.
///
/// Returns None if key is not found.
pub(crate) fn get_property_numeric<'b, 's, 'x, N: FromStr>(
emacs: &'b Token<'s>,
key: &'x str,
) -> Result<Option<N>, Box<dyn std::error::Error + 's>>
where
<N as FromStr>::Err: std::error::Error,
<N as FromStr>::Err: 's,
{
let unparsed_string = get_property(emacs, key)?
.map(Token::as_atom)
.map_or(Ok(None), |r| r.map(Some))?;
let parsed_number = unparsed_string
.map(|val| val.parse::<N>())
.map_or(Ok(None), |r| r.map(Some))?;
Ok(parsed_number)
}
pub(crate) fn compare_children<'b, 's, 'x, RC>(
source: &'s str,
emacs: &'b Token<'s>,
rust_children: &'x Vec<RC>,
child_status: &mut Vec<DiffEntry<'b, 's>>,
this_status: &mut DiffStatus,
message: &mut Option<String>,
) -> Result<(), Box<dyn std::error::Error>>
where
AstNode<'b, 's>: From<&'x RC>,
{
let emacs_children = emacs.as_list()?;
let emacs_children_length = emacs_children.len() - 2;
if emacs_children_length != rust_children.len() {
*this_status = DiffStatus::Bad;
*message = Some(format!(
"Child length mismatch (emacs != rust) {:?} != {:?}",
emacs_children_length,
rust_children.len()
));
}
for (emacs_child, rust_child) in emacs_children.iter().skip(2).zip(rust_children.iter()) {
child_status.push(compare_ast_node(source, emacs_child, rust_child.into())?);
}
Ok(())
}
pub(crate) fn compare_children_iter<'b, 's, RC, RI: Iterator<Item = RC> + ExactSizeIterator>(
source: &'s str,
emacs: &'b Token<'s>,
rust_children: RI,
child_status: &mut Vec<DiffEntry<'b, 's>>,
this_status: &mut DiffStatus,
message: &mut Option<String>,
) -> Result<(), Box<dyn std::error::Error>>
where
AstNode<'b, 's>: From<RC>,
{
let emacs_children = emacs.as_list()?;
let emacs_children_length = emacs_children.len() - 2;
if emacs_children_length != rust_children.len() {
*this_status = DiffStatus::Bad;
*message = Some(format!(
"Child length mismatch (emacs != rust) {:?} != {:?}",
emacs_children_length,
rust_children.len()
));
}
for (emacs_child, rust_child) in emacs_children.iter().skip(2).zip(rust_children) {
child_status.push(compare_ast_node(source, emacs_child, rust_child.into())?);
}
Ok(())
}
pub(crate) fn assert_no_children(
emacs: &Token<'_>,
this_status: &mut DiffStatus,
message: &mut Option<String>,
) -> Result<(), Box<dyn std::error::Error>> {
let emacs_children_length = emacs.as_list()?.len();
// 2, one for the name of the node and one for the properties. Children would come after that.
if emacs_children_length != 2 {
*this_status = DiffStatus::Bad;
*message = Some(format!(
"Should have no children but emacs has {:?} children.",
emacs_children_length - 2,
));
}
Ok(())
}
pub(crate) fn compare_additional_properties<'b, 's, RK, RV, RI>(
emacs: &'b Token<'s>,
rust_children: RI,
) -> Result<ComparePropertiesResult<'b, 's>, Box<dyn std::error::Error>>
where
RK: AsRef<str>,
RV: AsRef<str>,
RI: Iterator<Item = (RK, RV)> + ExactSizeIterator,
{
for (rust_key, rust_value) in rust_children {
let rust_key = rust_key.as_ref();
let rust_value = rust_value.as_ref();
let emacs_value = get_property_quoted_string(emacs, rust_key)?;
if Some(rust_value) != emacs_value.as_deref() {
let this_status = DiffStatus::Bad;
let message = Some(format!(
"{} mismatch (emacs != rust) {:?} != {:?}",
rust_key, emacs_value, rust_value
));
return Ok(ComparePropertiesResult::SelfChange(this_status, message));
}
}
Ok(ComparePropertiesResult::NoChange)
}
pub(crate) fn compare_affiliated_keywords<'b, 's, GAK>(
source: &'s str,
emacs: &'b Token<'s>,
rust: &'b GAK,
) -> Result<Vec<ComparePropertiesResult<'b, 's>>, Box<dyn std::error::Error>>
where
GAK: GetAffiliatedKeywords<'s>,
{
let mut ret = Vec::new();
let affiliated_keywords = rust.get_affiliated_keywords();
for (rust_name, rust_value) in affiliated_keywords.keywords.iter() {
let emacs_property_name = format!(":{}", rust_name);
match rust_value {
AffiliatedKeywordValue::SingleString(rust_value) => {
let diff = compare_property_quoted_string(
source,
emacs,
rust,
emacs_property_name.as_str(),
|_| Some(*rust_value),
)?;
ret.push(diff);
}
AffiliatedKeywordValue::ListOfStrings(rust_value) => {
let diff = compare_property_list_of_quoted_string(
source,
emacs,
rust,
emacs_property_name.as_str(),
|_| Some(rust_value.iter()),
)?;
ret.push(diff);
}
AffiliatedKeywordValue::OptionalPair { optval, val } => {
let diff = compare_property_optional_pair(
source,
emacs,
rust,
emacs_property_name.as_str(),
|_| Some((*optval, *val)),
)?;
ret.push(diff);
}
AffiliatedKeywordValue::ObjectTree(rust_value) => {
let diff = compare_property_object_tree(
source,
emacs,
rust,
emacs_property_name.as_str(),
|_| Some(rust_value.iter()),
)?;
ret.push(diff);
}
};
}
Ok(ret)
}
pub(crate) fn affiliated_keywords_names<'s, GAK>(rust: &'s GAK) -> impl Iterator<Item = String> + 's
where
GAK: GetAffiliatedKeywords<'s>,
{
rust.get_affiliated_keywords()
.keywords
.keys()
.map(|k| format!(":{}", k))
}