organic/src/compare/util.rs

177 lines
6.3 KiB
Rust

use super::sexp::Token;
use crate::types::StandardProperties;
/// Report whether `child` lies entirely within the memory occupied by `parent`.
///
/// The comparison is made on addresses, not on contents: a string with equal text that is stored
/// somewhere else is not a slice of `parent`. An empty `child` sitting exactly on either edge of
/// `parent` counts as being inside it.
fn is_slice_of(parent: &str, child: &str) -> bool {
    let outer = parent.as_bytes().as_ptr_range();
    let inner = child.as_bytes().as_ptr_range();
    outer.start <= inner.start && inner.end <= outer.end
}
/// Locate a Rust AST node inside the document it was parsed from.
///
/// Returns `(begin, end)` as byte offsets into `original_document`, with `end` being exclusive.
/// Both offsets are zero-based, whereas the positions reported by elisp are one-based.
///
/// The node's source text has to be a sub-slice of `original_document`; this is only verified in
/// debug builds.
fn get_rust_byte_offsets<'s, S: StandardProperties<'s> + ?Sized>(
    original_document: &'s str,
    rust_ast_node: &'s S,
) -> (usize, usize) {
    let node_source = rust_ast_node.get_source();
    debug_assert!(is_slice_of(original_document, node_source));
    // The distance between the two start addresses is the node's byte offset in the document.
    let document_address = original_document.as_ptr() as usize;
    let node_address = node_source.as_ptr() as usize;
    let begin = node_address - document_address;
    (begin, begin + node_source.len())
}
/// Check that an elisp list token starts with the atom `name`.
///
/// # Errors
///
/// Propagates the failure from `as_list` or `as_atom`, and otherwise fails when the list is empty
/// or when its first atom differs from `name`.
pub(crate) fn assert_name<'s>(
    emacs: &'s Token<'s>,
    name: &str,
) -> Result<(), Box<dyn std::error::Error>> {
    let elements = emacs.as_list()?;
    let head = elements.first().ok_or("Should have at least one child.")?;
    let found = head.as_atom()?;
    if found == name {
        return Ok(());
    }
    Err(format!(
        "Expected a {expected} cell, but found a {found} cell.",
        expected = name,
        found = found
    )
    .into())
}
/// Verify that the span Organic recorded for a node agrees with the span upstream org-mode
/// reports in its standard properties.
///
/// Emacs reports one-based character positions, so the Rust byte offsets are converted to
/// one-based character positions before the comparison.
///
/// Plain text is a special case and is **not** handled here.
///
/// # Errors
///
/// Fails if the emacs properties cannot be read, if they lack a begin or an end, or if either
/// bound differs from the Rust one.
pub(crate) fn assert_bounds<'s, S: StandardProperties<'s> + ?Sized>(
    original_document: &'s str,
    emacs: &'s Token<'s>,
    rust: &'s S,
) -> Result<(), Box<dyn std::error::Error>> {
    // Positions coming from emacs are 1-based character positions.
    let emacs_properties = get_emacs_standard_properties(emacs)?;
    let expected_begin = emacs_properties
        .begin
        .ok_or("Token should have a begin.")?;
    let expected_end = emacs_properties.end.ok_or("Token should have an end.")?;

    // Offsets coming from Rust are 0-based byte offsets.
    let (byte_begin, byte_end) = get_rust_byte_offsets(original_document, rust);
    let chars_before_node = original_document[..byte_begin].chars().count();
    let chars_in_node = original_document[byte_begin..byte_end].chars().count();
    let actual_begin = chars_before_node + 1;
    let actual_end = actual_begin + chars_in_node;

    if actual_begin == expected_begin && actual_end == expected_end {
        return Ok(());
    }
    Err(format!(
        "Rust bounds (in chars) ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})",
        rust_begin = actual_begin,
        rust_end = actual_end,
        emacs_begin = expected_begin,
        emacs_end = expected_end
    )
    .into())
}
/// Positional properties of one elisp AST node, as produced by `get_emacs_standard_properties`.
///
/// Every field is `None` when the corresponding value is absent or is `nil`. The numbers are
/// stored exactly as parsed; `assert_bounds` treats `begin` and `end` as one-based character
/// positions.
struct EmacsStandardProperties {
/// First slot of the `:standard-properties` vector, or the `:begin` attribute.
begin: Option<usize>,
/// Second slot of the `:standard-properties` vector, or the `:post-affiliated` attribute.
///
/// Parsed but not read by the code visible in this file, hence the `dead_code` exemption.
#[allow(dead_code)]
post_affiliated: Option<usize>,
/// Third slot of the `:standard-properties` vector, or the `:contents-begin` attribute.
///
/// Parsed but not read by the code visible in this file, hence the `dead_code` exemption.
#[allow(dead_code)]
contents_begin: Option<usize>,
/// Fourth slot of the `:standard-properties` vector, or the `:contents-end` attribute.
///
/// Parsed but not read by the code visible in this file, hence the `dead_code` exemption.
#[allow(dead_code)]
contents_end: Option<usize>,
/// Fifth slot of the `:standard-properties` vector, or the `:end` attribute.
end: Option<usize>,
/// Sixth slot of the `:standard-properties` vector, or the `:post-blank` attribute.
///
/// Parsed but not read by the code visible in this file, hence the `dead_code` exemption.
#[allow(dead_code)]
post_blank: Option<usize>,
}
/// Extract the positional properties of an elisp AST node.
///
/// The second element of the node's list is read as an attribute map. When that map has a
/// `:standard-properties` entry, its vector is consumed positionally in the order begin,
/// post-affiliated, contents-begin, contents-end, end, post-blank. Otherwise every value is looked
/// up under its own key (`:begin`, `:end`, `:contents-begin`, `:contents-end`, `:post-blank`,
/// `:post-affiliated`).
///
/// A value that is missing or `nil` becomes `None`.
///
/// # Errors
///
/// Propagates failures from `as_list`, `as_map`, `as_vector` and `maybe_token_to_usize`, and
/// fails when the list has no second element.
fn get_emacs_standard_properties<'s>(
    emacs: &'s Token<'s>,
) -> Result<EmacsStandardProperties, Box<dyn std::error::Error>> {
    let children = emacs.as_list()?;
    let attributes_child = children
        .get(1)
        .ok_or("Should have an attributes child.")?;
    let attributes_map = attributes_child.as_map()?;

    if let Some(standard_properties) = attributes_map.get(":standard-properties") {
        // The vector is positional, so the order of these reads is significant.
        let mut slots = standard_properties.as_vector()?.into_iter();
        let begin = maybe_token_to_usize(slots.next())?;
        let post_affiliated = maybe_token_to_usize(slots.next())?;
        let contents_begin = maybe_token_to_usize(slots.next())?;
        let contents_end = maybe_token_to_usize(slots.next())?;
        let end = maybe_token_to_usize(slots.next())?;
        let post_blank = maybe_token_to_usize(slots.next())?;
        return Ok(EmacsStandardProperties {
            begin,
            post_affiliated,
            contents_begin,
            contents_end,
            end,
            post_blank,
        });
    }

    // No packed vector: fall back to the individual attributes.
    let lookup = |key: &str| maybe_token_to_usize(attributes_map.get(key).copied());
    let begin = lookup(":begin")?;
    let end = lookup(":end")?;
    let contents_begin = lookup(":contents-begin")?;
    let contents_end = lookup(":contents-end")?;
    let post_blank = lookup(":post-blank")?;
    let post_affiliated = lookup(":post-affiliated")?;
    Ok(EmacsStandardProperties {
        begin,
        post_affiliated,
        contents_begin,
        contents_end,
        end,
        post_blank,
    })
}
/// Interpret an optional elisp atom as an optional number.
///
/// Returns `Ok(None)` when `token` is absent or is the atom `nil`; any other atom is parsed as a
/// `usize`.
///
/// # Errors
///
/// Propagates the failure from `as_atom`, and fails when the atom's text is neither `nil` nor a
/// valid `usize`.
fn maybe_token_to_usize(
    token: Option<&Token<'_>>,
) -> Result<Option<usize>, Box<dyn std::error::Error>> {
    let atom = match token {
        Some(token) => token.as_atom()?,
        // The parameter was not present at all.
        None => return Ok(None),
    };
    if atom == "nil" {
        // The parameter was present but explicitly empty.
        return Ok(None);
    }
    Ok(Some(atom.parse::<usize>()?))
}
/// Get a named property from the emacs token.
///
/// The second element of the token's list is read as an attribute map and `key` is looked up in
/// it.
///
/// Returns `Ok(None)` if the value is the atom `nil`, and `Ok(Some(token))` for any other value.
///
/// # Errors
///
/// Returns an error if the attribute is not specified on the token at all, if the list has no
/// second element, or if `as_list` / `as_map` fail.
pub(crate) fn get_property<'s, 'x>(
    emacs: &'s Token<'s>,
    key: &'x str,
) -> Result<Option<&'s Token<'s>>, Box<dyn std::error::Error>> {
    let children = emacs.as_list()?;
    let attributes_child = children
        .get(1)
        .ok_or("Should have an attributes child.")?;
    let attributes_map = attributes_child.as_map()?;
    // Build the error message lazily so the common, successful lookup does not allocate.
    let prop = attributes_map
        .get(key)
        .ok_or_else(|| format!("Missing {} attribute.", key))?;
    // A value that is not an atom makes `as_atom` fail, so it is returned as-is.
    if matches!(prop.as_atom(), Ok("nil")) {
        return Ok(None);
    }
    Ok(Some(*prop))
}