organic/src/compare/util.rs

258 lines
8.8 KiB
Rust
Raw Normal View History

use std::str::FromStr;
use super::elisp_fact::GetElispFact;
2023-09-11 15:31:48 -04:00
use super::sexp::Token;
2023-09-29 17:28:50 -04:00
use crate::compare::sexp::unquote;
use crate::types::GetStandardProperties;
2023-09-23 17:59:13 -04:00
use crate::types::StandardProperties;
/// Report whether `child` occupies memory that lies entirely inside `parent`.
///
/// Only addresses are compared, never contents: an equal string stored somewhere else is not
/// considered a slice of `parent`.
fn is_slice_of(parent: &str, child: &str) -> bool {
    let address_of = |text: &str| text.as_ptr() as usize;
    let (outer_start, inner_start) = (address_of(parent), address_of(child));
    let starts_inside = inner_start >= outer_start;
    let ends_inside = inner_start + child.len() <= outer_start + parent.len();
    starts_inside && ends_inside
}
2023-09-23 17:59:13 -04:00
/// Locate the rust object inside the source document as a pair of byte offsets.
///
/// Returns `(start, end)`, where `end` is `start` plus the byte length of the node's source.
/// These offsets are zero-based unlike the elisp ones.
///
/// Debug builds assert that the node's source is a slice of `original_document`.
fn get_rust_byte_offsets<'b, 's, S: StandardProperties<'s> + ?Sized>(
    original_document: &'s str,
    rust_ast_node: &'b S,
) -> (usize, usize) {
    let node_source = rust_ast_node.get_source();
    debug_assert!(is_slice_of(original_document, node_source));
    let node_address = node_source.as_ptr() as usize;
    let document_address = original_document.as_ptr() as usize;
    // The distance between the two pointers is the node's byte offset in the document.
    let start = node_address - document_address;
    (start, start + node_source.len())
}
/// Check that an emacs token and a rust AST node agree on both name and bounds.
///
/// First the emacs node name is compared against the rust node's elisp name, then the emacs
/// bounds are compared against the rust node's standard properties. The first failing check is
/// returned as the error.
pub(crate) fn compare_standard_properties<'b, 's, S>(
    original_document: &'s str,
    emacs: &'b Token<'s>,
    rust: &'b S,
) -> Result<(), Box<dyn std::error::Error>>
where
    S: GetStandardProperties<'s> + GetElispFact<'s> + ?Sized,
{
    assert_name(emacs, rust.get_elisp_fact().get_elisp_name())?;
    let rust_properties = rust.get_standard_properties();
    assert_bounds(original_document, emacs, rust_properties)
}
pub(crate) fn assert_name<'b, 's, S: AsRef<str>>(
emacs: &'b Token<'s>,
name: S,
2023-09-11 13:13:28 -04:00
) -> Result<(), Box<dyn std::error::Error>> {
let name = name.as_ref();
let children = emacs.as_list()?;
let first_child = children
.first()
.ok_or("Should have at least one child.")?
.as_atom()?;
if first_child != name {
Err(format!(
"AST node name mismatch. Expected a (rust) {expected} cell, but found a (emacs) {found} cell.",
expected = name,
found = first_child
))?;
}
Ok(())
}
2023-04-19 15:29:46 -04:00
2023-09-23 17:59:13 -04:00
/// Assert that the character ranges defined by upstream org-mode's :standard-properties match the slices in Organic's StandardProperties.
///
/// This does **not** handle plain text because plain text is a special case.
pub(crate) fn assert_bounds<'b, 's, S: StandardProperties<'s> + ?Sized>(
2023-09-23 17:59:13 -04:00
original_document: &'s str,
emacs: &'b Token<'s>,
rust: &'b S,
2023-04-19 15:29:46 -04:00
) -> Result<(), Box<dyn std::error::Error>> {
2023-09-23 17:59:13 -04:00
let standard_properties = get_emacs_standard_properties(emacs)?; // 1-based
2023-08-25 02:55:01 -04:00
let (begin, end) = (
standard_properties
.begin
.ok_or("Token should have a begin.")?,
standard_properties.end.ok_or("Token should have an end.")?,
2023-08-25 02:55:01 -04:00
);
2023-09-23 17:59:13 -04:00
let (rust_begin, rust_end) = get_rust_byte_offsets(original_document, rust); // 0-based
let rust_begin_char_offset = (&original_document[..rust_begin]).chars().count() + 1; // 1-based
let rust_end_char_offset =
2023-09-23 17:59:13 -04:00
rust_begin_char_offset + (&original_document[rust_begin..rust_end]).chars().count(); // 1-based
if rust_begin_char_offset != begin || rust_end_char_offset != end {
Err(format!("Rust bounds (in chars) ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin_char_offset, rust_end = rust_end_char_offset, emacs_begin=begin, emacs_end=end))?;
2023-08-25 02:55:01 -04:00
}
Ok(())
}
/// The standard properties emacs reports for an AST node.
///
/// Fields are listed in the order they appear in the `:standard-properties` vector. Each field is
/// `None` when emacs reported `nil` for it or did not report it at all.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct EmacsStandardProperties {
    /// Value of `:begin`.
    begin: Option<usize>,
    /// Value of `:post-affiliated`.
    // Parsed for completeness; nothing in this file reads it yet.
    #[allow(dead_code)]
    post_affiliated: Option<usize>,
    /// Value of `:contents-begin`.
    // Parsed for completeness; nothing in this file reads it yet.
    #[allow(dead_code)]
    contents_begin: Option<usize>,
    /// Value of `:contents-end`.
    // Parsed for completeness; nothing in this file reads it yet.
    #[allow(dead_code)]
    contents_end: Option<usize>,
    /// Value of `:end`.
    end: Option<usize>,
    /// Value of `:post-blank`.
    // Parsed for completeness; nothing in this file reads it yet.
    #[allow(dead_code)]
    post_blank: Option<usize>,
}
fn get_emacs_standard_properties<'b, 's>(
emacs: &'b Token<'s>,
) -> Result<EmacsStandardProperties, Box<dyn std::error::Error>> {
2023-04-19 15:29:46 -04:00
let children = emacs.as_list()?;
let attributes_child = children
.iter()
.nth(1)
.ok_or("Should have an attributes child.")?;
let attributes_map = attributes_child.as_map()?;
let standard_properties = attributes_map.get(":standard-properties");
2023-08-25 02:55:01 -04:00
Ok(if standard_properties.is_some() {
let mut std_props = standard_properties
.expect("if statement proves its Some")
2023-08-25 02:55:01 -04:00
.as_vector()?
.into_iter();
let begin = maybe_token_to_usize(std_props.next())?;
let post_affiliated = maybe_token_to_usize(std_props.next())?;
let contents_begin = maybe_token_to_usize(std_props.next())?;
let contents_end = maybe_token_to_usize(std_props.next())?;
let end = maybe_token_to_usize(std_props.next())?;
let post_blank = maybe_token_to_usize(std_props.next())?;
EmacsStandardProperties {
2023-08-25 02:55:01 -04:00
begin,
post_affiliated,
contents_begin,
contents_end,
end,
post_blank,
}
} else {
2023-08-25 02:55:01 -04:00
let begin = maybe_token_to_usize(attributes_map.get(":begin").map(|token| *token))?;
let end = maybe_token_to_usize(attributes_map.get(":end").map(|token| *token))?;
let contents_begin =
maybe_token_to_usize(attributes_map.get(":contents-begin").map(|token| *token))?;
let contents_end =
maybe_token_to_usize(attributes_map.get(":contents-end").map(|token| *token))?;
let post_blank =
maybe_token_to_usize(attributes_map.get(":post-blank").map(|token| *token))?;
let post_affiliated =
maybe_token_to_usize(attributes_map.get(":post-affiliated").map(|token| *token))?;
EmacsStandardProperties {
2023-08-25 02:55:01 -04:00
begin,
post_affiliated,
contents_begin,
contents_end,
end,
post_blank,
}
})
}
2023-04-19 15:29:46 -04:00
2023-08-25 02:55:01 -04:00
fn maybe_token_to_usize(
token: Option<&Token<'_>>,
) -> Result<Option<usize>, Box<dyn std::error::Error>> {
Ok(token
.map(|token| token.as_atom())
.map_or(Ok(None), |r| r.map(Some))?
.map(|val| {
if val == "nil" {
None
} else {
Some(val.parse::<usize>())
}
})
.flatten() // Outer option is whether or not the param exists, inner option is whether or not it is nil
.map_or(Ok(None), |r| r.map(Some))?)
2023-04-19 15:29:46 -04:00
}
2023-08-29 22:07:23 -04:00
/// Get a named property from the emacs token.
///
/// Returns Ok(None) if value is nil or absent.
pub(crate) fn get_property<'b, 's, 'x>(
emacs: &'b Token<'s>,
2023-08-29 22:07:23 -04:00
key: &'x str,
) -> Result<Option<&'b Token<'s>>, Box<dyn std::error::Error>> {
2023-08-29 22:07:23 -04:00
let children = emacs.as_list()?;
let attributes_child = children
.iter()
.nth(1)
.ok_or("Should have an attributes child.")?;
let attributes_map = attributes_child.as_map()?;
let prop = attributes_map.get(key).map(|token| *token);
match prop
.map(|token| token.as_atom())
.map_or(Ok(None), |r| r.map(Some))?
{
Some("nil") => return Ok(None),
2023-08-29 22:07:23 -04:00
_ => {}
};
Ok(prop)
2023-08-29 22:07:23 -04:00
}
2023-09-29 13:03:01 -04:00
/// Get a named property containing an unquoted atom from the emacs token.
///
/// Returns None if key is not found or its value is nil. Returns an error if the value is not
/// an atom.
pub(crate) fn get_property_unquoted_atom<'b, 's, 'x>(
    emacs: &'b Token<'s>,
    key: &'x str,
) -> Result<Option<&'s str>, Box<dyn std::error::Error>> {
    match get_property(emacs, key)? {
        Some(token) => Ok(Some(token.as_atom()?)),
        None => Ok(None),
    }
}
2023-09-29 17:28:50 -04:00
/// Get a named property containing an quoted string from the emacs token.
///
/// Returns None if key is not found.
pub(crate) fn get_property_quoted_string<'b, 's, 'x>(
emacs: &'b Token<'s>,
2023-09-29 17:28:50 -04:00
key: &'x str,
) -> Result<Option<String>, Box<dyn std::error::Error>> {
Ok(get_property(emacs, key)?
.map(Token::as_atom)
.map_or(Ok(None), |r| r.map(Some))?
.map(unquote)
.map_or(Ok(None), |r| r.map(Some))?)
}
2023-10-02 10:48:34 -04:00
/// Get a named property containing a boolean value.
///
/// This uses the elisp convention of nil == false, non-nil == true.
///
/// Returns false if key is not found. Returns an error if the value is not an atom.
pub(crate) fn get_property_boolean<'b, 's, 'x>(
    emacs: &'b Token<'s>,
    key: &'x str,
) -> Result<bool, Box<dyn std::error::Error>> {
    let is_set = match get_property(emacs, key)? {
        Some(token) => token.as_atom()? != "nil",
        // An absent property counts as nil.
        None => false,
    };
    Ok(is_set)
}
/// Get a named property containing an unquoted numeric value.
///
/// Returns None if key is not found.
pub(crate) fn get_property_numeric<'b, 's, 'x, N: FromStr>(
emacs: &'b Token<'s>,
key: &'x str,
) -> Result<Option<N>, Box<dyn std::error::Error + 's>>
where
<N as FromStr>::Err: std::error::Error,
<N as FromStr>::Err: 's,
{
2023-10-02 15:59:06 -04:00
let unparsed_string = get_property(emacs, key)?
.map(Token::as_atom)
.map_or(Ok(None), |r| r.map(Some))?;
2023-10-02 15:59:06 -04:00
let parsed_number = unparsed_string
.map(|val| val.parse::<N>())
.map_or(Ok(None), |r| r.map(Some))?;
2023-10-02 15:59:06 -04:00
Ok(parsed_number)
}