From 52b401d548b0cf41d8378e29f19556a1c2f987ef Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 11 Apr 2023 19:16:04 -0400 Subject: [PATCH] comparing begin and end offsets for top-level sections and headlines. --- src/compare/diff.rs | 52 +++++++++++++++++++++++++++++++++++++----- src/compare/mod.rs | 1 + src/compare/sexp.rs | 27 ++++++++++++++++++++++ src/compare/util.rs | 21 +++++++++++++++++ src/parser/document.rs | 12 ++++++++++ src/parser/mod.rs | 2 ++ 6 files changed, 109 insertions(+), 6 deletions(-) create mode 100644 src/compare/util.rs diff --git a/src/compare/diff.rs b/src/compare/diff.rs index 6f624e9..798a29c 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -1,6 +1,8 @@ use super::sexp::Token; +use crate::compare::util::get_offsets; use crate::parser::Document; use crate::parser::Section; +use crate::parser::Heading; #[derive(Debug)] pub struct DiffResult { @@ -46,7 +48,7 @@ pub fn compare_document<'s>( return Err("Document should correspond to an org-data cell.".into()); } let mut child_status = Vec::new(); - // TODO: compare the children + let mut this_status = DiffStatus::Good; // Skipping "org-data" and the first parameter which is often nil for (i, token) in children.iter().skip(2).enumerate() { @@ -72,14 +74,14 @@ pub fn compare_document<'s>( .iter() .nth(i - rust.zeroth_section.as_ref().map(|_| 1).unwrap_or(0)) .ok_or("Should have a corresponding heading.")?; - child_status.push(compare_heading(rust.source, token, rust)?); + child_status.push(compare_heading(rust.source, token, corresponding_heading)?); } else { return Err("Document should only contain sections and headlines.".into()); } } Ok(DiffResult { - status: DiffStatus::Good, + status: this_status, name: "document".to_owned(), children: child_status, }) @@ -97,9 +99,28 @@ pub fn compare_section<'s>( return Err("Section should correspond to a section cell.".into()); } let mut child_status = Vec::new(); + let mut this_status = DiffStatus::Good; + + let attributes_child = children + .iter() + .nth(1) + .ok_or("Should have an attributes child.")?; + let attributes_map = attributes_child.as_map()?; + let begin = attributes_map + .get(":begin") + .ok_or("Missing :begin attribute.")? + .as_atom()?; + let end = attributes_map + .get(":end") + .ok_or("Missing :end attribute.")? + .as_atom()?; + let (rust_begin, rust_end) = get_offsets(source, rust); + if (rust_begin + 1).to_string() != begin || (rust_end + 1).to_string() != end { + this_status = DiffStatus::Bad; + } Ok(DiffResult { - status: DiffStatus::Good, + status: this_status, name: "section".to_owned(), children: child_status, }) @@ -108,7 +129,7 @@ pub fn compare_section<'s>( pub fn compare_heading<'s>( source: &'s str, emacs: &'s Token<'s>, - rust: &'s Document<'s>, + rust: &'s Heading<'s>, ) -> Result> { let children = emacs.as_list()?; let first_child = children.first().ok_or("Should have at least one child.")?; @@ -117,9 +138,28 @@ pub fn compare_heading<'s>( return Err("Heading should correspond to a headline cell.".into()); } let mut child_status = Vec::new(); + let mut this_status = DiffStatus::Good; + + let attributes_child = children + .iter() + .nth(1) + .ok_or("Should have an attributes child.")?; + let attributes_map = attributes_child.as_map()?; + let begin = attributes_map + .get(":begin") + .ok_or("Missing :begin attribute.")? + .as_atom()?; + let end = attributes_map + .get(":end") + .ok_or("Missing :end attribute.")? + .as_atom()?; + let (rust_begin, rust_end) = get_offsets(source, rust); + if (rust_begin + 1).to_string() != begin || (rust_end + 1).to_string() != end { + this_status = DiffStatus::Bad; + } Ok(DiffResult { - status: DiffStatus::Good, + status: this_status, name: "heading".to_owned(), children: child_status, }) diff --git a/src/compare/mod.rs b/src/compare/mod.rs index 4dbb825..07f9207 100644 --- a/src/compare/mod.rs +++ b/src/compare/mod.rs @@ -2,6 +2,7 @@ mod diff; mod error; mod parse; mod sexp; +mod util; pub use diff::compare_document; pub use parse::emacs_parse_org_document; pub use sexp::sexp; diff --git a/src/compare/sexp.rs b/src/compare/sexp.rs index 5257940..948181d 100644 --- a/src/compare/sexp.rs +++ b/src/compare/sexp.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use nom::branch::alt; use nom::bytes::complete::escaped; use nom::bytes::complete::tag; @@ -44,6 +46,31 @@ impl<'s> Token<'s> { _ => Err("wrong token type"), }?) } + + pub fn as_map<'p>( + &'p self, + ) -> Result>, Box> { + let mut hashmap = HashMap::new(); + + let children = self.as_list()?; + if children.len() % 2 != 0 { + return Err("Expecting an even number of children".into()); + } + let mut key: Option<&str> = None; + for child in children.iter() { + match key { + None => { + key = Some(child.as_atom()?); + } + Some(key_val) => { + key = None; + hashmap.insert(key_val, child); + } + }; + } + + Ok(hashmap) + } } #[tracing::instrument(ret, level = "debug")] diff --git a/src/compare/util.rs b/src/compare/util.rs new file mode 100644 index 0000000..1dd9463 --- /dev/null +++ b/src/compare/util.rs @@ -0,0 +1,21 @@ +use crate::parser::Source; + +/// Check if the child string slice is a slice of the parent string slice. +fn is_slice_of(parent: &str, child: &str) -> bool { + let parent_start = parent.as_ptr() as usize; + let parent_end = parent_start + parent.len(); + let child_start = child.as_ptr() as usize; + let child_end = child_start + child.len(); + child_start >= parent_start && child_end <= parent_end +} + +/// Get the offset into source that the rust object exists at. +/// +/// These offsets are zero-based unlike the elisp ones. +pub fn get_offsets<'s, S: Source<'s>>(source: &'s str, rust_object: &'s S) -> (usize, usize) { + let rust_object_source = rust_object.get_source(); + assert!(is_slice_of(source, rust_object_source)); + let offset = rust_object_source.as_ptr() as usize - source.as_ptr() as usize; + let end = offset + rust_object_source.len(); + (offset, end) +} diff --git a/src/parser/document.rs b/src/parser/document.rs index 83a4854..cf5addd 100644 --- a/src/parser/document.rs +++ b/src/parser/document.rs @@ -74,6 +74,18 @@ impl<'s> Source<'s> for DocumentElement<'s> { } } +impl<'s> Source<'s> for Section<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + +impl<'s> Source<'s> for Heading<'s> { + fn get_source(&'s self) -> &'s str { + self.source + } +} + #[tracing::instrument(ret, level = "debug")] #[allow(dead_code)] pub fn document(input: &str) -> Res<&str, Document> { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 74c00c5..63bf5d8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -17,4 +17,6 @@ mod util; pub use document::document; pub use document::Document; pub use document::Section; +pub use document::Heading; +pub use source::Source; type Context<'r, 's> = &'r parser_context::ContextTree<'r, 's>;