From f170a557edbd35d98f0b5338b99d745277f53746 Mon Sep 17 00:00:00 2001 From: Tom Alexander Date: Tue, 29 Aug 2023 21:49:16 -0400 Subject: [PATCH] Use character offsets in diff. --- build.rs | 1 + org_mode_samples/unicode/hearts.org | 1 + src/compare/diff.rs | 3 ++- src/compare/util.rs | 2 +- 4 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 org_mode_samples/unicode/hearts.org diff --git a/build.rs b/build.rs index c8b3c1d..999e073 100644 --- a/build.rs +++ b/build.rs @@ -75,6 +75,7 @@ fn is_expect_fail(name: &str) -> Option<&str> { "autogen_greater_element_drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."), "autogen_element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."), "autogen_lesser_element_paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."), + "autogen_unicode_hearts" => Some("Unicode is coming out of emacs strange."), _ => None, } } diff --git a/org_mode_samples/unicode/hearts.org b/org_mode_samples/unicode/hearts.org new file mode 100644 index 0000000..f5f65d8 --- /dev/null +++ b/org_mode_samples/unicode/hearts.org @@ -0,0 +1 @@ +🧡💛💚💙💜 diff --git a/src/compare/diff.rs b/src/compare/diff.rs index a32f208..f4f9450 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -115,12 +115,13 @@ impl<'s> DiffResult<'s> { } }; let rust_offset = self.rust_source.as_ptr() as usize - original_document.as_ptr() as usize; + let preceding_text = &original_document[..rust_offset]; println!( "{indentation}{status_text} {name} char({char_offset}) {message}", indentation = " ".repeat(indentation), status_text = status_text, name = self.name, - char_offset = rust_offset + 1, + char_offset = preceding_text.chars().count() + 
1, message = self.message.as_ref().map(|m| m.as_str()).unwrap_or("") ); for child in self.children.iter() { diff --git a/src/compare/util.rs b/src/compare/util.rs index 25174f6..03c8e9b 100644 --- a/src/compare/util.rs +++ b/src/compare/util.rs @@ -51,7 +51,7 @@ pub fn assert_bounds<'s, S: Source<'s>>( ); let (rust_begin, rust_end) = get_offsets(source, rust); if (rust_begin + 1) != begin || (rust_end + 1) != end { - Err(format!("Rust bounds ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin + 1, rust_end = rust_end + 1, emacs_begin=begin, emacs_end=end))?; + Err(format!("Rust bounds (in bytes) ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin + 1, rust_end = rust_end + 1, emacs_begin=begin, emacs_end=end))?; } Ok(())