diff --git a/build.rs b/build.rs index 4186b8b9..56366106 100644 --- a/build.rs +++ b/build.rs @@ -87,7 +87,6 @@ fn is_expect_fail(name: &str) -> Option<&str> { "autogen_greater_element_drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."), "autogen_element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."), "autogen_lesser_element_paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."), - "autogen_unicode_hearts" => Some("Unicode is coming out of emacs strange."), _ => None, } } diff --git a/docker/organic_test/Dockerfile b/docker/organic_test/Dockerfile index 6efcb466..05a3216c 100644 --- a/docker/organic_test/Dockerfile +++ b/docker/organic_test/Dockerfile @@ -26,6 +26,7 @@ RUN make DESTDIR="/root/dist" install FROM rustlang/rust:nightly-alpine3.17 +ENV LANG=en_US.UTF-8 RUN apk add --no-cache musl-dev ncurses gnutls RUN cargo install --locked --no-default-features --features ci-autoclean cargo-cache COPY --from=build-emacs /root/dist/ / diff --git a/src/compare/diff.rs b/src/compare/diff.rs index a633be8b..3f6e3d37 100644 --- a/src/compare/diff.rs +++ b/src/compare/diff.rs @@ -1457,7 +1457,7 @@ fn compare_plain_text<'s>( .as_atom()? .parse()?; let emacs_text_length = end_ind - start_ind; - if rust_source.len() != emacs_text_length { + if rust_source.chars().count() != emacs_text_length { this_status = DiffStatus::Bad; message = Some(format!( "(emacs len != rust len) {:?} != {:?}", diff --git a/src/compare/util.rs b/src/compare/util.rs index 7ef432a9..94c74513 100644 --- a/src/compare/util.rs +++ b/src/compare/util.rs @@ -13,7 +13,7 @@ fn is_slice_of(parent: &str, child: &str) -> bool { /// Get the offset into source that the rust object exists at. /// /// These offsets are zero-based unlike the elisp ones. -pub fn get_offsets<'s, S: Source<'s>>(source: &'s str, rust_object: &'s S) -> (usize, usize) { +fn get_offsets<'s, S: Source<'s>>(source: &'s str, rust_object: &'s S) -> (usize, usize) { let rust_object_source = rust_object.get_source(); assert!(is_slice_of(source, rust_object_source)); let offset = rust_object_source.as_ptr() as usize - source.as_ptr() as usize; @@ -50,8 +50,11 @@ pub fn assert_bounds<'s, S: Source<'s>>( standard_properties.end.ok_or("Token should have an end.")?, ); let (rust_begin, rust_end) = get_offsets(source, rust); - if (rust_begin + 1) != begin || (rust_end + 1) != end { - Err(format!("Rust bounds (in bytes) ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin + 1, rust_end = rust_end + 1, emacs_begin=begin, emacs_end=end))?; + let rust_begin_char_offset = (&source[..rust_begin]).chars().count(); + let rust_end_char_offset = + rust_begin_char_offset + (&source[rust_begin..rust_end]).chars().count(); + if (rust_begin_char_offset + 1) != begin || (rust_end_char_offset + 1) != end { + Err(format!("Rust bounds (in chars) ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin_char_offset + 1, rust_end = rust_end_char_offset + 1, emacs_begin=begin, emacs_end=end))?; } Ok(())