Fix handling of unicode in compare tests.
rustfmt Build rustfmt has succeeded Details
rust-test Build rust-test has succeeded Details
rust-build Build rust-build has succeeded Details

This commit is contained in:
Tom Alexander 2023-08-31 20:19:28 -04:00
parent ee92049e5d
commit 80d77ff5d6
Signed by: talexander
GPG Key ID: D3A179C9A53C0EDE
4 changed files with 8 additions and 5 deletions

View File

@ -87,7 +87,6 @@ fn is_expect_fail(name: &str) -> Option<&str> {
"autogen_greater_element_drawer_drawer_with_headline_inside" => Some("Apparently lines with :end: become their own paragraph. This odd behavior needs to be investigated more."),
"autogen_element_container_priority_footnote_definition_dynamic_block" => Some("Apparently broken begin lines become their own paragraph."),
"autogen_lesser_element_paragraphs_paragraph_with_backslash_line_breaks" => Some("The text we're getting out of the parse tree is already processed to remove line breaks, so our comparison needs to take that into account."),
"autogen_unicode_hearts" => Some("Unicode is coming out of emacs strange."),
_ => None,
}
}

View File

@ -26,6 +26,7 @@ RUN make DESTDIR="/root/dist" install
FROM rustlang/rust:nightly-alpine3.17
ENV LANG=en_US.UTF-8
RUN apk add --no-cache musl-dev ncurses gnutls
RUN cargo install --locked --no-default-features --features ci-autoclean cargo-cache
COPY --from=build-emacs /root/dist/ /

View File

@ -1457,7 +1457,7 @@ fn compare_plain_text<'s>(
.as_atom()?
.parse()?;
let emacs_text_length = end_ind - start_ind;
if rust_source.len() != emacs_text_length {
if rust_source.chars().count() != emacs_text_length {
this_status = DiffStatus::Bad;
message = Some(format!(
"(emacs len != rust len) {:?} != {:?}",

View File

@ -13,7 +13,7 @@ fn is_slice_of(parent: &str, child: &str) -> bool {
/// Get the offset into source that the rust object exists at.
///
/// These offsets are zero-based unlike the elisp ones.
pub fn get_offsets<'s, S: Source<'s>>(source: &'s str, rust_object: &'s S) -> (usize, usize) {
fn get_offsets<'s, S: Source<'s>>(source: &'s str, rust_object: &'s S) -> (usize, usize) {
let rust_object_source = rust_object.get_source();
assert!(is_slice_of(source, rust_object_source));
let offset = rust_object_source.as_ptr() as usize - source.as_ptr() as usize;
@ -50,8 +50,11 @@ pub fn assert_bounds<'s, S: Source<'s>>(
standard_properties.end.ok_or("Token should have an end.")?,
);
let (rust_begin, rust_end) = get_offsets(source, rust);
if (rust_begin + 1) != begin || (rust_end + 1) != end {
Err(format!("Rust bounds (in bytes) ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin + 1, rust_end = rust_end + 1, emacs_begin=begin, emacs_end=end))?;
let rust_begin_char_offset = (&source[..rust_begin]).chars().count();
let rust_end_char_offset =
rust_begin_char_offset + (&source[rust_begin..rust_end]).chars().count();
if (rust_begin_char_offset + 1) != begin || (rust_end_char_offset + 1) != end {
Err(format!("Rust bounds (in chars) ({rust_begin}, {rust_end}) do not match emacs bounds ({emacs_begin}, {emacs_end})", rust_begin = rust_begin_char_offset + 1, rust_end = rust_end_char_offset + 1, emacs_begin=begin, emacs_end=end))?;
}
Ok(())