From 46780ea2dc97a43538e5dca6513baea560aeb839 Mon Sep 17 00:00:00 2001 From: Xin LI Date: Thu, 8 Dec 2022 23:45:12 -0800 Subject: [PATCH] Vendor import of xz 5.2.9 (trimmed) --- AUTHORS | 6 +- ChangeLog | 195 +++++++++++++++++++++++++ README | 11 +- src/liblzma/api/lzma/filter.h | 3 +- src/liblzma/api/lzma/hardware.h | 2 +- src/liblzma/api/lzma/index_hash.h | 2 +- src/liblzma/api/lzma/version.h | 2 +- src/liblzma/common/block_encoder.c | 1 + src/liblzma/common/common.h | 12 ++ src/liblzma/common/filter_encoder.c | 4 +- src/liblzma/common/stream_encoder.c | 41 ++++-- src/liblzma/common/stream_encoder_mt.c | 4 + src/liblzma/lzma/lzma_encoder.c | 19 ++- 13 files changed, 274 insertions(+), 28 deletions(-) diff --git a/AUTHORS b/AUTHORS index 0186555975f4..d55d051891e2 100644 --- a/AUTHORS +++ b/AUTHORS @@ -6,12 +6,12 @@ Authors of XZ Utils and Jia Tan . Major parts of liblzma are based on code written by Igor Pavlov, - specifically the LZMA SDK . Without + specifically the LZMA SDK . Without this code, XZ Utils wouldn't exist. The SHA-256 implementation in liblzma is based on the code found from - 7-Zip , which has a modified version of the SHA-256 - code found from Crypto++ . The SHA-256 code + 7-Zip , which has a modified version of the SHA-256 + code found from Crypto++ . The SHA-256 code in Crypto++ was written by Kevin Springle and Wei Dai. Some scripts have been adapted from gzip. The original versions diff --git a/ChangeLog b/ChangeLog index de9c32fb2187..b71401c51521 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,198 @@ +commit d8a898eb9974683bc725c49ec76722f9a8758f48 +Author: Lasse Collin +Date: 2022-11-30 18:33:05 +0200 + + Bump version and soname for 5.2.9. + + src/liblzma/Makefile.am | 2 +- + src/liblzma/api/lzma/version.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +commit efd4430e21f798e198a6ee8a368a79065139eb54 +Author: Lasse Collin +Date: 2022-11-30 18:31:16 +0200 + + Add NEWS for 5.2.9. 
+ + NEWS | 34 ++++++++++++++++++++++++++++++++++ + 1 file changed, 34 insertions(+) + +commit 2dc1bc97a562ef4d4134c8df4d344d0f403428b8 +Author: Lasse Collin +Date: 2022-11-30 18:08:34 +0200 + + Change the bug report address. + + It forwards to me and Jia Tan. + + Also update the IRC reference in README as #tukaani was moved + to Libera Chat long ago. + + CMakeLists.txt | 2 +- + README | 11 +++++------ + configure.ac | 2 +- + dos/config.h | 2 +- + windows/README-Windows.txt | 2 +- + 5 files changed, 9 insertions(+), 10 deletions(-) + +commit fb13a234d9def06cbe2b8ed88ece7db0c6d5c39f +Author: Lasse Collin +Date: 2022-11-30 17:38:32 +0200 + + Update to HTTPS URLs in AUTHORS. + + AUTHORS | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +commit 841448e36d4455d39ff1e1115843360934e7a507 +Author: Lasse Collin +Date: 2022-11-27 01:03:16 +0200 + + liblzma: Remove two FIXME comments. + + src/liblzma/common/filter_encoder.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +commit b61da00c7f535bd48930a878efdf6843d804983e +Author: Lasse Collin +Date: 2022-11-24 14:52:44 +0200 + + Build: Don't put GNU/Linux-specific symbol versions into static liblzma. + + It not only makes no sense to put symbol versions into a static library + but it can also cause breakage. + + By default Libtool #defines PIC if building a shared library and + doesn't define it for static libraries. This is documented in the + Libtool manual. It can be overriden using --with-pic or --without-pic. + configure.ac detects if --with-pic or --without-pic is used and then + gives an error if neither --disable-shared nor --disable-static was + used at the same time. Thus, in normal situations it works to build + both shared and static library at the same time on GNU/Linux, + only --with-pic or --without-pic requires that only one type of + library is built. 
+ + Thanks to John Paul Adrian Glaubitz from Debian for reporting + the problem that occurred on ia64: + https://www.mail-archive.com/xz-devel@tukaani.org/msg00610.html + + CMakeLists.txt | 5 +- + configure.ac | 143 +++++++++++++++++++++++++++++--------------- + src/liblzma/common/common.h | 12 ++++ + 3 files changed, 111 insertions(+), 49 deletions(-) + +commit 6c29793b3cac292b25801d011db6d8ccade50396 +Author: Lasse Collin +Date: 2022-11-24 00:02:31 +0200 + + CMake: Don't use symbol versioning with static library. + + CMakeLists.txt | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +commit 872623def5e3e5c7f9f14f366d908f3e012d1007 +Author: Lasse Collin +Date: 2022-11-24 01:26:37 +0200 + + liblzma: Fix another invalid free() after memory allocation failure. + + This time it can happen when lzma_stream_encoder_mt() is used + to reinitialize an existing multi-threaded Stream encoder + and one of 1-4 tiny allocations in lzma_filters_copy() fail. + + It's very similar to the previous bug + 10430fbf3820dafd4eafd38ec8be161a6978ed2b, happening with + an array of lzma_filter structures whose old options are freed + but the replacement never arrives due to a memory allocation + failure in lzma_filters_copy(). + + src/liblzma/common/stream_encoder_mt.c | 4 ++++ + 1 file changed, 4 insertions(+) + +commit b0f8d9293ca5a0a56964695a59a098c9d1e82b99 +Author: Jia Tan +Date: 2022-05-05 20:53:42 +0800 + + liblzma: Add support for LZMA_SYNC_FLUSH in the Block encoder. + + The documentation mentions that lzma_block_encoder() supports + LZMA_SYNC_FLUSH but it was never added to supported_actions[] + in the internal structure. Because of this, LZMA_SYNC_FLUSH could + not be used with the Block encoder unless it was the next coder + after something like stream_encoder() or stream_encoder_mt(). 
+ + src/liblzma/common/block_encoder.c | 1 + + 1 file changed, 1 insertion(+) + +commit 6997e0b5e2339025646cfaec13a3317fb340729b +Author: Lasse Collin +Date: 2022-11-23 21:55:22 +0200 + + liblzma: Add lzma_attr_warn_unused_result to lzma_filters_copy(). + + src/liblzma/api/lzma/filter.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +commit f94a3e34603c56c55777056bb5412bfd0e948f0b +Author: Lasse Collin +Date: 2022-11-23 21:26:21 +0200 + + liblzma: Fix invalid free() after memory allocation failure. + + The bug was in the single-threaded .xz Stream encoder + in the code that is used for both re-initialization and for + lzma_filters_update(). To trigger it, an application had + to either re-initialize an existing encoder instance with + lzma_stream_encoder() or use lzma_filters_update(), and + then one of the 1-4 tiny allocations in lzma_filters_copy() + (called from stream_encoder_update()) must fail. An error + was correctly reported but the encoder state was corrupted. + + This is related to the recent fix in + f8ee61e74eb40600445fdb601c374d582e1e9c8a which is good but + it wasn't enough to fix the main problem in stream_encoder.c. + + src/liblzma/common/stream_encoder.c | 39 +++++++++++++++++++++++++++++-------- + 1 file changed, 31 insertions(+), 8 deletions(-) + +commit 8309385b444bce23e56256e21fb008a170434008 +Author: Lasse Collin +Date: 2022-11-22 16:37:15 +0200 + + liblzma: Fix language in a comment. + + src/liblzma/common/stream_encoder.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 5fecba6022cbbeed8103b49d79b2fb36086be72d +Author: Lasse Collin +Date: 2022-11-22 11:20:17 +0200 + + liblzma: Fix infinite loop in LZMA encoder init with dict_size >= 2 GiB. + + The encoder doesn't support dictionary sizes larger than 1536 MiB. + This is validated, for example, when calculating the memory usage + via lzma_raw_encoder_memusage(). It is also enforced by the LZ + part of the encoder initialization. 
However, LZMA encoder with + LZMA_MODE_NORMAL did an unsafe calculation with dict_size before + such validation and that results in an infinite loop if dict_size + was 2 << 30 or greater. + + src/liblzma/lzma/lzma_encoder.c | 19 +++++++++++++++---- + 1 file changed, 15 insertions(+), 4 deletions(-) + +commit 1946b2b141d8b12c9325ba48d3a04ad8e8206750 +Author: Lasse Collin +Date: 2022-11-21 13:02:33 +0200 + + liblzma: Fix two Doxygen commands in the API headers. + + These were caught by clang -Wdocumentation. + + src/liblzma/api/lzma/hardware.h | 2 +- + src/liblzma/api/lzma/index_hash.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + commit 5476089d9c42b9b04e92b80e1800b384a98265cb Author: Lasse Collin Date: 2022-11-13 19:58:47 +0200 diff --git a/README b/README index 110d242966ef..be01d60ae202 100644 --- a/README +++ b/README @@ -226,11 +226,10 @@ XZ Utils ---------------------- If you have questions, bug reports, patches etc. related to XZ Utils, - contact Lasse Collin (in Finnish or English). - I'm sometimes slow at replying. If you haven't got a reply within two - weeks, assume that your email has got lost and resend it or use IRC. + the project maintainers Lasse Collin and Jia Tan can be reached via + . - You can find me also from #tukaani on Freenode; my nick is Larhzu. - The channel tends to be pretty quiet, so just ask your question and - someone may wake up. + You might find Lasse also from #tukaani on Libera Chat (IRC). + The nick is Larhzu. The channel tends to be pretty quiet, + so just ask your question and someone might wake up. 
diff --git a/src/liblzma/api/lzma/filter.h b/src/liblzma/api/lzma/filter.h index 04825c655eab..5ec9976d4a68 100644 --- a/src/liblzma/api/lzma/filter.h +++ b/src/liblzma/api/lzma/filter.h @@ -120,7 +120,8 @@ extern LZMA_API(lzma_bool) lzma_filter_decoder_is_supported(lzma_vli id) */ extern LZMA_API(lzma_ret) lzma_filters_copy( const lzma_filter *src, lzma_filter *dest, - const lzma_allocator *allocator) lzma_nothrow; + const lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; /** diff --git a/src/liblzma/api/lzma/hardware.h b/src/liblzma/api/lzma/hardware.h index 47481f2581f7..7040aae45fb1 100644 --- a/src/liblzma/api/lzma/hardware.h +++ b/src/liblzma/api/lzma/hardware.h @@ -57,7 +57,7 @@ extern LZMA_API(uint64_t) lzma_physmem(void) lzma_nothrow; * If the hardware supports more than one thread per CPU core, the number * of hardware threads is returned if that information is available. * - * \brief On success, the number of available CPU threads or cores is + * \return On success, the number of available CPU threads or cores is * returned. If this information isn't available or an error * occurs, zero is returned. 
*/ diff --git a/src/liblzma/api/lzma/index_hash.h b/src/liblzma/api/lzma/index_hash.h index 9287f1dfdb56..8ff4163365f8 100644 --- a/src/liblzma/api/lzma/index_hash.h +++ b/src/liblzma/api/lzma/index_hash.h @@ -52,7 +52,7 @@ extern LZMA_API(void) lzma_index_hash_end( /** * \brief Add a new Record to an Index hash * - * \param index Pointer to a lzma_index_hash structure + * \param index_hash Pointer to a lzma_index_hash structure * \param unpadded_size Unpadded Size of a Block * \param uncompressed_size Uncompressed Size of a Block * diff --git a/src/liblzma/api/lzma/version.h b/src/liblzma/api/lzma/version.h index fcae6ea2abc2..087bf134557c 100644 --- a/src/liblzma/api/lzma/version.h +++ b/src/liblzma/api/lzma/version.h @@ -22,7 +22,7 @@ */ #define LZMA_VERSION_MAJOR 5 #define LZMA_VERSION_MINOR 2 -#define LZMA_VERSION_PATCH 8 +#define LZMA_VERSION_PATCH 9 #define LZMA_VERSION_STABILITY LZMA_VERSION_STABILITY_STABLE #ifndef LZMA_VERSION_COMMIT diff --git a/src/liblzma/common/block_encoder.c b/src/liblzma/common/block_encoder.c index 168846ad6899..520ecc5a49f4 100644 --- a/src/liblzma/common/block_encoder.c +++ b/src/liblzma/common/block_encoder.c @@ -217,6 +217,7 @@ lzma_block_encoder(lzma_stream *strm, lzma_block *block) lzma_next_strm_init(lzma_block_encoder_init, strm, block); strm->internal->supported_actions[LZMA_RUN] = true; + strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true; strm->internal->supported_actions[LZMA_FINISH] = true; return LZMA_OK; diff --git a/src/liblzma/common/common.h b/src/liblzma/common/common.h index 33928c3d2b33..7fb1732adff7 100644 --- a/src/liblzma/common/common.h +++ b/src/liblzma/common/common.h @@ -34,6 +34,18 @@ #include "lzma.h" +// The extra symbol versioning in the C files may only be used when +// building a shared library. If HAVE_SYMBOL_VERSIONS_LINUX is defined +// to 2 then symbol versioning is done only if also PIC is defined. 
+// By default Libtool defines PIC when building a shared library and +// doesn't define it when building a static library but it can be +// overridden with --with-pic and --without-pic. configure lets us rely +// on PIC if neither --with-pic nor --without-pic was used. +#if defined(HAVE_SYMBOL_VERSIONS_LINUX) \ + && (HAVE_SYMBOL_VERSIONS_LINUX == 2 && !defined(PIC)) +# undef HAVE_SYMBOL_VERSIONS_LINUX +#endif + #ifdef HAVE_SYMBOL_VERSIONS_LINUX // To keep link-time optimization (LTO, -flto) working with GCC, // the __symver__ attribute must be used instead of __asm__(".symver ..."). diff --git a/src/liblzma/common/filter_encoder.c b/src/liblzma/common/filter_encoder.c index c5d8f39721fe..f0762d9a0afa 100644 --- a/src/liblzma/common/filter_encoder.c +++ b/src/liblzma/common/filter_encoder.c @@ -59,7 +59,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_LZMA1, .init = &lzma_lzma_encoder_init, .memusage = &lzma_lzma_encoder_memusage, - .block_size = NULL, // FIXME + .block_size = NULL, // Not needed for LZMA1 .props_size_get = NULL, .props_size_fixed = 5, .props_encode = &lzma_lzma_props_encode, @@ -70,7 +70,7 @@ static const lzma_filter_encoder encoders[] = { .id = LZMA_FILTER_LZMA2, .init = &lzma_lzma2_encoder_init, .memusage = &lzma_lzma2_encoder_memusage, - .block_size = &lzma_lzma2_block_size, // FIXME + .block_size = &lzma_lzma2_block_size, .props_size_get = NULL, .props_size_fixed = 1, .props_encode = &lzma_lzma2_props_encode, diff --git a/src/liblzma/common/stream_encoder.c b/src/liblzma/common/stream_encoder.c index 858cba473ad4..b15229c39ec4 100644 --- a/src/liblzma/common/stream_encoder.c +++ b/src/liblzma/common/stream_encoder.c @@ -233,6 +233,13 @@ stream_encoder_update(void *coder_ptr, const lzma_allocator *allocator, const lzma_filter *reversed_filters) { lzma_stream_coder *coder = coder_ptr; + lzma_ret ret; + + // Make a copy to a temporary buffer first.
This way it is easier + // to keep the encoder state unchanged if an error occurs with + // lzma_filters_copy(). + lzma_filter temp[LZMA_FILTERS_MAX + 1]; + return_if_error(lzma_filters_copy(filters, temp, allocator)); if (coder->sequence <= SEQ_BLOCK_INIT) { // There is no incomplete Block waiting to be finished, @@ -240,31 +247,47 @@ stream_encoder_update(void *coder_ptr, const lzma_allocator *allocator, // trying to initialize the Block encoder with the new // chain. This way we detect if the chain is valid. coder->block_encoder_is_initialized = false; - coder->block_options.filters = (lzma_filter *)(filters); - const lzma_ret ret = block_encoder_init(coder, allocator); + coder->block_options.filters = temp; + ret = block_encoder_init(coder, allocator); coder->block_options.filters = coder->filters; if (ret != LZMA_OK) - return ret; + goto error; coder->block_encoder_is_initialized = true; } else if (coder->sequence <= SEQ_BLOCK_ENCODE) { // We are in the middle of a Block. Try to update only // the filter-specific options. - return_if_error(coder->block_encoder.update( + ret = coder->block_encoder.update( coder->block_encoder.coder, allocator, - filters, reversed_filters)); + filters, reversed_filters); + if (ret != LZMA_OK) + goto error; } else { // Trying to update the filter chain when we are already // encoding Index or Stream Footer. - return LZMA_PROG_ERROR; + ret = LZMA_PROG_ERROR; + goto error; } - // Free the copy of the old chain and make a copy of the new chain. + // Free the options of the old chain. for (size_t i = 0; coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i) lzma_free(coder->filters[i].options, allocator); - return lzma_filters_copy(filters, coder->filters, allocator); + // Copy the new filter chain in place. 
+ size_t j = 0; + do { + coder->filters[j].id = temp[j].id; + coder->filters[j].options = temp[j].options; + } while (temp[j++].id != LZMA_VLI_UNKNOWN); + + return LZMA_OK; + +error: + for (size_t i = 0; temp[i].id != LZMA_VLI_UNKNOWN; ++i) + lzma_free(temp[i].options, allocator); + + return ret; } @@ -319,7 +342,7 @@ stream_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, // Initialize the Block encoder. This way we detect unsupported // filter chains when initializing the Stream encoder instead of - // giving an error after Stream Header has already written out. + // giving an error after Stream Header has already been written out. return stream_encoder_update(coder, allocator, filters, NULL); } diff --git a/src/liblzma/common/stream_encoder_mt.c b/src/liblzma/common/stream_encoder_mt.c index 819b22733b68..6dc10c3a095d 100644 --- a/src/liblzma/common/stream_encoder_mt.c +++ b/src/liblzma/common/stream_encoder_mt.c @@ -1053,6 +1053,10 @@ stream_encoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator, for (size_t i = 0; coder->filters[i].id != LZMA_VLI_UNKNOWN; ++i) lzma_free(coder->filters[i].options, allocator); + // Mark it as empty so that it is in a safe state in case + // lzma_filters_copy() fails. + coder->filters[0].id = LZMA_VLI_UNKNOWN; + return_if_error(lzma_filters_copy( filters, coder->filters, allocator)); diff --git a/src/liblzma/lzma/lzma_encoder.c b/src/liblzma/lzma/lzma_encoder.c index c1552f194451..bbbb5d2d7cdf 100644 --- a/src/liblzma/lzma/lzma_encoder.c +++ b/src/liblzma/lzma/lzma_encoder.c @@ -559,10 +559,9 @@ lzma_lzma_encoder_create(void **coder_ptr, lzma_lzma1_encoder *coder = *coder_ptr; - // Set compression mode. We haven't validates the options yet, - // but it's OK here, since nothing bad happens with invalid - // options in the code below, and they will get rejected by - // lzma_lzma_encoder_reset() call at the end of this function. + // Set compression mode. 
Note that we haven't validated the options + // yet. Invalid options will get rejected by lzma_lzma_encoder_reset() + // call at the end of this function. switch (options->mode) { case LZMA_MODE_FAST: coder->fast_mode = true; @@ -573,6 +572,18 @@ lzma_lzma_encoder_create(void **coder_ptr, // Set dist_table_size. // Round the dictionary size up to next 2^n. + // + // Currently the maximum encoder dictionary size + // is 1.5 GiB due to lz_encoder.c and here we need + // to be below 2 GiB to make the rounded up value + // fit in a uint32_t and avoid an infinite while-loop + // (and undefined behavior due to a too large shift). + // So do the same check as in LZ encoder, + // limiting to 1.5 GiB. + if (options->dict_size > (UINT32_C(1) << 30) + + (UINT32_C(1) << 29)) + return LZMA_OPTIONS_ERROR; + uint32_t log_size = 0; while ((UINT32_C(1) << log_size) < options->dict_size) ++log_size;