mirror of
https://git.FreeBSD.org/ports.git
synced 2025-01-19 08:13:21 +00:00
graphics/lepton: upgrade and fix package-building
The upstream addressed our earlier BSD-specific concerns, so some of our patches are no longer needed. We now explicitly add -mssse3 to the CFLAGS. SSSE3 capability has always been a requirement, but unless it was enabled (either explicitly or via -march), parts of the code would not compile. PR: 275197
This commit is contained in:
parent
2aaba46921
commit
00d132c04a
@ -1,5 +1,5 @@
|
||||
PORTNAME= lepton
|
||||
PORTVERSION= 0.2016.07.16
|
||||
PORTVERSION= 0.2022.04.13
|
||||
CATEGORIES= graphics devel
|
||||
|
||||
MAINTAINER= mi@aldan.algebra.com
|
||||
@ -8,14 +8,20 @@ WWW= https://github.com/dropbox/lepton
|
||||
|
||||
LICENSE= APACHE20
|
||||
|
||||
LIB_DEPENDS= libbrotlienc.so:archivers/brotli
|
||||
|
||||
USES= autoreconf localbase ssl shebangfix python:test
|
||||
USE_GITHUB= yes
|
||||
GH_ACCOUNT= dropbox
|
||||
GH_TAGNAME= a34ee2f4b0a6454eff8ebe334750dd008d57de35
|
||||
GH_TAGNAME= 429fe88
|
||||
|
||||
USES= autoreconf
|
||||
GNU_CONFIGURE= yes
|
||||
CONFIGURE_ARGS+=--enable-system-dependencies --disable-native-opt
|
||||
TEST_TARGET= check
|
||||
CFLAGS+= -DBSD -DGIT_REVISION='\"${GH_TAGNAME}\"'
|
||||
SHEBANG_FILES= test_suite/*.py
|
||||
CFLAGS+= -DBSD -DGIT_REVISION='\"${GH_TAGNAME}\"' \
|
||||
-DUSE_SYSTEM_DEPENDENCIES -DUSE_SYSTEM_MD5_DEPENDENCY \
|
||||
-Wno-deprecated-declarations
|
||||
.ifndef WITH_DEBUG
|
||||
CFLAGS+= -DNDEBUG
|
||||
.endif
|
||||
@ -24,9 +30,18 @@ PLIST_FILES= bin/lepton
|
||||
|
||||
.if !${MACHINE_CPU:Mssse3}
|
||||
IGNORE= requires SSSE3 features in the processor
|
||||
.else
|
||||
CFLAGS+= -mssse3
|
||||
.endif
|
||||
|
||||
do-install:
|
||||
${INSTALL_PROGRAM} ${WRKSRC}/lepton ${STAGEDIR}${PREFIX}/bin
|
||||
|
||||
.include <bsd.port.mk>
|
||||
.include <bsd.port.pre.mk>
|
||||
|
||||
.if ${OPENSSLBASE} != /usr
|
||||
LDFLAGS+=-L${OPENSSLLIB} ${OPENSSL_LDFLAGS}
|
||||
CFLAGS+=-isystem ${OPENSSLINC}
|
||||
.endif
|
||||
|
||||
.include <bsd.port.post.mk>
|
||||
|
@ -1,3 +1,3 @@
|
||||
TIMESTAMP = 1468524984
|
||||
SHA256 (dropbox-lepton-0.2016.07.16-a34ee2f4b0a6454eff8ebe334750dd008d57de35_GH0.tar.gz) = 9e1941c7cc72b50a20f4f7a9495df42f7978d4cd1b9764c74787af997bd386f9
|
||||
SIZE (dropbox-lepton-0.2016.07.16-a34ee2f4b0a6454eff8ebe334750dd008d57de35_GH0.tar.gz) = 50216342
|
||||
TIMESTAMP = 1702853189
|
||||
SHA256 (dropbox-lepton-0.2022.04.13-429fe88_GH0.tar.gz) = c6e7bba595bc5f9ece5eca86ad33b2fe1487e1a0209788173fd74e0a79d79315
|
||||
SIZE (dropbox-lepton-0.2022.04.13-429fe88_GH0.tar.gz) = 52639694
|
||||
|
@ -1,28 +1,8 @@
|
||||
Do not use the zlib and md5 implementations bundled by
|
||||
the author(s). Use base-system's...
|
||||
Do not use the brotli, md5, and zlib implementations bundled by
|
||||
the author(s). Use port's or the base-system's...
|
||||
|
||||
-mi
|
||||
|
||||
+++ src/io/ZlibCompression.hh
|
||||
@@ -28,7 +28,7 @@
|
||||
*/
|
||||
|
||||
#include "Reader.hh"
|
||||
-#include "../../dependencies/zlib/zlib.h"
|
||||
+#include <zlib.h>
|
||||
namespace Sirikata{
|
||||
class SIRIKATA_EXPORT ZlibDecoderDecompressionReader : public DecoderReader {
|
||||
protected:
|
||||
+++ src/lepton/bitops.cc
|
||||
@@ -38,7 +38,7 @@ reading and writing of arrays
|
||||
#include <algorithm>
|
||||
#include <assert.h>
|
||||
extern "C" {
|
||||
-#include "../../dependencies/md5/md5.h"
|
||||
+#include <openssl/md5.h>
|
||||
}
|
||||
#include "bitops.hh"
|
||||
|
||||
+++ src/io/ioutil.cc
|
||||
@@ -12,5 +12,5 @@
|
||||
#include "Reader.hh"
|
||||
@ -31,69 +11,41 @@ the author(s). Use base-system's...
|
||||
+#include <openssl/md5.h>
|
||||
#ifdef _WIN32
|
||||
#include <Windows.h>
|
||||
+++ Makefile.am
|
||||
@@ -1,13 +1,13 @@
|
||||
-includes = -I$(srcdir)/dependencies/xz/src/liblzma/api -I$(srcdir)/src/vp8/util -I$(srcdir)/src/vp8/model -I$(srcdir)/src/vp8/decoder -I$(srcdir)/src/vp8/encoder -I$(srcdir)/dependencies -I$(srcdir)/dependencies -I$(srcdir)/dependencies/xz/src/liblzma/common -I$(srcdir)/dependencies/xz/src/common -I$(srcdir)/dependencies/xz/src/liblzma/lzma -I$(srcdir)/dependencies/xz/src/liblzma/lz -I$(srcdir)/dependencies/xz/src/liblzma/check -I$(srcdir)/dependencies/xz/src/liblzma/rangecoder -I$(srcdir)/dependencies/xz/src/liblzma/api -I$(srcdir)/dependencies/xz/src/liblzma/simple -I$(srcdir)/dependencies/xz/src/liblzma/delta -I$(srcdir)/dependencies/xz/src/liblzma
|
||||
--- Makefile.am 2022-04-13 03:01:03.000000000 -0400
|
||||
+++ Makefile.am 2023-12-17 17:58:27.679341000 -0500
|
||||
@@ -1,3 +1,3 @@
|
||||
-includes = -I$(srcdir)/dependencies/xz/src/liblzma/api -I$(srcdir)/src/vp8/util -I$(srcdir)/src/vp8/model -I$(srcdir)/src/vp8/decoder -I$(srcdir)/src/vp8/encoder -I$(srcdir)/dependencies -I$(srcdir)/dependencies -I$(srcdir)/dependencies/brotli/c/include -I$(srcdir)/dependencies/xz/src/liblzma/common -I$(srcdir)/dependencies/xz/src/common -I$(srcdir)/dependencies/xz/src/liblzma/lzma -I$(srcdir)/dependencies/xz/src/liblzma/lz -I$(srcdir)/dependencies/xz/src/liblzma/check -I$(srcdir)/dependencies/xz/src/liblzma/rangecoder -I$(srcdir)/dependencies/xz/src/liblzma/api -I$(srcdir)/dependencies/xz/src/liblzma/simple -I$(srcdir)/dependencies/xz/src/liblzma/delta -I$(srcdir)/dependencies/xz/src/liblzma
|
||||
+includes = -I$(srcdir)/src/vp8/util -I$(srcdir)/src/vp8/model -I$(srcdir)/src/vp8/decoder -I$(srcdir)/src/vp8/encoder
|
||||
|
||||
-AM_CXXFLAGS = $(CXX11_FLAGS) $(PICKY_CXXFLAGS) $(ARCH_FLAGS) $(SANITIZE_FLAGS) $(NODEBUG_CXXFLAGS) -DGIT_REVISION=\"$(shell git describe --dirty --always)\" $(includes)
|
||||
|
||||
@@ -5,14 +5,14 @@
|
||||
liblocalmd5_a_SOURCES = dependencies/md5/md5.c
|
||||
|
||||
-AM_CXXFLAGS = $(CXX11_FLAGS) $(CODEC_FLAGS) $(SYSTEM_DEPENDENCIES_CFLAGS) $(ARITHMETIC_CODER_CFLAGS) $(MEMORY_MANAGEMENT_CFLAGS) $(THREAD_FLAGS) $(BILLING_FLAGS) $(PICKY_CXXFLAGS) $(BENCHMARK_CFLAGS) $(ARCH_FLAGS) $(SANITIZE_FLAGS) $(NODEBUG_CXXFLAGS) -DGIT_REVISION=\"$(shell git describe --dirty --always 2> /dev/null || basename `pwd`)\" $(includes)
|
||||
+AM_CXXFLAGS = $(CXX11_FLAGS) $(PICKY_CXXFLAGS) $(ARCH_FLAGS) $(SANITIZE_FLAGS) $(NODEBUG_CXXFLAGS) $(includes)
|
||||
|
||||
AM_CFLAGS = $(C99_FLAGS) $(PICKY_CFLAGS) $(ARCH_FLAGS) $(SANITIZE_FLAGS) $(NODEBUG_CXXFLAGS)
|
||||
AM_CFLAGS = $(C99_FLAGS) $(CODEC_FLAGS) $(THREAD_FLAGS) $(BILLING_FLAGS) $(BENCHMARK_CFLAGS) $(PICKY_CFLAGS) $(ARCH_FLAGS) $(SANITIZE_FLAGS) $(NODEBUG_CXXFLAGS) -I$(srcdir)/dependencies/brotli/c/include
|
||||
|
||||
-noinst_LIBRARIES = liblocalzlib.a libtestdriver.a liblocalmd5.a
|
||||
-noinst_LIBRARIES = liblocalzlib.a liblocalmd5.a libtestdriver.a liblocalbrotli.a
|
||||
+noinst_LIBRARIES = libtestdriver.a
|
||||
|
||||
bin_PROGRAMS = lepton test_suite/test_invariants
|
||||
bin_PROGRAMS = lepton
|
||||
noinst_PROGRAMS = test_suite/test_invariants
|
||||
|
||||
-lepton_LDADD = liblocalmd5.a liblocalzlib.a -lpthread
|
||||
+lepton_LDADD = -lz -lcrypto -lpthread
|
||||
-lepton_LDADD = liblocalmd5.a liblocalbrotli.a $(SYSTEM_DEPENDENCIES_LDFLAGS) -lpthread
|
||||
+lepton_LDADD = -lz -lcrypto -lpthread -L${LOCALBASE}/lib -lbrotlidec -lbrotlienc
|
||||
|
||||
lepton_SOURCES = \
|
||||
@@ -80,5 +80,4 @@
|
||||
@@ -92,5 +92,4 @@
|
||||
src/vp8/model/model.cc \
|
||||
src/vp8/model/model.hh \
|
||||
- src/vp8/model/numeric.cc \
|
||||
src/vp8/model/numeric.hh \
|
||||
src/vp8/model/jpeg_meta.hh \
|
||||
@@ -94,33 +93,4 @@
|
||||
src/vp8/decoder/vpx_bool_reader.hh
|
||||
|
||||
-liblocalmd5_a_SOURCES = dependencies/md5/md5.c
|
||||
-
|
||||
-liblocalzlib_a_SOURCES = dependencies/zlib/inflate.c \
|
||||
-dependencies/zlib/inflate.h \
|
||||
-dependencies/zlib/gzguts.h \
|
||||
-dependencies/zlib/infback.c \
|
||||
-dependencies/zlib/trees.c \
|
||||
-dependencies/zlib/adler32.c \
|
||||
-dependencies/zlib/gzclose.c \
|
||||
-dependencies/zlib/inftrees.h \
|
||||
-dependencies/zlib/zconf.h \
|
||||
-dependencies/zlib/compress.c \
|
||||
-dependencies/zlib/crc32.c \
|
||||
-dependencies/zlib/crc32.h \
|
||||
-dependencies/zlib/trees.h \
|
||||
-dependencies/zlib/inftrees.c \
|
||||
-dependencies/zlib/zutil.c \
|
||||
-dependencies/zlib/gzwrite.c \
|
||||
-dependencies/zlib/zutil.h \
|
||||
-dependencies/zlib/zlib.h \
|
||||
-dependencies/zlib/inffixed.h \
|
||||
-dependencies/zlib/deflate.c \
|
||||
-dependencies/zlib/inffast.h \
|
||||
-dependencies/zlib/inffast.c \
|
||||
-dependencies/zlib/uncompr.c \
|
||||
-dependencies/zlib/gzread.c \
|
||||
-dependencies/zlib/deflate.h \
|
||||
-dependencies/zlib/gzlib.c
|
||||
-
|
||||
libtestdriver_a_SOURCES = test_suite/timing_driver.cc
|
||||
|
||||
@@ -142,5 +112,5 @@
|
||||
@@ -232,5 +231,5 @@
|
||||
src/io/DecoderPlatform.hh
|
||||
|
||||
-test_suite_test_invariants_LDADD = liblocalzlib.a liblocalmd5.a
|
||||
+test_suite_test_invariants_LDADD = -lz -lcrypto
|
||||
-test_suite_test_invariants_LDADD = $(SYSTEM_DEPENDENCIES_LDFLAGS) liblocalmd5.a
|
||||
+test_suite_test_invariants_LDADD = $(SYSTEM_DEPENDENCIES_LDFLAGS) -lcrypto
|
||||
|
||||
check_PROGRAMS = test_suite/test_recode_memory_bound test_suite/test_truncate_lowmem test_suite/test_android_lowmem test_suite/test_invariants test_suite/test_baseline_ujg test_suite/test_baseline test_suite/test_misc test_suite/test_iphone test_suite/test_phone_outdoor test_suite/test_truncate_ujg test_suite/test_truncate test_suite/test_SLR test_suite/test_progressive_ujg test_suite/test_progressive_disallowed test_suite/test_progressive test_suite/test_arithmetic_failfast test_suite/test_hq test_suite/test_baseline_unjailed test_suite/test_baseline_unjailed_thread test_suite/test_baseline_unjailed_decode test_suite/test_baseline_unjailed_decode_thread test_suite/test_seccomp_encode_main test_suite/test_seccomp_encode_thread test_suite/test_seccomp_decode_main test_suite/test_seccomp_decode_thread test_suite/test_nofsync test_suite/test_colorswap test_suite/test_odd_rst test_suite/test_trailing_header test_suite/test_trailing_rst test_suite/test_gray2sf test_suite/test_truncated_zero_run test_suite/test_bad_zero_run
|
||||
check_PROGRAMS = test_suite/test_recode_memory_bound test_suite/test_truncate_lowmem test_suite/test_android_lowmem test_suite/test_invariants test_suite/test_baseline_ujg test_suite/test_baseline test_suite/test_misc test_suite/test_iphone test_suite/test_phone_outdoor test_suite/test_truncate test_suite/test_single_row_truncate test_suite/test_SLR test_suite/test_progressive_ujg test_suite/test_progressive_disallowed test_suite/test_progressive test_suite/test_arithmetic_failfast test_suite/test_hq test_suite/test_baseline_unjailed test_suite/test_baseline_unjailed_thread test_suite/test_baseline_unjailed_decode test_suite/test_baseline_unjailed_decode_thread test_suite/test_seccomp_encode_main test_suite/test_seccomp_encode_thread test_suite/test_seccomp_decode_main test_suite/test_seccomp_decode_thread test_suite/test_nofsync test_suite/test_colorswap test_suite/test_odd_rst test_suite/test_trailing_header test_suite/test_trailing_rst test_suite/test_gray2sf test_suite/test_truncated_zero_run test_suite/test_bad_zero_run
|
||||
|
@ -1,148 +0,0 @@
|
||||
Make buildable on BSD-systems other than Apple. Mostly this
|
||||
simply replaces #ifdef __APPLE__ with #ifdef BSD, but not
|
||||
only...
|
||||
|
||||
-mi
|
||||
|
||||
+++ src/io/DecoderPlatform.hh
|
||||
@@ -26,7 +26,7 @@
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
-#if defined (__linux) || defined (__APPLE__)
|
||||
+#if defined (__linux) || defined (BSD)
|
||||
#define SIRIKATA_FUNCTION_EXPORT __attribute__ ((visibility("default")))
|
||||
#define SIRIKATA_EXPORT __attribute__ ((visibility("default")))
|
||||
#define SIRIKATA_PLUGIN_EXPORT __attribute__ ((visibility("default")))
|
||||
+++ src/io/MemMgrAllocator.cc
|
||||
@@ -41,7 +41,7 @@
|
||||
#include <cstdint>
|
||||
#include "DecoderPlatform.hh"
|
||||
#include "MemMgrAllocator.hh"
|
||||
-#if (defined(__APPLE__) || __cplusplus <= 199711L) && !defined(_WIN32)
|
||||
+#if (defined(BSD) || __cplusplus <= 199711L) && !defined(_WIN32)
|
||||
#define THREAD_LOCAL_STORAGE __thread
|
||||
#else
|
||||
#include <atomic>
|
||||
@@ -151,7 +151,7 @@ void setup_memmgr(MemMgrState& memmgr, u
|
||||
}
|
||||
void memmgr_init(size_t main_thread_pool_size, size_t worker_thread_pool_size, size_t num_workers, size_t x_min_pool_alloc_quantas, bool needs_huge_pages)
|
||||
{
|
||||
-#ifdef __APPLE__
|
||||
+#ifdef BSD
|
||||
// in apple, the thread_local storage winds up different when destroying the thread
|
||||
num_workers *= 2;
|
||||
#endif
|
||||
+++ src/lepton/fork_serve.cc
|
||||
@@ -10,7 +10,7 @@
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <algorithm>
|
||||
-#ifndef __APPLE__
|
||||
+#ifndef BSD
|
||||
#include <wait.h>
|
||||
#else
|
||||
#include <sys/wait.h>
|
||||
+++ src/lepton/socket_serve.cc
|
||||
@@ -11,7 +11,7 @@
|
||||
#include <algorithm>
|
||||
#include <netinet/in.h>
|
||||
#include <sys/time.h>
|
||||
-#ifndef __APPLE__
|
||||
+#ifndef BSD
|
||||
#include <sys/signalfd.h>
|
||||
#include <wait.h>
|
||||
#else
|
||||
@@ -127,7 +127,7 @@ int should_wait_bitmask(size_t children_
|
||||
|
||||
int make_sigchld_fd() {
|
||||
int fd = -1;
|
||||
-#ifndef __APPLE__
|
||||
+#ifndef BSD
|
||||
sigset_t sigset;
|
||||
int err = sigemptyset(&sigset);
|
||||
always_assert(err == 0);
|
||||
@@ -233,7 +233,7 @@ void serving_loop(int unix_domain_socket
|
||||
if (fds[i].revents & POLLIN) {
|
||||
fds[i].revents = 0;
|
||||
if (fds[i].fd == sigchild_fd) {
|
||||
-#ifndef __APPLE__
|
||||
+#ifndef BSD
|
||||
struct signalfd_siginfo info;
|
||||
ssize_t ignore = read(fds[i].fd, &info, sizeof(info));
|
||||
(void)ignore;
|
||||
+++ src/vp8/model/jpeg_meta.hh
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
#include <vector>
|
||||
#include <type_traits>
|
||||
-#ifndef __APPLE__
|
||||
+#ifndef BSD
|
||||
#ifndef _WIN32
|
||||
#include <endian.h>
|
||||
#endif
|
||||
+++ src/vp8/util/memory.cc
|
||||
@@ -13,7 +13,7 @@
|
||||
#ifdef _WIN32
|
||||
#define USE_STANDARD_MEMORY_ALLOCATORS
|
||||
#endif
|
||||
-#if defined(__APPLE__) || (__cplusplus <= 199711L && !defined(_WIN32))
|
||||
+#if defined(BSD) || (__cplusplus <= 199711L && !defined(_WIN32))
|
||||
#define THREAD_LOCAL_STORAGE __thread
|
||||
#else
|
||||
#define THREAD_LOCAL_STORAGE thread_local
|
||||
+++ src/vp8/util/vpx_config.hh
|
||||
@@ -44,6 +44,6 @@
|
||||
#ifndef _BSD_SOURCE
|
||||
#define _BSD_SOURCE /* See feature_test_macros(7) */
|
||||
#endif
|
||||
-#include <endian.h>
|
||||
+#include <sys/endian.h>
|
||||
#endif
|
||||
#endif
|
||||
+++ test_suite/timing_driver.cc
|
||||
@@ -6,11 +6,13 @@
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
+#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/select.h>
|
||||
+#include <sys/signal.h>
|
||||
#include <sys/time.h>
|
||||
#include "smalljpg.hh"
|
||||
|
||||
+++ src/lepton/jpgcoder.cc
|
||||
@@ -77,11 +77,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBI
|
||||
int g_argc = 0;
|
||||
const char** g_argv = NULL;
|
||||
#ifndef GIT_REVISION
|
||||
-#include "version.hh"
|
||||
-#ifndef GIT_REVISION
|
||||
#define GIT_REVISION "unknown"
|
||||
#endif
|
||||
-#endif
|
||||
bool fast_exit = true;
|
||||
#ifdef SKIP_VALIDATION
|
||||
bool g_skip_validation = true;
|
||||
+++ src/io/ioutil.cc
|
||||
@@ -325,13 +325,13 @@
|
||||
int input_tee_flags = 0;
|
||||
int copy_to_storage_flags = 0;
|
||||
-#ifndef __APPLE__
|
||||
+#ifndef BSD
|
||||
input_tee_flags = fcntl(input_tee, F_GETFL, 0);
|
||||
#endif
|
||||
fcntl(input_tee, F_SETFL, input_tee_flags | O_NONBLOCK);
|
||||
-#ifndef __APPLE__
|
||||
+#ifndef BSD
|
||||
copy_to_input_tee_flags = fcntl(copy_to_input_tee, F_GETFL, 0);
|
||||
#endif
|
||||
fcntl(copy_to_input_tee, F_SETFL, copy_to_input_tee_flags | O_NONBLOCK);
|
||||
-#ifndef __APPLE__
|
||||
+#ifndef BSD
|
||||
copy_to_storage_flags = fcntl(copy_to_storage, F_GETFL, 0);
|
||||
#endif
|
@ -1,217 +0,0 @@
|
||||
Make the code work -- or attempt to -- on CPUs with only the SSSE3
|
||||
instruction set...
|
||||
|
||||
-mi
|
||||
|
||||
--- src/lepton/idct.cc
|
||||
+++ src/lepton/idct.cc
|
||||
@@ -1,8 +1,7 @@
|
||||
/* -*-mode:c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
||||
-#include <emmintrin.h>
|
||||
-#include <smmintrin.h>
|
||||
#include <immintrin.h>
|
||||
#include "../vp8/util/aligned_block.hh"
|
||||
+#include "../vp8/util/mm_mullo_epi32.hh"
|
||||
|
||||
namespace idct_local{
|
||||
enum {
|
||||
@@ -23,7 +21,10 @@ enum {
|
||||
r2 = 181 // 256/sqrt(2)
|
||||
};
|
||||
}
|
||||
-void idct_scalar(const AlignedBlock &block, const uint16_t q[64], int16_t outp[64], bool ignore_dc) {
|
||||
+
|
||||
+#ifndef __SSE2__
|
||||
+static void
|
||||
+idct_scalar(const AlignedBlock &block, const uint16_t q[64], int16_t outp[64], bool ignore_dc) {
|
||||
int32_t intermed[64];
|
||||
using namespace idct_local;
|
||||
// Horizontal 1-D IDCT.
|
||||
@@ -149,6 +150,8 @@ void idct_scalar(const AlignedBlock &blo
|
||||
//outp[i]>>=3;
|
||||
}
|
||||
}
|
||||
+#else /* At least SSE2 is available { */
|
||||
+
|
||||
template<int which_vec, int offset, int stride> __m128i vget_raster(const AlignedBlock&block) {
|
||||
return _mm_set_epi32(block.coefficients_raster(which_vec + 3 * stride + offset),
|
||||
block.coefficients_raster(which_vec + 2 * stride + offset),
|
||||
@@ -162,8 +165,8 @@ template<int offset, int stride> __m128i
|
||||
q[which_vec + offset]));
|
||||
}
|
||||
|
||||
-
|
||||
-__m128i epi32l_to_epi16(__m128i lowvec) {
|
||||
+static __m128i
|
||||
+epi32l_to_epi16(__m128i lowvec) {
|
||||
return _mm_shuffle_epi8(lowvec, _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
0xd, 0xc, 0x9, 0x8, 0x5, 0x4, 0x1, 0x0));
|
||||
}
|
||||
@@ -181,9 +184,8 @@ __m128i epi32l_to_epi16(__m128i lowvec)
|
||||
}while(0)
|
||||
|
||||
|
||||
-
|
||||
-
|
||||
-void idct_sse(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) {
|
||||
+static void
|
||||
+idct_sse(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) {
|
||||
|
||||
char vintermed_storage[64 * sizeof(int32_t) + 16];
|
||||
// align intermediate storage to 16 bytes
|
||||
@@ -202,7 +204,12 @@ void idct_sse(const AlignedBlock &block,
|
||||
xv6 = vget_raster<0, 5, 8>(block);
|
||||
xv7 = vget_raster<0, 3, 8>(block);
|
||||
if (__builtin_expect(ignore_dc, true)) {
|
||||
+#ifdef __SSE4_1__
|
||||
xv0 = _mm_insert_epi32(xv0, 0, 0);
|
||||
+#else
|
||||
+// See http://stackoverflow.com/questions/38384520/is-there-a-sse2-equivalent-for-mm-insert-epi32
|
||||
+ xv0 = _mm_and_si128(xv0, _mm_set_epi32(-1,-1,-1, 0));
|
||||
+#endif
|
||||
}
|
||||
} else {
|
||||
xv0 = vget_raster<32, 0, 8>(block);
|
||||
@@ -378,7 +385,8 @@ __m128i m256_to_epi16(__m256i vec) {
|
||||
|
||||
}*/
|
||||
#if __AVX2__
|
||||
-void idct_avx(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) {
|
||||
+static void
|
||||
+idct_avx(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) {
|
||||
// align intermediate storage to 16 bytes
|
||||
using namespace idct_local;
|
||||
// Horizontal 1-D IDCT.
|
||||
@@ -589,11 +597,16 @@ void idct_avx(const AlignedBlock &block,
|
||||
#endif
|
||||
}
|
||||
}
|
||||
-#else
|
||||
-void idct_avx(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) {
|
||||
- idct_sse(block, q, voutp, ignore_dc);
|
||||
-}
|
||||
#endif
|
||||
-void idct(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) {
|
||||
+#endif /* } SSE2 or higher is available */
|
||||
+
|
||||
+void
|
||||
+idct(const AlignedBlock &block, const uint16_t q[64], int16_t voutp[64], bool ignore_dc) {
|
||||
+#ifdef __AVX2__
|
||||
idct_avx(block, q, voutp, ignore_dc);
|
||||
+#elif __SSE2__
|
||||
+ idct_sse(block, q, voutp, ignore_dc);
|
||||
+#else
|
||||
+ idct_scalar(block, q, voutp, ignore_dc);
|
||||
+#endif
|
||||
}
|
||||
--- src/lepton/vp8_encoder.cc
|
||||
+++ src/lepton/vp8_encoder.cc
|
||||
@@ -150,29 +150,34 @@ void VP8ComponentEncoder::process_row(Pr
|
||||
}
|
||||
}
|
||||
uint32_t aligned_block_cost(const AlignedBlock &block) {
|
||||
- uint32_t cost = 16; // .25 cost for zeros
|
||||
- if (VECTORIZE) {
|
||||
- for (int i = 0; i < 64; i+= 8) {
|
||||
- __m128i val = _mm_abs_epi16(_mm_load_si128((const __m128i*)(const char*)(block.raw_data() + i)));
|
||||
- __m128i v_cost = _mm_set1_epi16(0);
|
||||
- while (!_mm_test_all_zeros(val, val)) {
|
||||
- __m128i mask = _mm_cmpgt_epi16(val, _mm_setzero_si128());
|
||||
- v_cost = _mm_add_epi16(v_cost, _mm_and_si128(mask, _mm_set1_epi16(2)));
|
||||
- val = _mm_srli_epi16(val, 1);
|
||||
- }
|
||||
- __m128i sum = _mm_add_epi16(v_cost, _mm_srli_si128(v_cost, 8));
|
||||
- sum = _mm_add_epi16(sum ,_mm_srli_si128(sum, 4));
|
||||
- sum = _mm_add_epi16(sum, _mm_srli_si128(sum, 2));
|
||||
- cost += _mm_extract_epi16(sum, 0);
|
||||
- }
|
||||
- } else {
|
||||
- uint32_t scost = 0;
|
||||
- for (int i = 0; i < 64; ++i) {
|
||||
- scost += 1 + 2 * uint16bit_length(abs(block.raw_data()[i]));
|
||||
+#ifdef __SSE2__ /* SSE2 or higher instruction set available { */
|
||||
+ const __m128i zero = _mm_setzero_si128();
|
||||
+ __m128i v_cost;
|
||||
+ for (int i = 0; i < 64; i+= 8) {
|
||||
+ __m128i val = _mm_abs_epi16(_mm_load_si128((const __m128i*)(const char*)(block.raw_data() + i)));
|
||||
+ v_cost = _mm_set1_epi16(0);
|
||||
+#ifndef __SSE4_1__
|
||||
+ while (_mm_movemask_epi8(_mm_cmpeq_epi32(val, zero)) != 0xFFFF)
|
||||
+#else
|
||||
+ while (!_mm_test_all_zeros(val, val))
|
||||
+#endif
|
||||
+ {
|
||||
+ __m128i mask = _mm_cmpgt_epi16(val, zero);
|
||||
+ v_cost = _mm_add_epi16(v_cost, _mm_and_si128(mask, _mm_set1_epi16(2)));
|
||||
+ val = _mm_srli_epi16(val, 1);
|
||||
}
|
||||
- cost = scost;
|
||||
+ v_cost = _mm_add_epi16(v_cost, _mm_srli_si128(v_cost, 8));
|
||||
+ v_cost = _mm_add_epi16(v_cost ,_mm_srli_si128(v_cost, 4));
|
||||
+ v_cost = _mm_add_epi16(v_cost, _mm_srli_si128(v_cost, 2));
|
||||
}
|
||||
- return cost;
|
||||
+ return 16 + _mm_extract_epi16(v_cost, 0);
|
||||
+#else /* } No SSE2 instructions { */
|
||||
+ uint32_t scost = 0;
|
||||
+ for (int i = 0; i < 64; ++i) {
|
||||
+ scost += 1 + 2 * uint16bit_length(abs(block.raw_data()[i]));
|
||||
+ }
|
||||
+ return scost;
|
||||
+#endif /* } */
|
||||
}
|
||||
|
||||
#ifdef ALLOW_FOUR_COLORS
|
||||
--- src/vp8/model/model.hh
|
||||
+++ src/vp8/model/model.hh
|
||||
@@ -11,9 +11,7 @@
|
||||
#include "branch.hh"
|
||||
#include "../util/aligned_block.hh"
|
||||
#include "../util/block_based_image.hh"
|
||||
-#include <smmintrin.h>
|
||||
-#include <immintrin.h>
|
||||
-#include <emmintrin.h>
|
||||
+#include "../util/mm_mullo_epi32.hh"
|
||||
|
||||
class BoolEncoder;
|
||||
constexpr bool advanced_dc_prediction = true;
|
||||
--- src/vp8/model/numeric.hh
|
||||
+++ src/vp8/model/numeric.hh
|
||||
@@ -8,8 +8,8 @@
|
||||
// for std::min
|
||||
#include <algorithm>
|
||||
#include <assert.h>
|
||||
-#include <smmintrin.h>
|
||||
-#include <emmintrin.h>
|
||||
+#include <immintrin.h>
|
||||
+#include "../util/mm_mullo_epi32.hh"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <intrin.h>
|
||||
--- src/vp8/util/mm_mullo_epi32.hh
|
||||
+++ src/vp8/util/mm_mullo_epi32.hh
|
||||
@@ -0,0 +1,16 @@
|
||||
+#if defined(__SSE2__) && !defined(__SSE4_1__) && !defined(MM_MULLO_EPI32_H)
|
||||
+#define MM_MULLO_EPI32_H
|
||||
+#include <immintrin.h>
|
||||
+// See: http://stackoverflow.com/questions/10500766/sse-multiplication-of-4-32-bit-integers
|
||||
+// and https://software.intel.com/en-us/forums/intel-c-compiler/topic/288768
|
||||
+static inline __m128i
|
||||
+_mm_mullo_epi32(const __m128i &a, const __m128i &b)
|
||||
+{
|
||||
+ __m128i tmp1 = _mm_mul_epu32(a,b); /* mul 2,0*/
|
||||
+ __m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(a,4),
|
||||
+ _mm_srli_si128(b,4)); /* mul 3,1 */
|
||||
+ return _mm_unpacklo_epi32( /* shuffle results to [63..0] and pack */
|
||||
+ _mm_shuffle_epi32(tmp1, _MM_SHUFFLE (0,0,2,0)),
|
||||
+ _mm_shuffle_epi32(tmp2, _MM_SHUFFLE (0,0,2,0)));
|
||||
+}
|
||||
+#endif
|
||||
+++ src/lepton/recoder.cc
|
||||
@@ -99,5 +99,5 @@
|
||||
|
||||
static bool aligned_memchr16ff(const unsigned char *local_huff_data) {
|
||||
-#if 1
|
||||
+#if !defined(__i386__)
|
||||
__m128i buf = _mm_load_si128((__m128i const*)local_huff_data);
|
||||
__m128i ff = _mm_set1_epi8(-1);
|
@ -1,19 +1,4 @@
|
||||
+++ src/io/MemMgrAllocator.cc
|
||||
@@ -185,5 +185,5 @@
|
||||
}
|
||||
if (!data) {
|
||||
- fprintf(stderr, "Insufficient memory: unable to mmap or calloc %ld bytes\n", total_size);
|
||||
+ fprintf(stderr, "Insufficient memory: unable to mmap or calloc %zu bytes\n", total_size);
|
||||
fflush(stderr);
|
||||
exit(37);
|
||||
+++ test_suite/timing_driver.cc
|
||||
@@ -79,5 +81,5 @@
|
||||
}
|
||||
} else {
|
||||
- fprintf(stderr, "Files differ in size %ld != %ld\n", data_size, roundtrip_size);
|
||||
+ fprintf(stderr, "Files differ in size %zu != %zu\n", data_size, roundtrip_size);
|
||||
}
|
||||
int status;
|
||||
@@ -396,5 +398,5 @@
|
||||
leptonBuffer.size());
|
||||
if (result != testImage.size()) {
|
||||
@ -21,13 +6,6 @@
|
||||
+ fprintf(stderr, "Output Size %zu != %zu\n", result, testImage.size());
|
||||
}
|
||||
always_assert(result == (size_t)testImage.size() &&
|
||||
@@ -508,5 +510,5 @@
|
||||
for (std::vector<const char *>::const_iterator filename = filenames.begin(); filename != filenames.end(); ++filename) {
|
||||
testImage = load(*filename);
|
||||
- fprintf(stderr, "Loading iPhone %ld\n", testImage.size());
|
||||
+ fprintf(stderr, "Loading iPhone %u\n", testImage.size());
|
||||
int retval = run_test(testImage,
|
||||
use_lepton, jailed, inject_syscall_level, allow_progressive_files, multithread,
|
||||
+++ src/lepton/validation.cc
|
||||
@@ -159,5 +159,5 @@
|
||||
}
|
||||
@ -40,13 +18,96 @@
|
||||
@@ -339,3 +339,3 @@
|
||||
static_assert(sizeof(buffer) >= header.size(), "Buffer must be able to hold header");
|
||||
- uint32_t cursor = 0;
|
||||
+ ssize_t cursor = 0;
|
||||
+ size_t cursor = 0;
|
||||
bool finished = false;
|
||||
+++ src/lepton/jpgcoder.cc
|
||||
@@ -1101,5 +1098,5 @@
|
||||
if (false) {
|
||||
fprintf(stderr,
|
||||
- "Predicted Decompress %ld\nAllocated This Run %ld vs Max allocated %ld\nMax Peak Size %ld vs %ld\naug-gbg %ld, garbage %ld\nbit_writer %ld\nmux %d\n",
|
||||
+ "Predicted Decompress %zu\nAllocated This Run %zu vs Max allocated %zu\nMax Peak Size %zu vs %zu\naug-gbg %zu, garbage %zu\nbit_writer %zu\nmux %d\n",
|
||||
decom_memory_bound,
|
||||
Sirikata::memmgr_size_allocated(),
|
||||
@@ -576,5 +576,5 @@
|
||||
if (del > 0) {
|
||||
//fprintf(stderr, "D\n");
|
||||
- if (del < cursor) {
|
||||
+ if ((size_t)del < cursor) {
|
||||
//fprintf(stderr, "E %ld %ld\n", del, cursor - del);
|
||||
memmove(buffer, buffer + del, cursor - del);
|
||||
--- src/vp8/encoder/encoder.cc 2022-04-13 03:01:03.000000000 -0400
|
||||
+++ src/vp8/encoder/encoder.cc 2023-12-17 18:33:33.889333000 -0500
|
||||
@@ -380,12 +380,4 @@
|
||||
}
|
||||
|
||||
- double delta = 0;
|
||||
- for (int i = 0; i < 64; ++i) {
|
||||
- delta += outp[i] - outp_sans_dc[i];
|
||||
- //fprintf (stderr, "%d + %d = %d\n", outp_sans_dc[i], context.here().dc(), outp[i]);
|
||||
- }
|
||||
- delta /= 64;
|
||||
- //fprintf (stderr, "==== %f = %f =?= %d\n", delta, delta * 8, context.here().dc());
|
||||
-
|
||||
int debug_width = LeptonDebug::getDebugWidth((int)color);
|
||||
int offset = k_debug_block[(int)color];
|
||||
--- test_suite/test_invariants.cc 2022-04-13 03:01:03.000000000 -0400
|
||||
+++ test_suite/test_invariants.cc 2023-12-17 18:37:42.915625000 -0500
|
||||
@@ -464,5 +464,5 @@
|
||||
uint8_t* d =&aligned7d.at(0, 2, 1, 3, 2, 1, 0);
|
||||
*d = 4;
|
||||
- size_t offset = d - (uint8_t*)nullptr;
|
||||
+ size_t offset = (uintptr_t)d;
|
||||
always_assert(0 == (offset & 15) && "Must have alignment");
|
||||
always_assert(aligned7d.at(0, 2, 1, 3, 2, 1, 0) == 4);
|
||||
@@ -470,5 +470,5 @@
|
||||
uint8_t* d2 =&a7.at(0, 2, 1, 3, 2, 1, 0);
|
||||
*d2 = 5;
|
||||
- offset = d2 - (uint8_t*)nullptr;
|
||||
+ offset = (uintptr_t)d2;
|
||||
if (offset & 15) {
|
||||
fprintf(stderr, "Array7d array doesn't require alignment");
|
||||
--- src/vp8/model/model.cc 2022-04-13 03:01:03.000000000 -0400
|
||||
+++ src/vp8/model/model.cc 2023-12-17 18:40:51.433134000 -0500
|
||||
@@ -44,8 +44,8 @@
|
||||
__m256i r1 = _mm256_loadu_si256((const __m256i*)(data + 32));
|
||||
__m256i r2 = _mm256_loadu_si256((const __m256i*)(data + 64));
|
||||
- size_t offset = data - (char*)0;
|
||||
+ size_t offset = (uintptr_t)data;
|
||||
size_t align = 32 - (offset % 32);
|
||||
char * dataend = (char*)end;
|
||||
- size_t offsetend = dataend - (char*)0;
|
||||
+ size_t offsetend = (uintptr_t)dataend;
|
||||
__m256i *write_end = (__m256i*)(dataend - (offsetend % 32));
|
||||
__m256i *write_cursor = (__m256i*)(data + align);
|
||||
--- src/vp8/util/block_based_image.hh 2022-04-13 03:01:03.000000000 -0400
|
||||
+++ src/vp8/util/block_based_image.hh 2023-12-17 18:41:56.054633000 -0500
|
||||
@@ -67,5 +67,5 @@
|
||||
nblocks_ = nblocks;
|
||||
storage_ = (uint8_t*)custom_calloc(nblocks * sizeof(Block) + 31);
|
||||
- size_t offset = storage_ - (uint8_t*)nullptr;
|
||||
+ size_t offset = (uintptr_t)storage_;
|
||||
if (offset & 31) { //needs alignment adjustment
|
||||
image_ = (Block*)(storage_ + 32 - (offset & 31));
|
||||
--- src/lepton/idct.cc 2022-04-13 03:01:03.000000000 -0400
|
||||
+++ src/lepton/idct.cc 2023-12-17 18:46:58.471208000 -0500
|
||||
@@ -198,5 +198,5 @@
|
||||
char vintermed_storage[64 * sizeof(int32_t) + 16];
|
||||
// align intermediate storage to 16 bytes
|
||||
- int32_t *vintermed = (int32_t*) (vintermed_storage + 16 - ((vintermed_storage - (char*)nullptr) &0xf));
|
||||
+ int32_t *vintermed = (int32_t*) (vintermed_storage + 16 - ((uintptr_t)vintermed_storage &0xf));
|
||||
using namespace idct_local;
|
||||
// Horizontal 1-D IDCT.
|
||||
--- src/io/Zlib0.hh 2022-04-13 03:01:03.000000000 -0400
|
||||
+++ src/io/Zlib0.hh 2023-12-17 18:48:25.076584000 -0500
|
||||
@@ -39,5 +39,4 @@
|
||||
uint32_t mAdler32; // adler32 sum
|
||||
bool mClosed;
|
||||
- uint16_t mBilledBytesLeft;
|
||||
std::pair<uint32, JpegError> writeHeader();
|
||||
|
||||
--- src/lepton/jpgcoder.cc 2022-04-13 03:01:03.000000000 -0400
|
||||
+++ src/lepton/jpgcoder.cc 2023-12-17 18:58:02.650346000 -0500
|
||||
@@ -1245,11 +1245,4 @@
|
||||
current_run_size = cumulative_buffer_size;
|
||||
|
||||
- size_t bit_writer_augmentation = 0;
|
||||
- if (g_allow_progressive) {
|
||||
- for (size_t cur_size = jpgfilesize - 1; cur_size; cur_size >>=1) {
|
||||
- bit_writer_augmentation |= cur_size;
|
||||
- }
|
||||
- bit_writer_augmentation += 1; // this is used to compute the buffer size of the abit_writer for writing
|
||||
- }
|
||||
size_t garbage_augmentation = 0;
|
||||
for (size_t cur_size = hdrs - 1; cur_size; cur_size >>=1) {
|
||||
|
Loading…
Reference in New Issue
Block a user