mirror of
https://git.FreeBSD.org/ports.git
synced 2024-11-18 00:10:04 +00:00
math/sfft: port to armv7/aarch64, touch up
- replace the complex.h hack with a less crude hack
- use sse2neon to build on armv7/aarch64
- armv7 should work, but currently fails due to an unrelated linker issue
- touch up CFLAGS slightly
- rework do-test
This commit is contained in:
parent
064036198c
commit
9804638940
@ -1,6 +1,6 @@
|
||||
PORTNAME= sfft
|
||||
PORTVERSION= 0.1.0
|
||||
PORTREVISION= 12
|
||||
DISTVERSION= 0.1.0
|
||||
PORTREVISION= 13
|
||||
CATEGORIES= math
|
||||
MASTER_SITES= http://spiral.net/software/sfft/ LOCAL/bf
|
||||
DISTFILES= ${DISTNAME}${EXTRACT_SUFX}
|
||||
@ -12,12 +12,18 @@ WWW= https://spiral.net/software/sfft.html
|
||||
|
||||
LICENSE= GPLv2
|
||||
|
||||
ONLY_FOR_ARCHS= amd64 i386
|
||||
ONLY_FOR_ARCHS= aarch64 amd64 armv7 i386
|
||||
ONLY_FOR_ARCHS_REASON= requires SSE instructions, which are x86-specific
|
||||
|
||||
BROKEN_armv7= /usr/local/bin/ld: error: unsupported option: -z relro
|
||||
|
||||
BUILD_DEPENDS_aarch64= ${LOCALBASE}/include/sse2neon.h:devel/sse2neon
|
||||
BUILD_DEPENDS_armv7= ${BUILD_DEPENDS_aarch64}
|
||||
BUILD_DEPENDS+= ${BUILD_DEPENDS_${ARCH}}
|
||||
|
||||
LIB_DEPENDS= libfftw3.so:math/fftw3
|
||||
|
||||
USES= uidfix zip
|
||||
USES= localbase:ldflags uidfix zip
|
||||
USE_GCC= yes
|
||||
USE_LDCONFIG= yes
|
||||
|
||||
@ -26,8 +32,10 @@ OPTIONS_DEFAULT= OPTIMIZED_CFLAGS
|
||||
|
||||
BUILD_WRKSRC= ${WRKSRC}/src
|
||||
INSTALL_WRKSRC= ${BUILD_WRKSRC}
|
||||
CFLAGS+= -fopenmp -msse2 -Iflopcount -I${LOCALBASE}/include
|
||||
LDFLAGS+= -L${LOCALBASE}/lib
|
||||
CFLAGS_armv7= -mfpu=neon
|
||||
CFLAGS_i386= -msse2
|
||||
CFLAGS+= -fopenmp -Iflopcount
|
||||
CXXFLAGS+= ${CFLAGS_${ARCH}} -fopenmp -Iflopcount
|
||||
HEADERS= sfft.h
|
||||
HDIR= include/sfft
|
||||
MAKE_ENV= LDADD="-lfftw3 ${LIBM}" LIB=sfft SHLIB_MAJOR="${SHLIB_MAJOR}" \
|
||||
@ -43,11 +51,6 @@ SRCS= common.cc computefourier-1.0-2.0.cc \
|
||||
|
||||
.include <bsd.port.options.mk>
|
||||
|
||||
.if !${ARCH:Mamd64} && !${MACHINE_CPU:Msse2}
|
||||
IGNORE= this port requires SSE2, and benefits from SSE3 -- set CPUTYPE\
|
||||
appropriately
|
||||
.endif
|
||||
|
||||
LIBM= -lm
|
||||
|
||||
.if ${PORT_OPTIONS:MDOCS} || make(makesum)
|
||||
@ -73,36 +76,20 @@ MAKE_ENV+= WITHOUT_PROFILE=yes
|
||||
.endif
|
||||
|
||||
post-extract:
|
||||
@${CP} /usr/include/complex.h ${BUILD_WRKSRC}/sfftcomplex.h
|
||||
@${PRINTF} "LIBDIR=\t${PREFIX}/lib\n.include <bsd.lib.mk>\n" > \
|
||||
${BUILD_WRKSRC}/Makefile
|
||||
|
||||
post-patch:
|
||||
@${REINPLACE_CMD} -e 's/string\.h/cstring/' \
|
||||
${WRKSRC}/src/utils.cc
|
||||
@${REINPLACE_CMD} -E -e '/<complex\.h>/ \
|
||||
{s/<complex\.h>/ "sfftcomplex.h"/; x ; \
|
||||
s|^.*$$|#endif|; G; x; \
|
||||
s|^.*$$|extern "C" {|; G; x; \
|
||||
s|^.*$$|#ifdef __cplusplus|; G; x; \
|
||||
s|^.*$$|#ifdef __cplusplus|; H; \
|
||||
s|^.*$$|}|; H; \
|
||||
s|^.*$$|#endif|; H; x;}' \
|
||||
${WRKSRC}/src/computefourier-1.0-2.0.h \
|
||||
${WRKSRC}/src/computefourier-3.0.h \
|
||||
${WRKSRC}/src/fft.h
|
||||
|
||||
CORELIMIT?= /usr/bin/limits -Sc 0
|
||||
|
||||
do-test:
|
||||
@cd ${BUILD_WRKSRC}; \
|
||||
${CXX} ${CXXFLAGS} -o sfft-verification verification.cc \
|
||||
${LDFLAGS} libsfft.a -lfftw3 ${LIBM} ; \
|
||||
for _v in 1 2 3 ; do \
|
||||
for _k in 5 10 50; do \
|
||||
echo "Checking sfft version $${_v} with $${_k} frequency components:"; \
|
||||
${CORELIMIT} ./sfft-verification -k $${_k} -r 3 -v $${_v} || ${TRUE} ; \
|
||||
done ; done
|
||||
cd ${BUILD_WRKSRC} && ${CXX} ${CXXFLAGS} -o sfft-verification verification.cc \
|
||||
${LDFLAGS} libsfft.a -lfftw3 ${LIBM}
|
||||
.for v in 1 2 3
|
||||
. for k in 5 10 50
|
||||
@${ECHO_CMD} "Checking sfft version $v with $k frequency components:"
|
||||
cd ${BUILD_WRKSRC} && ${CORELIMIT} ./sfft-verification -k $k -r 3 -v $v || ${TRUE}
|
||||
. endfor
|
||||
.endfor
|
||||
|
||||
post-install:
|
||||
@${MKDIR} ${STAGEDIR}${PREFIX}/${HDIR}
|
||||
|
@ -1,6 +1,6 @@
|
||||
--- src/computefourier-1.0-2.0.cc.orig 2013-06-13 08:12:25.000000000 -0400
|
||||
+++ src/computefourier-1.0-2.0.cc 2013-08-09 00:26:54.000000000 -0400
|
||||
@@ -248,8 +248,13 @@
|
||||
--- src/computefourier-1.0-2.0.cc.orig 2013-06-13 12:12:25 UTC
|
||||
+++ src/computefourier-1.0-2.0.cc
|
||||
@@ -248,8 +248,13 @@ inner_loop_locate(sfft_v1v2_data * data, complex_t * o
|
||||
__m128d ad_bc = _mm_mul_pd(ab, dc);
|
||||
__m128d ac_mbd = _mm_mul_pd(ac_bd, signs);
|
||||
|
||||
@ -15,7 +15,7 @@
|
||||
unsigned int i_mod_B_p_offset = (i & B2_m_1) + offset;
|
||||
__m128d xy = _mm_load_pd(d_x_sampt + i_mod_B_p_offset);
|
||||
__m128d st = _mm_add_pd(xy, ab_times_cd);
|
||||
@@ -283,7 +288,13 @@
|
||||
@@ -283,7 +288,13 @@ inner_loop_locate(sfft_v1v2_data * data, complex_t * o
|
||||
__m128d ab_square = _mm_mul_pd(ab, ab);
|
||||
__m128d cd_square = _mm_mul_pd(cd, cd);
|
||||
|
||||
@ -29,7 +29,7 @@
|
||||
|
||||
_mm_store_pd(samples + j, r);
|
||||
}
|
||||
@@ -390,11 +401,23 @@
|
||||
@@ -390,11 +401,23 @@ estimate_values(sfft_v1v2_data * data, const int *hits
|
||||
__m128d ad_bc = _mm_mul_pd(ab, dc);
|
||||
__m128d mad_bc = _mm_mul_pd(ad_bc, signs);
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
--- src/computefourier-3.0.cc.orig 2013-06-13 08:12:26.000000000 -0400
|
||||
+++ src/computefourier-3.0.cc 2013-08-10 17:02:52.000000000 -0400
|
||||
@@ -416,27 +416,64 @@
|
||||
--- src/computefourier-3.0.cc.orig 2013-06-13 12:12:26 UTC
|
||||
+++ src/computefourier-3.0.cc
|
||||
@@ -416,27 +416,64 @@ update_gaussian_loops2(int key, complex_t value, compl
|
||||
|
||||
__m128d t1r = _mm_mul_pd(v1r, ab31);
|
||||
__m128d t1i = _mm_mul_pd(v1i, ba31);
|
||||
@ -65,7 +65,7 @@
|
||||
|
||||
FLOPCOUNT_INCREMENT(6 * (4 + 2));
|
||||
|
||||
@@ -524,11 +561,28 @@
|
||||
@@ -524,11 +561,28 @@ estimate_freq_gauss_loops2(sfft_v3_data * data, int WH
|
||||
__m128d a3b3_sq = _mm_mul_pd(a3b3, a3b3);
|
||||
FLOPCOUNT_INCREMENT(8);
|
||||
|
||||
@ -94,7 +94,7 @@
|
||||
FLOPCOUNT_INCREMENT(1);
|
||||
|
||||
_mm_store_pd(zero_buck_check, zbc);
|
||||
@@ -681,13 +735,35 @@
|
||||
@@ -681,13 +735,35 @@ estimate_freq_mansour_loops2(sfft_v3_data * data, int
|
||||
__m128d a3b3_sq = _mm_mul_pd(a3b3, a3b3);
|
||||
FLOPCOUNT_INCREMENT(8);
|
||||
|
||||
|
15
math/sfft/files/patch-src_fft.h
Normal file
15
math/sfft/files/patch-src_fft.h
Normal file
@ -0,0 +1,15 @@
|
||||
--- src/fft.h.orig 2024-10-15 16:15:00 UTC
|
||||
+++ src/fft.h
|
||||
@@ -32,6 +32,12 @@
|
||||
//#define USE_FLOAT
|
||||
#define USE_DOUBLE
|
||||
|
||||
+#ifdef __cplusplus
|
||||
+#define complex __complex__
|
||||
+#undef I
|
||||
+#define I ((float __complex__)1.0j)
|
||||
+#endif
|
||||
+
|
||||
#ifdef USE_FLOAT
|
||||
typedef float complex complex_t;
|
||||
typedef float real_t;
|
13
math/sfft/files/patch-src_intrinsics.h
Normal file
13
math/sfft/files/patch-src_intrinsics.h
Normal file
@ -0,0 +1,13 @@
|
||||
--- src/intrinsics.h.orig 2024-10-15 15:55:56 UTC
|
||||
+++ src/intrinsics.h
|
||||
@@ -20,7 +20,9 @@
|
||||
*/
|
||||
|
||||
|
||||
-#if defined(__ICC)
|
||||
+#if defined(__arm__) || defined(__aarch64__)
|
||||
+#include <sse2neon.h>
|
||||
+#elif defined(__ICC)
|
||||
#include <xmmintrin.h>
|
||||
#elif defined(__GNUC__)
|
||||
#include <x86intrin.h>
|
11
math/sfft/files/patch-src_simulation.cc
Normal file
11
math/sfft/files/patch-src_simulation.cc
Normal file
@ -0,0 +1,11 @@
|
||||
--- src/simulation.cc.orig 2024-10-15 16:05:30 UTC
|
||||
+++ src/simulation.cc
|
||||
@@ -50,7 +50,7 @@ void simulation::setup(int argc, char **argv)
|
||||
int version = 1;
|
||||
int fftw_opt = FFTW_ESTIMATE;
|
||||
|
||||
- char ch;
|
||||
+ int ch;
|
||||
while ((ch = getopt(argc, argv, "hton:k:r:v:")) != EOF)
|
||||
{
|
||||
switch (ch)
|
11
math/sfft/files/patch-src_timing__many.cc
Normal file
11
math/sfft/files/patch-src_timing__many.cc
Normal file
@ -0,0 +1,11 @@
|
||||
--- src/timing_many.cc.orig 2024-10-15 16:05:53 UTC
|
||||
+++ src/timing_many.cc
|
||||
@@ -51,7 +51,7 @@ parse_arguments(int argc, char **argv, int *n, int *k,
|
||||
parse_arguments(int argc, char **argv, int *n, int *k, int *num_inputs,
|
||||
int *version, int *fftw_opt, bool * simple_parallelism)
|
||||
{
|
||||
- char ch;
|
||||
+ int ch;
|
||||
while ((ch = getopt(argc, argv, "htosi:n:k:v:")) != EOF)
|
||||
{
|
||||
switch (ch)
|
Loading…
Reference in New Issue
Block a user