1
0
mirror of https://git.FreeBSD.org/ports.git synced 2024-11-18 00:10:04 +00:00

math/sfft: port to armv7/aarch64, touch up

- replace complex.h hack with less crude hack
- use sse2neon to build on armv7/aarch64
- armv7 should work, but fails due to an unrelated issue
- touch up CFLAGS slightly
- rework do-test
This commit is contained in:
Robert Clausecker 2024-10-15 18:25:07 +02:00
parent 064036198c
commit 9804638940
7 changed files with 82 additions and 45 deletions

View File

@ -1,6 +1,6 @@
PORTNAME= sfft
PORTVERSION= 0.1.0
PORTREVISION= 12
DISTVERSION= 0.1.0
PORTREVISION= 13
CATEGORIES= math
MASTER_SITES= http://spiral.net/software/sfft/ LOCAL/bf
DISTFILES= ${DISTNAME}${EXTRACT_SUFX}
@ -12,12 +12,18 @@ WWW= https://spiral.net/software/sfft.html
LICENSE= GPLv2
ONLY_FOR_ARCHS= amd64 i386
# Supported targets: x86 provides the SSE2 intrinsics natively; on ARM they
# come from sse2neon.h, which the port's src/intrinsics.h patch includes.
ONLY_FOR_ARCHS= aarch64 amd64 armv7 i386
ONLY_FOR_ARCHS_REASON= requires SSE2 intrinsics, available natively on x86 or via sse2neon on ARM
# armv7 is listed as supported, but the build currently fails with this
# linker error, so the architecture is marked broken.
BROKEN_armv7= /usr/local/bin/ld: error: unsupported option: -z relro
# sse2neon.h is a build-time dependency on the ARM targets only.
# BUILD_DEPENDS_${ARCH} expands to nothing on architectures that have no
# arch-specific entry defined here.
BUILD_DEPENDS_aarch64= ${LOCALBASE}/include/sse2neon.h:devel/sse2neon
BUILD_DEPENDS_armv7= ${BUILD_DEPENDS_aarch64}
BUILD_DEPENDS+= ${BUILD_DEPENDS_${ARCH}}
LIB_DEPENDS= libfftw3.so:math/fftw3
USES= uidfix zip
USES= localbase:ldflags uidfix zip
USE_GCC= yes
USE_LDCONFIG= yes
@ -26,8 +32,10 @@ OPTIONS_DEFAULT= OPTIMIZED_CFLAGS
BUILD_WRKSRC= ${WRKSRC}/src
INSTALL_WRKSRC= ${BUILD_WRKSRC}
CFLAGS+= -fopenmp -msse2 -Iflopcount -I${LOCALBASE}/include
LDFLAGS+= -L${LOCALBASE}/lib
CFLAGS_armv7= -mfpu=neon
CFLAGS_i386= -msse2
CFLAGS+= -fopenmp -Iflopcount
CXXFLAGS+= ${CFLAGS_${ARCH}} -fopenmp -Iflopcount
HEADERS= sfft.h
HDIR= include/sfft
MAKE_ENV= LDADD="-lfftw3 ${LIBM}" LIB=sfft SHLIB_MAJOR="${SHLIB_MAJOR}" \
@ -43,11 +51,6 @@ SRCS= common.cc computefourier-1.0-2.0.cc \
.include <bsd.port.options.mk>
.if !${ARCH:Mamd64} && !${MACHINE_CPU:Msse2}
IGNORE= this port requires SSE2, and benefits from SSE3 -- set CPUTYPE\
appropriately
.endif
LIBM= -lm
.if ${PORT_OPTIONS:MDOCS} || make(makesum)
@ -73,36 +76,20 @@ MAKE_ENV+= WITHOUT_PROFILE=yes
.endif
post-extract:
@${CP} /usr/include/complex.h ${BUILD_WRKSRC}/sfftcomplex.h
@${PRINTF} "LIBDIR=\t${PREFIX}/lib\n.include <bsd.lib.mk>\n" > \
${BUILD_WRKSRC}/Makefile
post-patch:
@${REINPLACE_CMD} -e 's/string\.h/cstring/' \
${WRKSRC}/src/utils.cc
@${REINPLACE_CMD} -E -e '/<complex\.h>/ \
{s/<complex\.h>/ "sfftcomplex.h"/; x ; \
s|^.*$$|#endif|; G; x; \
s|^.*$$|extern "C" {|; G; x; \
s|^.*$$|#ifdef __cplusplus|; G; x; \
s|^.*$$|#ifdef __cplusplus|; H; \
s|^.*$$|}|; H; \
s|^.*$$|#endif|; H; x;}' \
${WRKSRC}/src/computefourier-1.0-2.0.h \
${WRKSRC}/src/computefourier-3.0.h \
${WRKSRC}/src/fft.h
# Command prefix used for every verification run. The default invokes
# limits(1) with "-Sc 0" (presumably to keep a crashing test binary from
# leaving a core file -- confirm). Assigned with ?= so it can be overridden.
CORELIMIT?= /usr/bin/limits -Sc 0
# Build sfft-verification from verification.cc against libsfft.a in
# ${BUILD_WRKSRC}, then run it for sfft versions 1, 2 and 3 with 5, 10 and 50
# frequency components each. A failing run does not abort the target
# (|| ${TRUE}), so the printed results have to be inspected.
do-test:
@cd ${BUILD_WRKSRC}; \
${CXX} ${CXXFLAGS} -o sfft-verification verification.cc \
${LDFLAGS} libsfft.a -lfftw3 ${LIBM} ; \
for _v in 1 2 3 ; do \
for _k in 5 10 50; do \
echo "Checking sfft version $${_v} with $${_k} frequency components:"; \
${CORELIMIT} ./sfft-verification -k $${_k} -r 3 -v $${_v} || ${TRUE} ; \
done ; done
cd ${BUILD_WRKSRC} && ${CXX} ${CXXFLAGS} -o sfft-verification verification.cc \
${LDFLAGS} libsfft.a -lfftw3 ${LIBM}
# The .for loops are expanded by make itself, producing one echo/run pair of
# recipe lines for each (v, k) combination.
.for v in 1 2 3
. for k in 5 10 50
@${ECHO_CMD} "Checking sfft version $v with $k frequency components:"
cd ${BUILD_WRKSRC} && ${CORELIMIT} ./sfft-verification -k $k -r 3 -v $v || ${TRUE}
. endfor
.endfor
post-install:
@${MKDIR} ${STAGEDIR}${PREFIX}/${HDIR}

View File

@ -1,6 +1,6 @@
--- src/computefourier-1.0-2.0.cc.orig 2013-06-13 08:12:25.000000000 -0400
+++ src/computefourier-1.0-2.0.cc 2013-08-09 00:26:54.000000000 -0400
@@ -248,8 +248,13 @@
--- src/computefourier-1.0-2.0.cc.orig 2013-06-13 12:12:25 UTC
+++ src/computefourier-1.0-2.0.cc
@@ -248,8 +248,13 @@ inner_loop_locate(sfft_v1v2_data * data, complex_t * o
__m128d ad_bc = _mm_mul_pd(ab, dc);
__m128d ac_mbd = _mm_mul_pd(ac_bd, signs);
@ -15,7 +15,7 @@
unsigned int i_mod_B_p_offset = (i & B2_m_1) + offset;
__m128d xy = _mm_load_pd(d_x_sampt + i_mod_B_p_offset);
__m128d st = _mm_add_pd(xy, ab_times_cd);
@@ -283,7 +288,13 @@
@@ -283,7 +288,13 @@ inner_loop_locate(sfft_v1v2_data * data, complex_t * o
__m128d ab_square = _mm_mul_pd(ab, ab);
__m128d cd_square = _mm_mul_pd(cd, cd);
@ -29,7 +29,7 @@
_mm_store_pd(samples + j, r);
}
@@ -390,11 +401,23 @@
@@ -390,11 +401,23 @@ estimate_values(sfft_v1v2_data * data, const int *hits
__m128d ad_bc = _mm_mul_pd(ab, dc);
__m128d mad_bc = _mm_mul_pd(ad_bc, signs);

View File

@ -1,6 +1,6 @@
--- src/computefourier-3.0.cc.orig 2013-06-13 08:12:26.000000000 -0400
+++ src/computefourier-3.0.cc 2013-08-10 17:02:52.000000000 -0400
@@ -416,27 +416,64 @@
--- src/computefourier-3.0.cc.orig 2013-06-13 12:12:26 UTC
+++ src/computefourier-3.0.cc
@@ -416,27 +416,64 @@ update_gaussian_loops2(int key, complex_t value, compl
__m128d t1r = _mm_mul_pd(v1r, ab31);
__m128d t1i = _mm_mul_pd(v1i, ba31);
@ -65,7 +65,7 @@
FLOPCOUNT_INCREMENT(6 * (4 + 2));
@@ -524,11 +561,28 @@
@@ -524,11 +561,28 @@ estimate_freq_gauss_loops2(sfft_v3_data * data, int WH
__m128d a3b3_sq = _mm_mul_pd(a3b3, a3b3);
FLOPCOUNT_INCREMENT(8);
@ -94,7 +94,7 @@
FLOPCOUNT_INCREMENT(1);
_mm_store_pd(zero_buck_check, zbc);
@@ -681,13 +735,35 @@
@@ -681,13 +735,35 @@ estimate_freq_mansour_loops2(sfft_v3_data * data, int
__m128d a3b3_sq = _mm_mul_pd(a3b3, a3b3);
FLOPCOUNT_INCREMENT(8);

View File

@ -0,0 +1,15 @@
--- src/fft.h.orig 2024-10-15 16:15:00 UTC
+++ src/fft.h
@@ -32,6 +32,12 @@
//#define USE_FLOAT
#define USE_DOUBLE
+#ifdef __cplusplus
+#define complex __complex__
+#undef I
+#define I ((float __complex__)1.0j)
+#endif
+
#ifdef USE_FLOAT
typedef float complex complex_t;
typedef float real_t;

View File

@ -0,0 +1,13 @@
--- src/intrinsics.h.orig 2024-10-15 15:55:56 UTC
+++ src/intrinsics.h
@@ -20,7 +20,9 @@
*/
-#if defined(__ICC)
+#if defined(__arm__) || defined(__aarch64__)
+#include <sse2neon.h>
+#elif defined(__ICC)
#include <xmmintrin.h>
#elif defined(__GNUC__)
#include <x86intrin.h>

View File

@ -0,0 +1,11 @@
--- src/simulation.cc.orig 2024-10-15 16:05:30 UTC
+++ src/simulation.cc
@@ -50,7 +50,7 @@ void simulation::setup(int argc, char **argv)
int version = 1;
int fftw_opt = FFTW_ESTIMATE;
- char ch;
+ int ch;
while ((ch = getopt(argc, argv, "hton:k:r:v:")) != EOF)
{
switch (ch)

View File

@ -0,0 +1,11 @@
--- src/timing_many.cc.orig 2024-10-15 16:05:53 UTC
+++ src/timing_many.cc
@@ -51,7 +51,7 @@ parse_arguments(int argc, char **argv, int *n, int *k,
parse_arguments(int argc, char **argv, int *n, int *k, int *num_inputs,
int *version, int *fftw_opt, bool * simple_parallelism)
{
- char ch;
+ int ch;
while ((ch = getopt(argc, argv, "htosi:n:k:v:")) != EOF)
{
switch (ch)