mirror of
https://git.FreeBSD.org/ports.git
synced 2024-12-11 02:50:24 +00:00
Pull in upstream commits to our libx264 to deal with the clang 3.5
-integrated-as defaults for ARMv6 compilation.

Handling for koobs@ while he's on vacation.

PR: 196591
Submitted by: mikael.urankar@gmail.com
Approved by: mentor (implicit)
This commit is contained in:
parent
201b450d44
commit
99a42b0dad
Notes:
svn2git
2021-03-31 03:12:20 +00:00
svn path=/head/; revision=376488
@ -5,7 +5,7 @@
|
||||
|
||||
PORTNAME= libx264
|
||||
PORTVERSION= 0.${X264_BUILD}.${X264_REV}
|
||||
PORTREVISION= 1
|
||||
PORTREVISION= 2
|
||||
CATEGORIES= multimedia
|
||||
MASTER_SITES= ftp://ftp.videolan.org/pub/videolan/x264/snapshots/ \
|
||||
http://samples.mplayerhq.hu/yuv4mpeg2/:pgo
|
||||
|
40
multimedia/libx264/files/patch-common_arm_asm.S
Normal file
40
multimedia/libx264/files/patch-common_arm_asm.S
Normal file
@ -0,0 +1,40 @@
|
||||
--- common/arm/asm.S.orig 2014-08-27 20:45:08 UTC
|
||||
+++ common/arm/asm.S
|
||||
@@ -40,6 +40,12 @@
|
||||
# define ELF @
|
||||
#endif
|
||||
|
||||
+#if HAVE_AS_FUNC
|
||||
+# define FUNC
|
||||
+#else
|
||||
+# define FUNC @
|
||||
+#endif
|
||||
+
|
||||
.macro require8, val=1
|
||||
ELF .eabi_attribute 24, \val
|
||||
.endm
|
||||
@@ -49,17 +55,22 @@ ELF .eabi_attribute 25, \val
|
||||
.endm
|
||||
|
||||
.macro function name, export=1
|
||||
+ .macro endfunc
|
||||
+ELF .size \name, . - \name
|
||||
+FUNC .endfunc
|
||||
+ .purgem endfunc
|
||||
+ .endm
|
||||
.align 2
|
||||
.if \export == 1
|
||||
.global EXTERN_ASM\name
|
||||
ELF .hidden EXTERN_ASM\name
|
||||
ELF .type EXTERN_ASM\name, %function
|
||||
- .func EXTERN_ASM\name
|
||||
+FUNC .func EXTERN_ASM\name
|
||||
EXTERN_ASM\name:
|
||||
.else
|
||||
ELF .hidden \name
|
||||
ELF .type \name, %function
|
||||
- .func \name
|
||||
+FUNC .func \name
|
||||
\name:
|
||||
.endif
|
||||
.endm
|
34
multimedia/libx264/files/patch-common_arm_cpu-a.S
Normal file
34
multimedia/libx264/files/patch-common_arm_cpu-a.S
Normal file
@ -0,0 +1,34 @@
|
||||
--- common/arm/cpu-a.S.orig 2014-08-27 20:45:08 UTC
|
||||
+++ common/arm/cpu-a.S
|
||||
@@ -33,7 +33,7 @@
|
||||
function x264_cpu_neon_test
|
||||
vadd.i16 q0, q0, q0
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
// return: 0 on success
|
||||
// 1 if counters were already enabled
|
||||
@@ -49,14 +49,14 @@ function x264_cpu_enable_armv7_counter,
|
||||
mov r2, #1 << 31 // enable cycle counter
|
||||
mcr p15, 0, r2, c9, c12, 1 // write CNTENS
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_cpu_disable_armv7_counter, export=0
|
||||
mrc p15, 0, r0, c9, c12, 0 // read PMNC
|
||||
bic r0, r0, #1 // disable counters
|
||||
mcr p15, 0, r0, c9, c12, 0 // write PMNC
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
.macro READ_TIME r
|
||||
@@ -106,4 +106,4 @@ average_loop:
|
||||
cmp r0, #10
|
||||
movgt r0, #0
|
||||
pop {r4-r6,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
161
multimedia/libx264/files/patch-common_arm_dct-a.S
Normal file
161
multimedia/libx264/files/patch-common_arm_dct-a.S
Normal file
@ -0,0 +1,161 @@
|
||||
--- common/arm/dct-a.S.orig 2014-08-27 20:45:08 UTC
|
||||
+++ common/arm/dct-a.S
|
||||
@@ -82,7 +82,7 @@ function x264_dct4x4dc_neon
|
||||
vrhadd.s16 d3, d6, d7
|
||||
vst1.64 {d0-d3}, [r0,:128]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_idct4x4dc_neon
|
||||
vld1.64 {d0-d3}, [r0,:128]
|
||||
@@ -94,7 +94,7 @@ function x264_idct4x4dc_neon
|
||||
HADAMARD 2, sumsub, d3, d2, d6, d7
|
||||
vst1.64 {d0-d3}, [r0,:128]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
.macro DCT_1D d0 d1 d2 d3 d4 d5 d6 d7
|
||||
@@ -129,7 +129,7 @@ function x264_sub4x4_dct_neon
|
||||
DCT_1D d4, d5, d6, d7, d0, d1, d2, d3
|
||||
vst1.64 {d4-d7}, [r0,:128]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_sub8x4_dct_neon, export=0
|
||||
vld1.64 {d0}, [r1,:64], r3
|
||||
@@ -165,7 +165,7 @@ function x264_sub8x4_dct_neon, export=0
|
||||
vst1.64 {d4-d5}, [r0,:128]!
|
||||
vst1.64 {d6-d7}, [r0,:128]!
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_sub8x8_dct_neon
|
||||
push {lr}
|
||||
@@ -174,7 +174,7 @@ function x264_sub8x8_dct_neon
|
||||
bl x264_sub8x4_dct_neon
|
||||
pop {lr}
|
||||
b x264_sub8x4_dct_neon
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_sub16x16_dct_neon
|
||||
push {lr}
|
||||
@@ -195,7 +195,7 @@ function x264_sub16x16_dct_neon
|
||||
bl x264_sub8x4_dct_neon
|
||||
pop {lr}
|
||||
b x264_sub8x4_dct_neon
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
.macro DCT8_1D type
|
||||
@@ -279,7 +279,7 @@ function x264_sub8x8_dct8_neon
|
||||
vst1.64 {d24-d27}, [r0,:128]!
|
||||
vst1.64 {d28-d31}, [r0,:128]!
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_sub16x16_dct8_neon
|
||||
push {lr}
|
||||
@@ -294,7 +294,7 @@ function x264_sub16x16_dct8_neon
|
||||
sub r1, r1, #FENC_STRIDE*8 - 8
|
||||
sub r2, r2, #FDEC_STRIDE*8 - 8
|
||||
b X(x264_sub8x8_dct8_neon)
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
// First part of IDCT (minus final SUMSUB_BA)
|
||||
@@ -336,7 +336,7 @@ function x264_add4x4_idct_neon
|
||||
vst1.32 {d2[1]}, [r0,:32], r2
|
||||
vst1.32 {d2[0]}, [r0,:32], r2
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_add8x4_idct_neon, export=0
|
||||
vld1.64 {d0-d3}, [r1,:128]!
|
||||
@@ -376,7 +376,7 @@ function x264_add8x4_idct_neon, export=0
|
||||
vst1.32 {d2}, [r0,:64], r2
|
||||
vst1.32 {d3}, [r0,:64], r2
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_add8x8_idct_neon
|
||||
mov r2, #FDEC_STRIDE
|
||||
@@ -384,7 +384,7 @@ function x264_add8x8_idct_neon
|
||||
bl x264_add8x4_idct_neon
|
||||
mov lr, ip
|
||||
b x264_add8x4_idct_neon
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_add16x16_idct_neon
|
||||
mov r2, #FDEC_STRIDE
|
||||
@@ -401,7 +401,7 @@ function x264_add16x16_idct_neon
|
||||
bl x264_add8x4_idct_neon
|
||||
mov lr, ip
|
||||
b x264_add8x4_idct_neon
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
.macro IDCT8_1D type
|
||||
@@ -498,7 +498,7 @@ function x264_add8x8_idct8_neon
|
||||
vst1.64 {d6}, [r0,:64], r2
|
||||
vst1.64 {d7}, [r0,:64], r2
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_add16x16_idct8_neon
|
||||
mov ip, lr
|
||||
@@ -510,7 +510,7 @@ function x264_add16x16_idct8_neon
|
||||
sub r0, r0, #8*FDEC_STRIDE-8
|
||||
mov lr, ip
|
||||
b X(x264_add8x8_idct8_neon)
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
function x264_add8x8_idct_dc_neon
|
||||
@@ -562,7 +562,7 @@ function x264_add8x8_idct_dc_neon
|
||||
vst1.64 {d6}, [r0,:64], r2
|
||||
vst1.64 {d7}, [r0,:64], r2
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
.macro ADD16x4_IDCT_DC dc
|
||||
vld1.64 {d16-d17}, [r0,:128], r3
|
||||
@@ -610,7 +610,7 @@ function x264_add16x16_idct_dc_neon
|
||||
ADD16x4_IDCT_DC d2
|
||||
ADD16x4_IDCT_DC d3
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_sub8x8_dct_dc_neon
|
||||
mov r3, #FENC_STRIDE
|
||||
@@ -658,7 +658,7 @@ function x264_sub8x8_dct_dc_neon
|
||||
vpadd.s16 d0, d0, d1
|
||||
vst1.64 {d0}, [r0,:64]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
function x264_zigzag_scan_4x4_frame_neon
|
||||
@@ -671,4 +671,4 @@ function x264_zigzag_scan_4x4_frame_neon
|
||||
vtbl.8 d7, {d2-d3}, d19
|
||||
vst1.64 {d4-d7}, [r0,:128]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
44
multimedia/libx264/files/patch-common_arm_deblock-a.S
Normal file
44
multimedia/libx264/files/patch-common_arm_deblock-a.S
Normal file
@ -0,0 +1,44 @@
|
||||
--- common/arm/deblock-a.S.orig 2014-08-27 20:45:08 UTC
|
||||
+++ common/arm/deblock-a.S
|
||||
@@ -142,7 +142,7 @@ function x264_deblock_v_luma_neon
|
||||
|
||||
align_pop_regs
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_deblock_h_luma_neon
|
||||
h264_loop_filter_start
|
||||
@@ -194,7 +194,7 @@ function x264_deblock_h_luma_neon
|
||||
|
||||
align_pop_regs
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
.macro h264_loop_filter_chroma
|
||||
vdup.8 q11, r2 // alpha
|
||||
@@ -255,7 +255,7 @@ function x264_deblock_v_chroma_neon
|
||||
vst2.8 {d0, d1}, [r0,:128], r1
|
||||
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_deblock_h_chroma_neon
|
||||
h264_loop_filter_start
|
||||
@@ -303,7 +303,7 @@ function x264_deblock_h_chroma_neon
|
||||
vst1.8 {d3}, [r0], r1
|
||||
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_deblock_strength_neon
|
||||
ldr ip, [sp]
|
||||
@@ -409,4 +409,4 @@ lists:
|
||||
|
||||
vst1.8 {q8}, [r3,:128] @ bs[0]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
386
multimedia/libx264/files/patch-common_arm_mc-a.S
Normal file
386
multimedia/libx264/files/patch-common_arm_mc-a.S
Normal file
@ -0,0 +1,386 @@
|
||||
--- common/arm/mc-a.S.orig 2014-08-27 20:45:08 UTC
|
||||
+++ common/arm/mc-a.S
|
||||
@@ -50,7 +50,7 @@ function x264_prefetch_ref_arm
|
||||
pld [r3, r1, lsl #1]
|
||||
pld [r3, r2]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
// void prefetch_fenc( uint8_t *pix_y, intptr_t stride_y,
|
||||
// uint8_t *pix_uv, intptr_t stride_uv, int mb_x )
|
||||
@@ -76,7 +76,7 @@ function x264_prefetch_fenc_arm
|
||||
pld [ip]
|
||||
pld [ip, r3]
|
||||
pop {pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
// void *x264_memcpy_aligned( void *dst, const void *src, size_t n )
|
||||
@@ -85,7 +85,7 @@ function x264_memcpy_aligned_neon
|
||||
movrel ip, memcpy_table
|
||||
and r3, r3, #0xc
|
||||
ldr pc, [ip, r3]
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
.macro MEMCPY_ALIGNED srcalign dstalign
|
||||
function memcpy_aligned_\dstalign\()_\srcalign\()_neon, export=0
|
||||
@@ -127,7 +127,7 @@ function memcpy_aligned_\dstalign\()_\sr
|
||||
vst1.64 {d0}, [r3,:64]!
|
||||
.endif
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
.endm
|
||||
|
||||
MEMCPY_ALIGNED 16, 16
|
||||
@@ -156,7 +156,7 @@ memzero_loop:
|
||||
.endr
|
||||
bgt memzero_loop
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
// void pixel_avg( uint8_t *dst, intptr_t dst_stride,
|
||||
@@ -175,7 +175,7 @@ function x264_pixel_avg_\w\()x\h\()_neon
|
||||
cmp ip, #0
|
||||
bge x264_pixel_avg_weight_w\w\()_add_add_neon
|
||||
b x264_pixel_avg_weight_w\w\()_sub_add_neon // weight < 0
|
||||
-.endfunc
|
||||
+endfunc
|
||||
.endm
|
||||
|
||||
AVGH 4, 2
|
||||
@@ -253,7 +253,7 @@ function x264_pixel_avg_weight_w4_\ext\(
|
||||
vst1.32 {d1[0]}, [r0,:32], r1
|
||||
bgt 1b
|
||||
pop {r4-r6,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_pixel_avg_weight_w8_\ext\()_neon, export=0
|
||||
load_weights_\ext
|
||||
@@ -277,7 +277,7 @@ function x264_pixel_avg_weight_w8_\ext\(
|
||||
vst1.64 {d3}, [r0,:64], r1
|
||||
bgt 1b
|
||||
pop {r4-r6,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_pixel_avg_weight_w16_\ext\()_neon, export=0
|
||||
load_weights_\ext
|
||||
@@ -297,7 +297,7 @@ function x264_pixel_avg_weight_w16_\ext\
|
||||
vst1.64 {d2-d3}, [r0,:128], r1
|
||||
bgt 1b
|
||||
pop {r4-r6,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
.endm
|
||||
|
||||
AVG_WEIGHT add_add
|
||||
@@ -316,7 +316,7 @@ function x264_pixel_avg_w4_neon, export=
|
||||
vst1.32 {d1[0]}, [r0,:32], r1
|
||||
bgt x264_pixel_avg_w4_neon
|
||||
pop {r4-r6,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_pixel_avg_w8_neon, export=0
|
||||
subs lr, lr, #4
|
||||
@@ -338,7 +338,7 @@ function x264_pixel_avg_w8_neon, export=
|
||||
vst1.64 {d3}, [r0,:64], r1
|
||||
bgt x264_pixel_avg_w8_neon
|
||||
pop {r4-r6,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_pixel_avg_w16_neon, export=0
|
||||
subs lr, lr, #4
|
||||
@@ -360,7 +360,7 @@ function x264_pixel_avg_w16_neon, export
|
||||
vst1.64 {d6-d7}, [r0,:128], r1
|
||||
bgt x264_pixel_avg_w16_neon
|
||||
pop {r4-r6,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
function x264_pixel_avg2_w4_neon
|
||||
@@ -379,7 +379,7 @@ avg2_w4_loop:
|
||||
vst1.32 {d1[0]}, [r0,:32], r1
|
||||
bgt avg2_w4_loop
|
||||
pop {pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_pixel_avg2_w8_neon
|
||||
ldr ip, [sp, #4]
|
||||
@@ -397,7 +397,7 @@ avg2_w8_loop:
|
||||
vst1.64 {d1}, [r0,:64], r1
|
||||
bgt avg2_w8_loop
|
||||
pop {pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_pixel_avg2_w16_neon
|
||||
ldr ip, [sp, #4]
|
||||
@@ -415,7 +415,7 @@ avg2_w16_loop:
|
||||
vst1.64 {d4-d5}, [r0,:128], r1
|
||||
bgt avg2_w16_loop
|
||||
pop {pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_pixel_avg2_w20_neon
|
||||
ldr ip, [sp, #4]
|
||||
@@ -438,7 +438,7 @@ avg2_w20_loop:
|
||||
vst1.32 {d6[0]}, [r0,:32], r1
|
||||
bgt avg2_w20_loop
|
||||
pop {pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
.macro weight_prologue type
|
||||
@@ -499,7 +499,7 @@ weight20_loop:
|
||||
vst1.32 {d20[1]}, [r0,:32], r1
|
||||
bgt weight20_loop
|
||||
pop {r4-r5,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_mc_weight_w16_neon
|
||||
weight_prologue full
|
||||
@@ -531,7 +531,7 @@ weight16_loop:
|
||||
vst1.8 {d18-d19}, [r0,:128], r1
|
||||
bgt weight16_loop
|
||||
pop {r4-r5,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_mc_weight_w8_neon
|
||||
weight_prologue full
|
||||
@@ -553,7 +553,7 @@ weight8_loop:
|
||||
vst1.8 {d18}, [r0,:64], r1
|
||||
bgt weight8_loop
|
||||
pop {r4-r5,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_mc_weight_w4_neon
|
||||
weight_prologue full
|
||||
@@ -572,7 +572,7 @@ weight4_loop:
|
||||
vst1.32 {d16[1]}, [r0,:32], r1
|
||||
bgt weight4_loop
|
||||
pop {r4-r5,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_mc_weight_w20_nodenom_neon
|
||||
weight_prologue nodenom
|
||||
@@ -609,7 +609,7 @@ weight20_nodenom_loop:
|
||||
vst1.32 {d20[1]}, [r0,:32], r1
|
||||
bgt weight20_nodenom_loop
|
||||
pop {r4-r5,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_mc_weight_w16_nodenom_neon
|
||||
weight_prologue nodenom
|
||||
@@ -637,7 +637,7 @@ weight16_nodenom_loop:
|
||||
vst1.8 {d18-d19}, [r0,:128], r1
|
||||
bgt weight16_nodenom_loop
|
||||
pop {r4-r5,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_mc_weight_w8_nodenom_neon
|
||||
weight_prologue nodenom
|
||||
@@ -657,7 +657,7 @@ weight8_nodenom_loop:
|
||||
vst1.8 {d17}, [r0,:64], r1
|
||||
bgt weight8_nodenom_loop
|
||||
pop {r4-r5,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_mc_weight_w4_nodenom_neon
|
||||
weight_prologue nodenom
|
||||
@@ -675,7 +675,7 @@ weight4_nodenom_loop:
|
||||
vst1.32 {d16[1]}, [r0,:32], r1
|
||||
bgt weight4_nodenom_loop
|
||||
pop {r4-r5,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
.macro weight_simple_prologue
|
||||
push {lr}
|
||||
@@ -699,7 +699,7 @@ weight20_\name\()_loop:
|
||||
vst1.8 {d19-d21}, [r0,:64], r1
|
||||
bgt weight20_\name\()_loop
|
||||
pop {pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_mc_weight_w16_\name\()_neon
|
||||
weight_simple_prologue
|
||||
@@ -713,7 +713,7 @@ weight16_\name\()_loop:
|
||||
vst1.8 {d18-d19}, [r0,:128], r1
|
||||
bgt weight16_\name\()_loop
|
||||
pop {pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_mc_weight_w8_\name\()_neon
|
||||
weight_simple_prologue
|
||||
@@ -726,7 +726,7 @@ weight8_\name\()_loop:
|
||||
vst1.8 {d17}, [r0,:64], r1
|
||||
bgt weight8_\name\()_loop
|
||||
pop {pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_mc_weight_w4_\name\()_neon
|
||||
weight_simple_prologue
|
||||
@@ -739,7 +739,7 @@ weight4_\name\()_loop:
|
||||
vst1.32 {d17[0]}, [r0,:32], r1
|
||||
bgt weight4_\name\()_loop
|
||||
pop {pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
.endm
|
||||
|
||||
weight_simple offsetadd, vqadd.u8
|
||||
@@ -761,7 +761,7 @@ copy_w4_loop:
|
||||
vst1.32 {d3[0]}, [r0,:32], r1
|
||||
bgt copy_w4_loop
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_mc_copy_w8_neon
|
||||
ldr ip, [sp]
|
||||
@@ -777,7 +777,7 @@ copy_w8_loop:
|
||||
vst1.32 {d3}, [r0,:64], r1
|
||||
bgt copy_w8_loop
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_mc_copy_w16_neon
|
||||
ldr ip, [sp]
|
||||
@@ -793,7 +793,7 @@ copy_w16_loop:
|
||||
vst1.32 {d6-d7}, [r0,:128], r1
|
||||
bgt copy_w16_loop
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_mc_copy_w16_aligned_neon
|
||||
ldr ip, [sp]
|
||||
@@ -809,7 +809,7 @@ copy_w16_aligned_loop:
|
||||
vst1.32 {d6-d7}, [r0,:128], r1
|
||||
bgt copy_w16_aligned_loop
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
// void x264_mc_chroma_neon( uint8_t *dst, intptr_t i_dst_stride,
|
||||
@@ -1159,7 +1159,7 @@ mc_chroma_w8:
|
||||
vpop {d8-d11}
|
||||
pop {r4-r8, pc}
|
||||
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
// hpel_filter_v( uint8_t *dst, uint8_t *src, int16_t *buf, intptr_t stride, int width )
|
||||
@@ -1200,7 +1200,7 @@ filter_v_loop:
|
||||
vst1.64 {d0-d1}, [r0,:128]!
|
||||
bgt filter_v_loop
|
||||
pop {pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
// hpel_filter_c( uint8_t *dst, int16_t *buf, int width );
|
||||
function x264_hpel_filter_c_neon
|
||||
@@ -1285,7 +1285,7 @@ filter_c_loop:
|
||||
vst1.64 {d30-d31}, [r0,:128]!
|
||||
bgt filter_c_loop
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
// hpel_filter_h( uint8_t *dst, uint8_t *src, int width );
|
||||
function x264_hpel_filter_h_neon
|
||||
@@ -1372,7 +1372,7 @@ filter_h_loop:
|
||||
vst1.64 {d6-d7}, [r0,:128]!
|
||||
bgt filter_h_loop
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
// frame_init_lowres_core( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv,
|
||||
@@ -1464,7 +1464,7 @@ lowres_xloop_end:
|
||||
|
||||
vpop {d8-d15}
|
||||
pop {r4-r10,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_load_deinterleave_chroma_fdec_neon
|
||||
mov ip, #FDEC_STRIDE/2
|
||||
@@ -1477,7 +1477,7 @@ function x264_load_deinterleave_chroma_f
|
||||
bgt 1b
|
||||
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_load_deinterleave_chroma_fenc_neon
|
||||
mov ip, #FENC_STRIDE/2
|
||||
@@ -1490,7 +1490,7 @@ function x264_load_deinterleave_chroma_f
|
||||
bgt 1b
|
||||
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_plane_copy_deinterleave_neon
|
||||
push {r4-r7, lr}
|
||||
@@ -1516,7 +1516,7 @@ block:
|
||||
bgt block
|
||||
|
||||
pop {r4-r7, pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_plane_copy_deinterleave_rgb_neon
|
||||
push {r4-r8, r10, r11, lr}
|
||||
@@ -1568,7 +1568,7 @@ block4:
|
||||
bgt block4
|
||||
|
||||
pop {r4-r8, r10, r11, pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_plane_copy_interleave_neon
|
||||
push {r4-r7, lr}
|
||||
@@ -1595,7 +1595,7 @@ blocki:
|
||||
bgt blocki
|
||||
|
||||
pop {r4-r7, pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_store_interleave_chroma_neon
|
||||
push {lr}
|
||||
@@ -1609,4 +1609,4 @@ function x264_store_interleave_chroma_ne
|
||||
bgt 1b
|
||||
|
||||
pop {pc}
|
||||
-.endfunc
|
||||
+endfunc
|
275
multimedia/libx264/files/patch-common_arm_pixel-a.S
Normal file
275
multimedia/libx264/files/patch-common_arm_pixel-a.S
Normal file
@ -0,0 +1,275 @@
|
||||
--- common/arm/pixel-a.S.orig 2014-08-27 20:45:08 UTC
|
||||
+++ common/arm/pixel-a.S
|
||||
@@ -29,10 +29,12 @@
|
||||
.section .rodata
|
||||
.align 4
|
||||
|
||||
-.rept 16 .byte 0xff
|
||||
+.rept 16
|
||||
+ .byte 0xff
|
||||
.endr
|
||||
mask_ff:
|
||||
-.rept 16 .byte 0
|
||||
+.rept 16
|
||||
+ .byte 0
|
||||
.endr
|
||||
|
||||
mask_ac4:
|
||||
@@ -60,7 +62,7 @@ function x264_pixel_sad_4x\h\()_armv6
|
||||
.endr
|
||||
usada8 r0, r6, lr, ip
|
||||
pop {r4-r6,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
.endm
|
||||
|
||||
SAD4_ARMV6 4
|
||||
@@ -137,7 +139,7 @@ function x264_pixel_sad\name\()_\w\()x\h
|
||||
vpaddl.u16 d0, d0
|
||||
vmov.u32 r0, d0[0]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
.endm
|
||||
|
||||
SAD_FUNC 4, 4
|
||||
@@ -222,7 +224,7 @@ function x264_pixel_sad_aligned_\w\()x\h
|
||||
vpaddl.u16 d0, d0
|
||||
vmov.u32 r0, d0[0]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
.endm
|
||||
|
||||
SAD_FUNC_DUAL 8, 4
|
||||
@@ -368,7 +370,7 @@ function x264_pixel_sad_x\x\()_\w\()x\h\
|
||||
vst1.32 {d0-d1}, [r7]
|
||||
.endif
|
||||
pop {r6-r7,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
.endm
|
||||
|
||||
SAD_X_FUNC 3, 4, 4
|
||||
@@ -477,7 +479,7 @@ function x264_pixel_ssd_\w\()x\h\()_neon
|
||||
vpadd.s32 d0, d0, d0
|
||||
vmov.32 r0, d0[0]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
.endm
|
||||
|
||||
SSD_FUNC 4, 4
|
||||
@@ -517,7 +519,7 @@ function x264_pixel_var_8x8_neon
|
||||
vld1.64 {d26}, [r0,:64], r1
|
||||
VAR_SQR_SUM q2, q10, q15, d26
|
||||
b x264_var_end
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_pixel_var_8x16_neon
|
||||
vld1.64 {d16}, [r0,:64], r1
|
||||
@@ -549,7 +551,7 @@ function x264_pixel_var_8x16_neon
|
||||
2:
|
||||
VAR_SQR_SUM q2, q13, q15, d22
|
||||
b x264_var_end
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_pixel_var_16x16_neon
|
||||
vld1.64 {d16-d17}, [r0,:128], r1
|
||||
@@ -573,7 +575,7 @@ var16_loop:
|
||||
VAR_SQR_SUM q1, q12, q14, d18
|
||||
VAR_SQR_SUM q2, q13, q15, d19
|
||||
bgt var16_loop
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_var_end, export=0
|
||||
vpaddl.u16 q8, q14
|
||||
@@ -588,7 +590,7 @@ function x264_var_end, export=0
|
||||
|
||||
vmov r0, r1, d0
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
.macro DIFF_SUM diff da db lastdiff
|
||||
vld1.64 {\da}, [r0,:64], r1
|
||||
@@ -633,7 +635,7 @@ function x264_pixel_var2_8x8_neon
|
||||
mul r0, r0, r0
|
||||
sub r0, r1, r0, lsr #6
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_pixel_var2_8x16_neon
|
||||
vld1.64 {d16}, [r0,:64], r1
|
||||
@@ -677,7 +679,7 @@ function x264_pixel_var2_8x16_neon
|
||||
mul r0, r0, r0
|
||||
sub r0, r1, r0, lsr #7
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
.macro LOAD_DIFF_8x4 q0 q1 q2 q3
|
||||
vld1.32 {d1}, [r2], r3
|
||||
@@ -714,7 +716,7 @@ function x264_pixel_satd_4x4_neon
|
||||
HORIZ_ADD d0, d0, d1
|
||||
vmov.32 r0, d0[0]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_pixel_satd_4x8_neon
|
||||
vld1.32 {d1[]}, [r2], r3
|
||||
@@ -741,7 +743,7 @@ function x264_pixel_satd_4x8_neon
|
||||
vsubl.u8 q3, d6, d7
|
||||
SUMSUB_AB q10, q11, q2, q3
|
||||
b x264_satd_4x8_8x4_end_neon
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_pixel_satd_8x4_neon
|
||||
vld1.64 {d1}, [r2], r3
|
||||
@@ -758,7 +760,7 @@ function x264_pixel_satd_8x4_neon
|
||||
vld1.64 {d6}, [r0,:64], r1
|
||||
vsubl.u8 q3, d6, d7
|
||||
SUMSUB_AB q10, q11, q2, q3
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_satd_4x8_8x4_end_neon, export=0
|
||||
vadd.s16 q0, q8, q10
|
||||
@@ -785,7 +787,7 @@ function x264_satd_4x8_8x4_end_neon, exp
|
||||
HORIZ_ADD d0, d0, d1
|
||||
vmov.32 r0, d0[0]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_pixel_satd_8x8_neon
|
||||
mov ip, lr
|
||||
@@ -799,7 +801,7 @@ function x264_pixel_satd_8x8_neon
|
||||
mov lr, ip
|
||||
vmov.32 r0, d0[0]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_pixel_satd_8x16_neon
|
||||
vpush {d8-d11}
|
||||
@@ -821,7 +823,7 @@ function x264_pixel_satd_8x16_neon
|
||||
mov lr, ip
|
||||
vmov.32 r0, d0[0]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_satd_8x8_neon, export=0
|
||||
LOAD_DIFF_8x4 q8, q9, q10, q11
|
||||
@@ -841,7 +843,7 @@ function x264_satd_8x8_neon, export=0
|
||||
SUMSUB_AB q9, q11, q1, q3
|
||||
vld1.64 {d0}, [r0,:64], r1
|
||||
vsubl.u8 q15, d0, d1
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
// one vertical hadamard pass and two horizontal
|
||||
function x264_satd_8x4v_8x8h_neon, export=0
|
||||
@@ -870,7 +872,7 @@ function x264_satd_8x4v_8x8h_neon, expor
|
||||
vmax.s16 q14, q8, q10
|
||||
vmax.s16 q15, q9, q11
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_pixel_satd_16x8_neon
|
||||
vpush {d8-d11}
|
||||
@@ -892,7 +894,7 @@ function x264_pixel_satd_16x8_neon
|
||||
mov lr, ip
|
||||
vmov.32 r0, d0[0]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_pixel_satd_16x16_neon
|
||||
vpush {d8-d11}
|
||||
@@ -926,7 +928,7 @@ function x264_pixel_satd_16x16_neon
|
||||
mov lr, ip
|
||||
vmov.32 r0, d0[0]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_satd_16x4_neon, export=0
|
||||
vld1.64 {d2-d3}, [r2], r3
|
||||
@@ -950,7 +952,7 @@ function x264_satd_16x4_neon, export=0
|
||||
SUMSUB_AB q2, q3, q10, q11
|
||||
SUMSUB_ABCD q8, q10, q9, q11, q0, q2, q1, q3
|
||||
b x264_satd_8x4v_8x8h_neon
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
function x264_pixel_sa8d_8x8_neon
|
||||
@@ -963,7 +965,7 @@ function x264_pixel_sa8d_8x8_neon
|
||||
add r0, r0, #1
|
||||
lsr r0, r0, #1
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_pixel_sa8d_16x16_neon
|
||||
vpush {d8-d11}
|
||||
@@ -995,7 +997,7 @@ function x264_pixel_sa8d_16x16_neon
|
||||
add r0, r0, #1
|
||||
lsr r0, r0, #1
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
.macro HADAMARD4_V r1, r2, r3, r4, t1, t2, t3, t4
|
||||
SUMSUB_ABCD \t1, \t2, \t3, \t4, \r1, \r2, \r3, \r4
|
||||
@@ -1058,7 +1060,7 @@ function x264_sa8d_8x8_neon, export=0
|
||||
vadd.i16 q8, q8, q9
|
||||
vadd.i16 q9, q10, q11
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
.macro HADAMARD_AC w h
|
||||
@@ -1094,7 +1096,7 @@ function x264_pixel_hadamard_ac_\w\()x\h
|
||||
lsr r0, r0, #1
|
||||
lsr r1, r1, #2
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
.endm
|
||||
|
||||
HADAMARD_AC 8, 8
|
||||
@@ -1189,7 +1191,7 @@ function x264_hadamard_ac_8x8_neon, expo
|
||||
vadd.s16 q2, q2, q14
|
||||
vpadal.u16 q5, q2
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
.macro SSIM_ITER n ssa s12 ssb lastssa lasts12 lastssb da db dnext
|
||||
@@ -1243,7 +1245,7 @@ function x264_pixel_ssim_4x4x2_core_neon
|
||||
|
||||
vst4.32 {d0-d3}, [ip]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
// FIXME: see about doing 16x16 -> 32 bit multiplies for s1/s2
|
||||
function x264_pixel_ssim_end4_neon
|
||||
@@ -1314,4 +1316,4 @@ ssim_skip:
|
||||
vpadd.f32 d0, d0, d0
|
||||
vmov.32 r0, d0[0]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
242
multimedia/libx264/files/patch-common_arm_predict-a.S
Normal file
242
multimedia/libx264/files/patch-common_arm_predict-a.S
Normal file
@ -0,0 +1,242 @@
|
||||
--- common/arm/predict-a.S.orig 2014-08-27 20:45:08 UTC
|
||||
+++ common/arm/predict-a.S
|
||||
@@ -77,7 +77,7 @@ function x264_predict_4x4_h_armv6
|
||||
add ip, ip, ip, lsl #16
|
||||
str ip, [r0, #3*FDEC_STRIDE]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_4x4_v_armv6
|
||||
ldr r1, [r0, #0 - 1 * FDEC_STRIDE]
|
||||
@@ -86,7 +86,7 @@ function x264_predict_4x4_v_armv6
|
||||
str r1, [r0, #0 + 2 * FDEC_STRIDE]
|
||||
str r1, [r0, #0 + 3 * FDEC_STRIDE]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_4x4_dc_armv6
|
||||
mov ip, #0
|
||||
@@ -109,7 +109,7 @@ function x264_predict_4x4_dc_armv6
|
||||
str r1, [r0, #2*FDEC_STRIDE]
|
||||
str r1, [r0, #3*FDEC_STRIDE]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_4x4_dc_top_neon
|
||||
mov r12, #FDEC_STRIDE
|
||||
@@ -124,7 +124,7 @@ function x264_predict_4x4_dc_top_neon
|
||||
vst1.32 d1[0], [r0,:32], r12
|
||||
vst1.32 d1[0], [r0,:32], r12
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
// return a1 = (a1+2*b1+c1+2)>>2 a2 = (a2+2*b2+c2+2)>>2
|
||||
.macro PRED4x4_LOWPASS a1 b1 c1 a2 b2 c2 pb_1
|
||||
@@ -167,7 +167,7 @@ function x264_predict_4x4_ddr_armv6
|
||||
add r5, r5, r4, lsr #8
|
||||
str r5, [r0, #3*FDEC_STRIDE]
|
||||
pop {r4-r6,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_4x4_ddl_neon
|
||||
sub r0, #FDEC_STRIDE
|
||||
@@ -186,7 +186,7 @@ function x264_predict_4x4_ddl_neon
|
||||
vst1.32 {d2[0]}, [r0,:32], ip
|
||||
vst1.32 {d3[0]}, [r0,:32], ip
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_8x8_dc_neon
|
||||
mov ip, #0
|
||||
@@ -210,7 +210,7 @@ function x264_predict_8x8_dc_neon
|
||||
vst1.64 {d0}, [r0,:64], ip
|
||||
.endr
|
||||
pop {r4-r5,pc}
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_8x8_h_neon
|
||||
add r1, r1, #7
|
||||
@@ -233,7 +233,7 @@ function x264_predict_8x8_h_neon
|
||||
vst1.64 {d6}, [r0,:64], ip
|
||||
vst1.64 {d7}, [r0,:64], ip
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_8x8_v_neon
|
||||
add r1, r1, #16
|
||||
@@ -243,7 +243,7 @@ function x264_predict_8x8_v_neon
|
||||
vst1.8 {d0}, [r0,:64], r12
|
||||
.endr
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_8x8_ddl_neon
|
||||
add r1, #16
|
||||
@@ -271,7 +271,7 @@ function x264_predict_8x8_ddl_neon
|
||||
vst1.8 d2, [r0,:64], r12
|
||||
vst1.8 d1, [r0,:64], r12
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_8x8_ddr_neon
|
||||
vld1.8 {d0-d3}, [r1,:128]
|
||||
@@ -301,7 +301,7 @@ function x264_predict_8x8_ddr_neon
|
||||
vst1.8 {d4}, [r0,:64], r12
|
||||
vst1.8 {d5}, [r0,:64], r12
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_8x8_vl_neon
|
||||
add r1, #16
|
||||
@@ -332,7 +332,7 @@ function x264_predict_8x8_vl_neon
|
||||
vst1.8 {d3}, [r0,:64], r12
|
||||
vst1.8 {d2}, [r0,:64], r12
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_8x8_vr_neon
|
||||
add r1, #8
|
||||
@@ -364,7 +364,7 @@ function x264_predict_8x8_vr_neon
|
||||
vst1.8 {d6}, [r0,:64], r12
|
||||
vst1.8 {d3}, [r0,:64], r12
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_8x8_hd_neon
|
||||
mov r12, #FDEC_STRIDE
|
||||
@@ -397,7 +397,7 @@ function x264_predict_8x8_hd_neon
|
||||
vst1.8 {d16}, [r0,:64], r12
|
||||
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_8x8_hu_neon
|
||||
mov r12, #FDEC_STRIDE
|
||||
@@ -430,7 +430,7 @@ function x264_predict_8x8_hu_neon
|
||||
vst1.8 {d7}, [r0,:64], r12
|
||||
vst1.8 {d17}, [r0,:64]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_8x8c_dc_top_neon
|
||||
sub r2, r0, #FDEC_STRIDE
|
||||
@@ -443,7 +443,7 @@ function x264_predict_8x8c_dc_top_neon
|
||||
vdup.8 d0, d0[0]
|
||||
vtrn.32 d0, d1
|
||||
b pred8x8_dc_end
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_8x8c_dc_left_neon
|
||||
mov r1, #FDEC_STRIDE
|
||||
@@ -455,7 +455,7 @@ function x264_predict_8x8c_dc_left_neon
|
||||
vdup.8 d1, d0[1]
|
||||
vdup.8 d0, d0[0]
|
||||
b pred8x8_dc_end
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_8x8c_dc_neon
|
||||
sub r2, r0, #FDEC_STRIDE
|
||||
@@ -481,7 +481,7 @@ pred8x8_dc_end:
|
||||
vst1.8 {d1}, [r2,:64], r1
|
||||
.endr
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_8x8c_h_neon
|
||||
sub r1, r0, #1
|
||||
@@ -493,7 +493,7 @@ function x264_predict_8x8c_h_neon
|
||||
vst1.64 {d2}, [r0,:64], ip
|
||||
.endr
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_8x8c_v_neon
|
||||
sub r0, r0, #FDEC_STRIDE
|
||||
@@ -503,7 +503,7 @@ function x264_predict_8x8c_v_neon
|
||||
vst1.64 {d0}, [r0,:64], ip
|
||||
.endr
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_8x8c_p_neon
|
||||
sub r3, r0, #FDEC_STRIDE
|
||||
@@ -556,7 +556,7 @@ function x264_predict_8x8c_p_neon
|
||||
subs r3, r3, #1
|
||||
bne 1b
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
function x264_predict_16x16_dc_top_neon
|
||||
@@ -567,7 +567,7 @@ function x264_predict_16x16_dc_top_neon
|
||||
vrshrn.u16 d0, q0, #4
|
||||
vdup.8 q0, d0[0]
|
||||
b pred16x16_dc_end
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_16x16_dc_left_neon
|
||||
mov r1, #FDEC_STRIDE
|
||||
@@ -578,7 +578,7 @@ function x264_predict_16x16_dc_left_neon
|
||||
vrshrn.u16 d0, q0, #4
|
||||
vdup.8 q0, d0[0]
|
||||
b pred16x16_dc_end
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_16x16_dc_neon
|
||||
sub r3, r0, #FDEC_STRIDE
|
||||
@@ -616,7 +616,7 @@ pred16x16_dc_end:
|
||||
vst1.64 {d0-d1}, [r0,:128], r1
|
||||
.endr
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_16x16_h_neon
|
||||
sub r1, r0, #1
|
||||
@@ -630,7 +630,7 @@ function x264_predict_16x16_h_neon
|
||||
vst1.64 {d2-d3}, [r0,:128], ip
|
||||
.endr
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_16x16_v_neon
|
||||
sub r0, r0, #FDEC_STRIDE
|
||||
@@ -640,7 +640,7 @@ function x264_predict_16x16_v_neon
|
||||
vst1.64 {d0-d1}, [r0,:128], ip
|
||||
.endr
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_predict_16x16_p_neon
|
||||
sub r3, r0, #FDEC_STRIDE
|
||||
@@ -697,4 +697,4 @@ function x264_predict_16x16_p_neon
|
||||
subs r3, r3, #1
|
||||
bne 1b
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
98
multimedia/libx264/files/patch-common_arm_quant-a.S
Normal file
@ -0,0 +1,98 @@
|
||||
--- common/arm/quant-a.S.orig 2014-08-27 20:45:08 UTC
|
||||
+++ common/arm/quant-a.S
|
||||
@@ -80,7 +80,7 @@ function x264_quant_2x2_dc_neon
|
||||
vsub.s16 d3, d3, d0
|
||||
vst1.64 {d3}, [r0,:64]
|
||||
QUANT_END d3
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
// quant_4x4_dc( int16_t dct[16], int mf, int bias )
|
||||
function x264_quant_4x4_dc_neon
|
||||
@@ -92,7 +92,7 @@ function x264_quant_4x4_dc_neon
|
||||
QUANT_TWO q0, q0, d4, d5, d4, d5, q0
|
||||
vorr d0, d0, d1
|
||||
QUANT_END d0
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
// quant_4x4( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] )
|
||||
function x264_quant_4x4_neon
|
||||
@@ -104,7 +104,7 @@ function x264_quant_4x4_neon
|
||||
QUANT_TWO q0, q1, d4, d5, d6, d7, q0
|
||||
vorr d0, d0, d1
|
||||
QUANT_END d0
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
// quant_4x4x4( int16_t dct[4][16], uint16_t mf[16], uint16_t bias[16] )
|
||||
function x264_quant_4x4x4_neon
|
||||
@@ -145,7 +145,7 @@ function x264_quant_4x4x4_neon
|
||||
orrne r0, #8
|
||||
vpop {d8-d15}
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
// quant_8x8( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] )
|
||||
function x264_quant_8x8_neon
|
||||
@@ -165,7 +165,7 @@ function x264_quant_8x8_neon
|
||||
.endr
|
||||
vorr d0, d0, d1
|
||||
QUANT_END d0
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
.macro DEQUANT_START mf_size offset dc=no
|
||||
mov r3, #0x2b
|
||||
@@ -257,7 +257,7 @@ dequant_\size\()_rshift_loop:
|
||||
bgt dequant_\size\()_rshift_loop
|
||||
.endif
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
.endm
|
||||
|
||||
DEQUANT 4x4, 4
|
||||
@@ -307,7 +307,7 @@ dequant_4x4_dc_rshift:
|
||||
vmovn.s32 d3, q13
|
||||
vst1.16 {d0-d3}, [r0,:128]
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
|
||||
// int coeff_last( int16_t *l )
|
||||
@@ -319,7 +319,7 @@ function x264_coeff_last4_arm
|
||||
lsrs r2, r2, #16
|
||||
addne r0, r0, #1
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
function x264_coeff_last8_arm
|
||||
ldrd r2, r3, [r0, #8]
|
||||
@@ -333,7 +333,7 @@ function x264_coeff_last8_arm
|
||||
lsrs r2, r2, #16
|
||||
addne r0, r0, #1
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
|
||||
.macro COEFF_LAST_1x size
|
||||
function x264_coeff_last\size\()_neon
|
||||
@@ -358,7 +358,7 @@ function x264_coeff_last\size\()_neon
|
||||
subslt r0, r3, r0, lsr #2
|
||||
movlt r0, #0
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
||||
.endm
|
||||
|
||||
COEFF_LAST_1x 15
|
||||
@@ -407,4 +407,4 @@ function x264_coeff_last64_neon
|
||||
subslt r0, ip, r0
|
||||
movlt r0, #0
|
||||
bx lr
|
||||
-.endfunc
|
||||
+endfunc
|
@ -1,20 +1,74 @@
|
||||
From 9463ec0004f1bddc49c05ed8e38430a4ce1738fb Mon Sep 17 00:00:00 2001
|
||||
From: Janne Grunau <janne-x264@jannau.net>
|
||||
Date: Sun, 20 Jul 2014 13:40:28 +0200
|
||||
Subject: [PATCH] arm/ppc: use $CC as default assembler
|
||||
|
||||
--- configure.orig 2014-08-27 20:45:08 UTC
|
||||
+++ configure
|
||||
@@ -648,7 +648,7 @@ case $host_cpu in
|
||||
--- configure.orig 2014-08-27 22:45:08.000000000 +0200
|
||||
+++ configure 2015-01-07 13:42:20.868702616 +0100
|
||||
@@ -197,8 +197,9 @@
|
||||
|
||||
as_check() {
|
||||
log_check "whether $AS supports $1"
|
||||
- echo "$1" > conftest.asm
|
||||
- if $AS conftest.asm $ASFLAGS $2 -o conftest.o >conftest.log 2>&1; then
|
||||
+ echo "$1" > conftest$AS_EXT
|
||||
+ as_cmd="$AS conftest$AS_EXT $ASFLAGS $2 -o conftest.o"
|
||||
+ if $as_cmd >conftest.log 2>&1; then
|
||||
res=$?
|
||||
log_ok
|
||||
else
|
||||
@@ -206,12 +207,12 @@
|
||||
log_fail
|
||||
log_msg "Failed commandline was:"
|
||||
log_msg "--------------------------------------------------"
|
||||
- log_msg "$AS conftest.asm $ASFLAGS $2 -o conftest.o"
|
||||
+ log_msg "$as_cmd"
|
||||
cat conftest.log >> config.log
|
||||
log_msg "--------------------------------------------------"
|
||||
log_msg "Failed program was:"
|
||||
log_msg "--------------------------------------------------"
|
||||
- cat conftest.asm >> config.log
|
||||
+ cat conftest$AS_EXT >> config.log
|
||||
log_msg "--------------------------------------------------"
|
||||
fi
|
||||
return $res
|
||||
@@ -302,10 +303,13 @@
|
||||
cross_prefix=""
|
||||
|
||||
EXE=""
|
||||
+AS_EXT=".S"
|
||||
+NL="
|
||||
+"
|
||||
|
||||
# list of all preprocessor HAVE values we can define
|
||||
CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \
|
||||
- LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM"
|
||||
+ LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC"
|
||||
|
||||
# parse options
|
||||
|
||||
@@ -586,6 +590,7 @@
|
||||
i*86)
|
||||
ARCH="X86"
|
||||
AS="yasm"
|
||||
+ AS_EXT=".asm"
|
||||
ASFLAGS="$ASFLAGS -O2 -DARCH_X86_64=0 -I\$(SRCPATH)/common/x86/"
|
||||
if [ $compiler = GNU ]; then
|
||||
if [[ "$asm" == auto && "$CFLAGS" != *-march* ]]; then
|
||||
@@ -626,6 +631,7 @@
|
||||
x86_64)
|
||||
ARCH="X86_64"
|
||||
AS="yasm"
|
||||
+ AS_EXT=".asm"
|
||||
ASFLAGS="$ASFLAGS -DARCH_X86_64=1 -I\$(SRCPATH)/common/x86/"
|
||||
[ $compiler = GNU ] && CFLAGS="-m64 $CFLAGS" && LDFLAGS="-m64 $LDFLAGS"
|
||||
if [ "$SYS" = MACOSX ]; then
|
||||
@@ -648,7 +654,8 @@
|
||||
ARCH="PPC"
|
||||
if [ $asm = auto ] ; then
|
||||
define HAVE_ALTIVEC
|
||||
- AS="${AS-${cross_prefix}gcc}"
|
||||
+ AS="${AS-${CC}}"
|
||||
+ AS_EXT=".c"
|
||||
if [ $SYS = MACOSX ] ; then
|
||||
CFLAGS="$CFLAGS -faltivec -fastf -mcpu=G4"
|
||||
else
|
||||
@@ -674,7 +674,7 @@ case $host_cpu in
|
||||
@@ -674,7 +681,7 @@
|
||||
LDFLAGS="$LDFLAGS -arch armv7"
|
||||
fi
|
||||
else
|
||||
@ -23,3 +77,15 @@ Subject: [PATCH] arm/ppc: use $CC as default assembler
|
||||
fi
|
||||
;;
|
||||
s390|s390x)
|
||||
@@ -760,6 +767,11 @@
|
||||
fi
|
||||
fi
|
||||
|
||||
+if [ $asm = auto -a $ARCH = ARM ] ; then
|
||||
+ # check if the assembler supports '.func' (clang 3.5 does not)
|
||||
+ as_check ".func test${NL}.endfunc" && define HAVE_AS_FUNC 1
|
||||
+fi
|
||||
+
|
||||
[ $asm = no ] && AS=""
|
||||
[ "x$AS" = x ] && asm="no" || asm="yes"
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user