mirror of
https://git.FreeBSD.org/ports.git
synced 2024-11-18 00:10:04 +00:00
archivers/c-blosc2: Rename patch files
This commit is contained in:
parent
05c5bc0d6e
commit
9784f27ed7
@ -1,134 +0,0 @@
|
||||
--- blosc/shuffle-altivec.c.orig 2024-08-12 12:43:11 UTC
|
||||
+++ blosc/shuffle-altivec.c
|
||||
@@ -27,7 +27,7 @@ shuffle2_altivec(uint8_t* const dest, const uint8_t* c
|
||||
const int32_t vectorizable_elements, const int32_t total_elements){
|
||||
static const int32_t bytesoftype = 2;
|
||||
uint32_t i, j;
|
||||
- __vector uint8_t xmm0[2];
|
||||
+ __vector unsigned char xmm0[2];
|
||||
|
||||
for (j = 0; j < vectorizable_elements; j += 16){
|
||||
/* Fetch 16 elements (32 bytes) */
|
||||
@@ -49,7 +49,7 @@ shuffle4_altivec(uint8_t* const dest, const uint8_t* c
|
||||
const int32_t vectorizable_elements, const int32_t total_elements){
|
||||
static const int32_t bytesoftype = 4;
|
||||
int32_t i, j;
|
||||
- __vector uint8_t xmm0[4];
|
||||
+ __vector unsigned char xmm0[4];
|
||||
|
||||
for (j = 0; j < vectorizable_elements; j += 16)
|
||||
{
|
||||
@@ -75,7 +75,7 @@ shuffle8_altivec(uint8_t* const dest, const uint8_t* c
|
||||
const int32_t vectorizable_elements, const int32_t total_elements) {
|
||||
static const uint8_t bytesoftype = 8;
|
||||
int32_t i, j;
|
||||
- __vector uint8_t xmm0[8];
|
||||
+ __vector unsigned char xmm0[8];
|
||||
|
||||
for (j = 0; j < vectorizable_elements; j += 16)
|
||||
{
|
||||
@@ -98,7 +98,7 @@ shuffle16_altivec(uint8_t* const dest, const uint8_t*
|
||||
const int32_t vectorizable_elements, const int32_t total_elements) {
|
||||
static const int32_t bytesoftype = 16;
|
||||
int32_t i, j;
|
||||
- __vector uint8_t xmm0[16];
|
||||
+ __vector unsigned char xmm0[16];
|
||||
|
||||
for (j = 0; j < vectorizable_elements; j += 16)
|
||||
{
|
||||
@@ -123,7 +123,7 @@ shuffle16_tiled_altivec(uint8_t* const dest, const uin
|
||||
const int32_t bytesoftype) {
|
||||
int32_t j, k;
|
||||
const int32_t vecs_per_el_rem = bytesoftype & 0xF;
|
||||
- __vector uint8_t xmm[16];
|
||||
+ __vector unsigned char xmm[16];
|
||||
|
||||
for (j = 0; j < vectorizable_elements; j += 16) {
|
||||
/* Advance the offset into the type by the vector size (in bytes), unless this is
|
||||
@@ -154,7 +154,7 @@ unshuffle2_altivec(uint8_t* const dest, const uint8_t*
|
||||
const int32_t vectorizable_elements, const int32_t total_elements) {
|
||||
static const int32_t bytesoftype = 2;
|
||||
uint32_t i, j;
|
||||
- __vector uint8_t xmm0[2], xmm1[2];
|
||||
+ __vector unsigned char xmm0[2], xmm1[2];
|
||||
|
||||
for (j = 0; j < vectorizable_elements; j += 16) {
|
||||
/* Load 16 elements (32 bytes) into 2 vectors registers. */
|
||||
@@ -178,7 +178,7 @@ unshuffle4_altivec(uint8_t* const dest, const uint8_t*
|
||||
const int32_t vectorizable_elements, const int32_t total_elements) {
|
||||
static const int32_t bytesoftype = 4;
|
||||
uint32_t i, j;
|
||||
- __vector uint8_t xmm0[4], xmm1[4];
|
||||
+ __vector unsigned char xmm0[4], xmm1[4];
|
||||
|
||||
for (j = 0; j < vectorizable_elements; j += 16) {
|
||||
/* Load 16 elements (64 bytes) into 4 vectors registers. */
|
||||
@@ -193,11 +193,11 @@ unshuffle4_altivec(uint8_t* const dest, const uint8_t*
|
||||
/* Shuffle 2-byte words */
|
||||
for (i = 0; i < 2; i++) {
|
||||
/* Compute the low 32 bytes */
|
||||
- xmm0[i] = (__vector uint8_t) vec_vmrghh((__vector uint16_t)xmm1[i * 2],
|
||||
- (__vector uint16_t) xmm1[i * 2 + 1]);
|
||||
+ xmm0[i] = (__vector unsigned char) vec_vmrghh((__vector unsigned short)xmm1[i * 2],
|
||||
+ (__vector unsigned short) xmm1[i * 2 + 1]);
|
||||
/* Compute the hi 32 bytes */
|
||||
- xmm0[i+2] = (__vector uint8_t) vec_vmrglh((__vector uint16_t)xmm1[i * 2],
|
||||
- (__vector uint16_t)xmm1[i * 2 + 1]);
|
||||
+ xmm0[i+2] = (__vector unsigned char) vec_vmrglh((__vector unsigned short)xmm1[i * 2],
|
||||
+ (__vector unsigned short)xmm1[i * 2 + 1]);
|
||||
}
|
||||
/* Store the result vectors in proper order */
|
||||
vec_xst(xmm0[0], bytesoftype * j, dest);
|
||||
@@ -213,7 +213,7 @@ unshuffle8_altivec(uint8_t* const dest, const uint8_t*
|
||||
const int32_t vectorizable_elements, const int32_t total_elements) {
|
||||
static const uint8_t bytesoftype = 8;
|
||||
uint32_t i, j;
|
||||
- __vector uint8_t xmm0[8], xmm1[8];
|
||||
+ __vector unsigned char xmm0[8], xmm1[8];
|
||||
|
||||
// Initialize permutations for writing
|
||||
for (j = 0; j < vectorizable_elements; j += 16) {
|
||||
@@ -227,17 +227,17 @@ unshuffle8_altivec(uint8_t* const dest, const uint8_t*
|
||||
}
|
||||
/* Shuffle 2-byte words */
|
||||
for (i = 0; i < 4; i++) {
|
||||
- xmm0[i] = (__vector uint8_t)vec_vmrghh((__vector uint16_t)xmm1[i * 2],
|
||||
- (__vector uint16_t)xmm1[i * 2 + 1]);
|
||||
- xmm0[4 + i] = (__vector uint8_t)vec_vmrglh((__vector uint16_t)xmm1[i * 2],
|
||||
- (__vector uint16_t)xmm1[i * 2 + 1]);
|
||||
+ xmm0[i] = (__vector unsigned char)vec_vmrghh((__vector unsigned short)xmm1[i * 2],
|
||||
+ (__vector unsigned short)xmm1[i * 2 + 1]);
|
||||
+ xmm0[4 + i] = (__vector unsigned char)vec_vmrglh((__vector unsigned short)xmm1[i * 2],
|
||||
+ (__vector unsigned short)xmm1[i * 2 + 1]);
|
||||
}
|
||||
/* Shuffle 4-byte dwords */
|
||||
for (i = 0; i < 4; i++) {
|
||||
- xmm1[i] = (__vector uint8_t)vec_vmrghw((__vector uint32_t)xmm0[i * 2],
|
||||
- (__vector uint32_t)xmm0[i * 2 + 1]);
|
||||
- xmm1[4 + i] = (__vector uint8_t)vec_vmrglw((__vector uint32_t)xmm0[i * 2],
|
||||
- (__vector uint32_t)xmm0[i * 2 + 1]);
|
||||
+ xmm1[i] = (__vector unsigned char)vec_vmrghw((__vector unsigned int)xmm0[i * 2],
|
||||
+ (__vector unsigned int)xmm0[i * 2 + 1]);
|
||||
+ xmm1[4 + i] = (__vector unsigned char)vec_vmrglw((__vector unsigned int)xmm0[i * 2],
|
||||
+ (__vector unsigned int)xmm0[i * 2 + 1]);
|
||||
}
|
||||
/* Store the result vectors in proper order */
|
||||
vec_xst(xmm1[0], bytesoftype * j, dest);
|
||||
@@ -258,7 +258,7 @@ unshuffle16_altivec(uint8_t* const dest, const uint8_t
|
||||
const int32_t vectorizable_elements, const int32_t total_elements) {
|
||||
static const int32_t bytesoftype = 16;
|
||||
uint32_t i, j;
|
||||
- __vector uint8_t xmm0[16];
|
||||
+ __vector unsigned char xmm0[16];
|
||||
|
||||
for (j = 0; j < vectorizable_elements; j += 16) {
|
||||
/* Load 16 elements (64 bytes) into 4 vectors registers. */
|
||||
@@ -282,7 +282,7 @@ unshuffle16_tiled_altivec(uint8_t* const dest, const u
|
||||
const int32_t bytesoftype) {
|
||||
int32_t i, j, offset_into_type;
|
||||
const int32_t vecs_per_el_rem = bytesoftype & 0xF;
|
||||
- __vector uint8_t xmm[16];
|
||||
+ __vector unsigned char xmm[16];
|
||||
|
||||
|
||||
/* Advance the offset into the type by the vector size (in bytes), unless this is
|
@ -1,58 +0,0 @@
|
||||
--- blosc/transpose-altivec.h.orig 2024-08-12 12:42:34 UTC
|
||||
+++ blosc/transpose-altivec.h
|
||||
@@ -15,18 +15,18 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
-static const __vector uint8_t even = (const __vector uint8_t) {
|
||||
+static const __vector unsigned char even = (const __vector unsigned char) {
|
||||
0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
|
||||
0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e};
|
||||
|
||||
-static const __vector uint8_t odd = (const __vector uint8_t) {
|
||||
+static const __vector unsigned char odd = (const __vector unsigned char) {
|
||||
0x01, 0x03, 0x05, 0x07, 0x09, 0x0b, 0x0d, 0x0f,
|
||||
0x11, 0x13, 0x15, 0x17, 0x19, 0x1b, 0x1d, 0x1f};
|
||||
|
||||
|
||||
/* Transpose inplace 2 vectors of 16 bytes in src into dst. */
|
||||
-static void transpose2x16(__vector uint8_t *xmm0) {
|
||||
- __vector uint8_t xmm1[2];
|
||||
+static void transpose2x16(__vector unsigned char *xmm0) {
|
||||
+ __vector unsigned char xmm1[2];
|
||||
xmm1[0] = vec_perm(xmm0[0], xmm0[1], even);
|
||||
xmm1[1] = vec_perm(xmm0[0], xmm0[1], odd);
|
||||
|
||||
@@ -38,8 +38,8 @@ static void transpose2x16(__vector uint8_t *xmm0) {
|
||||
|
||||
/* Transpose inplace 4 vectors of 16 bytes in src into dst.
|
||||
* Total cost: 8 calls to vec_perm. */
|
||||
-static void transpose4x16(__vector uint8_t *xmm0) {
|
||||
- __vector uint8_t xmm1[4];
|
||||
+static void transpose4x16(__vector unsigned char *xmm0) {
|
||||
+ __vector unsigned char xmm1[4];
|
||||
|
||||
/* Transpose vectors 0-1*/
|
||||
xmm1[0] = vec_perm(xmm0[0], xmm0[1], even);
|
||||
@@ -56,8 +56,8 @@ static void transpose4x16(__vector uint8_t *xmm0) {
|
||||
|
||||
/* Transpose inplace 8 vectors of 16 bytes in src into dst.
|
||||
* Total cost: 24 calls to vec_perm. */
|
||||
-static void transpose8x16(__vector uint8_t *xmm0) {
|
||||
- __vector uint8_t xmm1[8];
|
||||
+static void transpose8x16(__vector unsigned char *xmm0) {
|
||||
+ __vector unsigned char xmm1[8];
|
||||
|
||||
/* Transpose vectors 0-1*/
|
||||
for (int i = 0; i < 8; i += 2){
|
||||
@@ -85,8 +85,8 @@ static void transpose8x16(__vector uint8_t *xmm0) {
|
||||
|
||||
/* Transpose inplace 16 vectors of 16 bytes in src into dst.
|
||||
* Total cost: 64 calls to vec_perm. */
|
||||
-static void transpose16x16(__vector uint8_t * xmm0){
|
||||
- __vector uint8_t xmm1[16];
|
||||
+static void transpose16x16(__vector unsigned char * xmm0){
|
||||
+ __vector unsigned char xmm1[16];
|
||||
/* Transpose vectors 0-1*/
|
||||
for (int i = 0; i < 16; i += 2){
|
||||
xmm1[i] = vec_perm(xmm0[i], xmm0[i+1], even);
|
@ -219,3 +219,195 @@
|
||||
oui16 = (uint16_t*)&out_b[ii + (jj>>3) + kk * elem_size];
|
||||
*oui16 = tmp[4];
|
||||
}
|
||||
--- blosc/shuffle-altivec.c.orig 2024-08-12 12:43:11 UTC
|
||||
+++ blosc/shuffle-altivec.c
|
||||
@@ -27,7 +27,7 @@ shuffle2_altivec(uint8_t* const dest, const uint8_t* c
|
||||
const int32_t vectorizable_elements, const int32_t total_elements){
|
||||
static const int32_t bytesoftype = 2;
|
||||
uint32_t i, j;
|
||||
- __vector uint8_t xmm0[2];
|
||||
+ __vector unsigned char xmm0[2];
|
||||
|
||||
for (j = 0; j < vectorizable_elements; j += 16){
|
||||
/* Fetch 16 elements (32 bytes) */
|
||||
@@ -49,7 +49,7 @@ shuffle4_altivec(uint8_t* const dest, const uint8_t* c
|
||||
const int32_t vectorizable_elements, const int32_t total_elements){
|
||||
static const int32_t bytesoftype = 4;
|
||||
int32_t i, j;
|
||||
- __vector uint8_t xmm0[4];
|
||||
+ __vector unsigned char xmm0[4];
|
||||
|
||||
for (j = 0; j < vectorizable_elements; j += 16)
|
||||
{
|
||||
@@ -75,7 +75,7 @@ shuffle8_altivec(uint8_t* const dest, const uint8_t* c
|
||||
const int32_t vectorizable_elements, const int32_t total_elements) {
|
||||
static const uint8_t bytesoftype = 8;
|
||||
int32_t i, j;
|
||||
- __vector uint8_t xmm0[8];
|
||||
+ __vector unsigned char xmm0[8];
|
||||
|
||||
for (j = 0; j < vectorizable_elements; j += 16)
|
||||
{
|
||||
@@ -98,7 +98,7 @@ shuffle16_altivec(uint8_t* const dest, const uint8_t*
|
||||
const int32_t vectorizable_elements, const int32_t total_elements) {
|
||||
static const int32_t bytesoftype = 16;
|
||||
int32_t i, j;
|
||||
- __vector uint8_t xmm0[16];
|
||||
+ __vector unsigned char xmm0[16];
|
||||
|
||||
for (j = 0; j < vectorizable_elements; j += 16)
|
||||
{
|
||||
@@ -123,7 +123,7 @@ shuffle16_tiled_altivec(uint8_t* const dest, const uin
|
||||
const int32_t bytesoftype) {
|
||||
int32_t j, k;
|
||||
const int32_t vecs_per_el_rem = bytesoftype & 0xF;
|
||||
- __vector uint8_t xmm[16];
|
||||
+ __vector unsigned char xmm[16];
|
||||
|
||||
for (j = 0; j < vectorizable_elements; j += 16) {
|
||||
/* Advance the offset into the type by the vector size (in bytes), unless this is
|
||||
@@ -154,7 +154,7 @@ unshuffle2_altivec(uint8_t* const dest, const uint8_t*
|
||||
const int32_t vectorizable_elements, const int32_t total_elements) {
|
||||
static const int32_t bytesoftype = 2;
|
||||
uint32_t i, j;
|
||||
- __vector uint8_t xmm0[2], xmm1[2];
|
||||
+ __vector unsigned char xmm0[2], xmm1[2];
|
||||
|
||||
for (j = 0; j < vectorizable_elements; j += 16) {
|
||||
/* Load 16 elements (32 bytes) into 2 vectors registers. */
|
||||
@@ -178,7 +178,7 @@ unshuffle4_altivec(uint8_t* const dest, const uint8_t*
|
||||
const int32_t vectorizable_elements, const int32_t total_elements) {
|
||||
static const int32_t bytesoftype = 4;
|
||||
uint32_t i, j;
|
||||
- __vector uint8_t xmm0[4], xmm1[4];
|
||||
+ __vector unsigned char xmm0[4], xmm1[4];
|
||||
|
||||
for (j = 0; j < vectorizable_elements; j += 16) {
|
||||
/* Load 16 elements (64 bytes) into 4 vectors registers. */
|
||||
@@ -193,11 +193,11 @@ unshuffle4_altivec(uint8_t* const dest, const uint8_t*
|
||||
/* Shuffle 2-byte words */
|
||||
for (i = 0; i < 2; i++) {
|
||||
/* Compute the low 32 bytes */
|
||||
- xmm0[i] = (__vector uint8_t) vec_vmrghh((__vector uint16_t)xmm1[i * 2],
|
||||
- (__vector uint16_t) xmm1[i * 2 + 1]);
|
||||
+ xmm0[i] = (__vector unsigned char) vec_vmrghh((__vector unsigned short)xmm1[i * 2],
|
||||
+ (__vector unsigned short) xmm1[i * 2 + 1]);
|
||||
/* Compute the hi 32 bytes */
|
||||
- xmm0[i+2] = (__vector uint8_t) vec_vmrglh((__vector uint16_t)xmm1[i * 2],
|
||||
- (__vector uint16_t)xmm1[i * 2 + 1]);
|
||||
+ xmm0[i+2] = (__vector unsigned char) vec_vmrglh((__vector unsigned short)xmm1[i * 2],
|
||||
+ (__vector unsigned short)xmm1[i * 2 + 1]);
|
||||
}
|
||||
/* Store the result vectors in proper order */
|
||||
vec_xst(xmm0[0], bytesoftype * j, dest);
|
||||
@@ -213,7 +213,7 @@ unshuffle8_altivec(uint8_t* const dest, const uint8_t*
|
||||
const int32_t vectorizable_elements, const int32_t total_elements) {
|
||||
static const uint8_t bytesoftype = 8;
|
||||
uint32_t i, j;
|
||||
- __vector uint8_t xmm0[8], xmm1[8];
|
||||
+ __vector unsigned char xmm0[8], xmm1[8];
|
||||
|
||||
// Initialize permutations for writing
|
||||
for (j = 0; j < vectorizable_elements; j += 16) {
|
||||
@@ -227,17 +227,17 @@ unshuffle8_altivec(uint8_t* const dest, const uint8_t*
|
||||
}
|
||||
/* Shuffle 2-byte words */
|
||||
for (i = 0; i < 4; i++) {
|
||||
- xmm0[i] = (__vector uint8_t)vec_vmrghh((__vector uint16_t)xmm1[i * 2],
|
||||
- (__vector uint16_t)xmm1[i * 2 + 1]);
|
||||
- xmm0[4 + i] = (__vector uint8_t)vec_vmrglh((__vector uint16_t)xmm1[i * 2],
|
||||
- (__vector uint16_t)xmm1[i * 2 + 1]);
|
||||
+ xmm0[i] = (__vector unsigned char)vec_vmrghh((__vector unsigned short)xmm1[i * 2],
|
||||
+ (__vector unsigned short)xmm1[i * 2 + 1]);
|
||||
+ xmm0[4 + i] = (__vector unsigned char)vec_vmrglh((__vector unsigned short)xmm1[i * 2],
|
||||
+ (__vector unsigned short)xmm1[i * 2 + 1]);
|
||||
}
|
||||
/* Shuffle 4-byte dwords */
|
||||
for (i = 0; i < 4; i++) {
|
||||
- xmm1[i] = (__vector uint8_t)vec_vmrghw((__vector uint32_t)xmm0[i * 2],
|
||||
- (__vector uint32_t)xmm0[i * 2 + 1]);
|
||||
- xmm1[4 + i] = (__vector uint8_t)vec_vmrglw((__vector uint32_t)xmm0[i * 2],
|
||||
- (__vector uint32_t)xmm0[i * 2 + 1]);
|
||||
+ xmm1[i] = (__vector unsigned char)vec_vmrghw((__vector unsigned int)xmm0[i * 2],
|
||||
+ (__vector unsigned int)xmm0[i * 2 + 1]);
|
||||
+ xmm1[4 + i] = (__vector unsigned char)vec_vmrglw((__vector unsigned int)xmm0[i * 2],
|
||||
+ (__vector unsigned int)xmm0[i * 2 + 1]);
|
||||
}
|
||||
/* Store the result vectors in proper order */
|
||||
vec_xst(xmm1[0], bytesoftype * j, dest);
|
||||
@@ -258,7 +258,7 @@ unshuffle16_altivec(uint8_t* const dest, const uint8_t
|
||||
const int32_t vectorizable_elements, const int32_t total_elements) {
|
||||
static const int32_t bytesoftype = 16;
|
||||
uint32_t i, j;
|
||||
- __vector uint8_t xmm0[16];
|
||||
+ __vector unsigned char xmm0[16];
|
||||
|
||||
for (j = 0; j < vectorizable_elements; j += 16) {
|
||||
/* Load 16 elements (64 bytes) into 4 vectors registers. */
|
||||
@@ -282,7 +282,7 @@ unshuffle16_tiled_altivec(uint8_t* const dest, const u
|
||||
const int32_t bytesoftype) {
|
||||
int32_t i, j, offset_into_type;
|
||||
const int32_t vecs_per_el_rem = bytesoftype & 0xF;
|
||||
- __vector uint8_t xmm[16];
|
||||
+ __vector unsigned char xmm[16];
|
||||
|
||||
|
||||
/* Advance the offset into the type by the vector size (in bytes), unless this is
|
||||
--- blosc/transpose-altivec.h.orig 2024-08-12 12:42:34 UTC
|
||||
+++ blosc/transpose-altivec.h
|
||||
@@ -15,18 +15,18 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
-static const __vector uint8_t even = (const __vector uint8_t) {
|
||||
+static const __vector unsigned char even = (const __vector unsigned char) {
|
||||
0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
|
||||
0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e};
|
||||
|
||||
-static const __vector uint8_t odd = (const __vector uint8_t) {
|
||||
+static const __vector unsigned char odd = (const __vector unsigned char) {
|
||||
0x01, 0x03, 0x05, 0x07, 0x09, 0x0b, 0x0d, 0x0f,
|
||||
0x11, 0x13, 0x15, 0x17, 0x19, 0x1b, 0x1d, 0x1f};
|
||||
|
||||
|
||||
/* Transpose inplace 2 vectors of 16 bytes in src into dst. */
|
||||
-static void transpose2x16(__vector uint8_t *xmm0) {
|
||||
- __vector uint8_t xmm1[2];
|
||||
+static void transpose2x16(__vector unsigned char *xmm0) {
|
||||
+ __vector unsigned char xmm1[2];
|
||||
xmm1[0] = vec_perm(xmm0[0], xmm0[1], even);
|
||||
xmm1[1] = vec_perm(xmm0[0], xmm0[1], odd);
|
||||
|
||||
@@ -38,8 +38,8 @@ static void transpose2x16(__vector uint8_t *xmm0) {
|
||||
|
||||
/* Transpose inplace 4 vectors of 16 bytes in src into dst.
|
||||
* Total cost: 8 calls to vec_perm. */
|
||||
-static void transpose4x16(__vector uint8_t *xmm0) {
|
||||
- __vector uint8_t xmm1[4];
|
||||
+static void transpose4x16(__vector unsigned char *xmm0) {
|
||||
+ __vector unsigned char xmm1[4];
|
||||
|
||||
/* Transpose vectors 0-1*/
|
||||
xmm1[0] = vec_perm(xmm0[0], xmm0[1], even);
|
||||
@@ -56,8 +56,8 @@ static void transpose4x16(__vector uint8_t *xmm0) {
|
||||
|
||||
/* Transpose inplace 8 vectors of 16 bytes in src into dst.
|
||||
* Total cost: 24 calls to vec_perm. */
|
||||
-static void transpose8x16(__vector uint8_t *xmm0) {
|
||||
- __vector uint8_t xmm1[8];
|
||||
+static void transpose8x16(__vector unsigned char *xmm0) {
|
||||
+ __vector unsigned char xmm1[8];
|
||||
|
||||
/* Transpose vectors 0-1*/
|
||||
for (int i = 0; i < 8; i += 2){
|
||||
@@ -85,8 +85,8 @@ static void transpose8x16(__vector uint8_t *xmm0) {
|
||||
|
||||
/* Transpose inplace 16 vectors of 16 bytes in src into dst.
|
||||
* Total cost: 64 calls to vec_perm. */
|
||||
-static void transpose16x16(__vector uint8_t * xmm0){
|
||||
- __vector uint8_t xmm1[16];
|
||||
+static void transpose16x16(__vector unsigned char * xmm0){
|
||||
+ __vector unsigned char xmm1[16];
|
||||
/* Transpose vectors 0-1*/
|
||||
for (int i = 0; i < 16; i += 2){
|
||||
xmm1[i] = vec_perm(xmm0[i], xmm0[i+1], even);
|
Loading…
Reference in New Issue
Block a user