xregs_sig: Add the ability to handle multiple register banks

While here, replace the .global directive with the traditionally used .globl.

Reviewed by:		kib
Differential Revision:	https://reviews.freebsd.org/D40410
This commit is contained in:
Dmitry Chagin 2023-06-04 12:06:25 +03:00
parent 086e0149ae
commit 0be13a45f7
3 changed files with 100 additions and 64 deletions

View File

@ -5,9 +5,11 @@
* $FreeBSD$
*/
.global cpu_to_xregs
.type cpu_to_xregs, @function
cpu_to_xregs:
.text
.globl cpu_to_vfp
.type cpu_to_vfp, @function
cpu_to_vfp:
stp q0, q1, [x0, #( 0 * 16)]
stp q2, q3, [x0, #( 2 * 16)]
stp q4, q5, [x0, #( 4 * 16)]
@ -26,12 +28,12 @@ cpu_to_xregs:
stp q30, q31, [x0, #(30 * 16)]
ret
.size cpu_to_xregs, . - cpu_to_xregs
.size cpu_to_vfp, . - cpu_to_vfp
.global xregs_to_cpu
.type xregs_to_cpu, @function
xregs_to_cpu:
.globl vfp_to_cpu
.type vfp_to_cpu, @function
vfp_to_cpu:
ldp q0, q1, [x0, #( 0 * 16)]
ldp q2, q3, [x0, #( 2 * 16)]
ldp q4, q5, [x0, #( 4 * 16)]
@ -50,6 +52,6 @@ xregs_to_cpu:
ldp q30, q31, [x0, #(30 * 16)]
ret
.size xregs_to_cpu, . - xregs_to_cpu
.size vfp_to_cpu, . - vfp_to_cpu
.section .note.GNU-stack,"",@progbits

View File

@ -5,9 +5,11 @@
* $FreeBSD$
*/
.global cpu_to_xregs
.type cpu_to_xregs, @function
cpu_to_xregs:
.text
.globl cpu_to_xmm
.type cpu_to_xmm, @function
cpu_to_xmm:
movdqu %xmm0, (%rdi)
movdqu %xmm1, 1 * 16(%rdi)
movdqu %xmm2, 2 * 16(%rdi)
@ -26,12 +28,12 @@ cpu_to_xregs:
movdqu %xmm15, 15 * 16(%rdi)
retq
.size cpu_to_xregs, . - cpu_to_xregs
.size cpu_to_xmm, . - cpu_to_xmm
.global xregs_to_cpu
.type xregs_to_cpu, @function
xregs_to_cpu:
.globl xmm_to_cpu
.type xmm_to_cpu, @function
xmm_to_cpu:
movdqu (%rdi), %xmm0
movdqu 1 * 16(%rdi), %xmm1
movdqu 2 * 16(%rdi), %xmm2
@ -50,6 +52,6 @@ xregs_to_cpu:
movdqu 15 * 16(%rdi), %xmm15
retq
.size xregs_to_cpu, . - xregs_to_cpu
.size xmm_to_cpu, . - xmm_to_cpu
.section .note.GNU-stack,"",@progbits

View File

@ -44,30 +44,49 @@
#ifndef __unused
#define __unused __attribute__((__unused__))
#endif
#ifndef nitems
#define nitems(x) (sizeof((x)) / sizeof((x)[0]))
#endif
/*
 * Describes one bank of extended CPU registers exercised by the test.
 * The save area for a bank is regs * bytes bytes long.
 */
struct xregs_bank {
/* Bank name shown in the "Starting ... threads" message. */
const char *b_name;
/* Register name prefix printed before the register index on mismatch. */
const char *r_name;
/* Number of registers in the bank. */
uint32_t regs;
/* Size of a single register, in bytes. */
uint32_t bytes;
/* Loads the bank's registers from the given buffer into the CPU. */
void (*x2c)(uint8_t *);
/* Stores the CPU's current register contents into the given buffer. */
void (*c2x)(uint8_t *);
};
#if defined(__amd64__)
#define XREGSRNAM "xmm"
#define NREGS 16
/* Assembly helpers: store the %xmm registers to, or load them from, memory. */
void cpu_to_xmm(uint8_t *);
void xmm_to_cpu(uint8_t *);
/* Register banks tested on amd64: a single bank of sixteen 16-byte registers. */
static const struct xregs_bank xregs_banks[] = {
{
.b_name = "SSE",
.r_name = "xmm",
.regs = 16,
.bytes = 16,
.x2c = xmm_to_cpu,
.c2x = cpu_to_xmm,
},
};
#elif defined(__aarch64__)
#define XREGSRNAM "q"
#define NREGS 32
/* Assembly helpers: store the q registers to, or load them from, memory. */
void cpu_to_vfp(uint8_t *);
void vfp_to_cpu(uint8_t *);
/* Register banks tested on aarch64: a single bank of thirty-two 16-byte registers. */
static const struct xregs_bank xregs_banks[] = {
{
.b_name = "VFP",
.r_name = "q",
.regs = 32,
.bytes = 16,
.x2c = vfp_to_cpu,
.c2x = cpu_to_vfp,
},
};
#endif
struct xregsreg {
uint8_t xregs_bytes[16];
};
struct xregs {
struct xregsreg xregsreg[NREGS];
};
void cpu_to_xregs(struct xregs *xregs);
void xregs_to_cpu(struct xregs *xregs);
static atomic_uint sigs;
static int max_bank_idx;
static void
sigusr1_handler(int sig __unused, siginfo_t *si __unused, void *m __unused)
@ -87,23 +106,22 @@ sigalrm_handler(int sig __unused)
alarm(TIMO);
}
static struct xregs zero_xregs = {};
static void
fill_xregs(struct xregs *xregs)
fill_xregs(uint8_t *xregs, int bank)
{
arc4random_buf(xregs, sizeof(*xregs));
arc4random_buf(xregs, xregs_banks[bank].regs * xregs_banks[bank].bytes);
}
static void
dump_xregs(const struct xregsreg *r)
dump_xregs(const uint8_t *r, int bank)
{
unsigned k;
for (k = 0; k < nitems(r->xregs_bytes); k++) {
for (k = 0; k < xregs_banks[bank].bytes; k++) {
if (k != 0)
printf(" ");
printf("%02x", r->xregs_bytes[k]);
printf("%02x", r[k]);
}
printf("\n");
}
@ -111,9 +129,9 @@ dump_xregs(const struct xregsreg *r)
static pthread_mutex_t show_lock;
static void
show_diff(const struct xregs *xregs1, const struct xregs *xregs2)
show_diff(const uint8_t *xregs1, const uint8_t *xregs2, int bank)
{
const struct xregsreg *r1, *r2;
const uint8_t *r1, *r2;
unsigned i, j;
#if defined(__FreeBSD__)
@ -121,14 +139,14 @@ show_diff(const struct xregs *xregs1, const struct xregs *xregs2)
#elif defined(__linux__)
printf("thr %ld\n", syscall(SYS_gettid));
#endif
for (i = 0; i < nitems(xregs1->xregsreg); i++) {
r1 = &xregs1->xregsreg[i];
r2 = &xregs2->xregsreg[i];
for (j = 0; j < nitems(r1->xregs_bytes); j++) {
if (r1->xregs_bytes[j] != r2->xregs_bytes[j]) {
printf("%%%s%u\n", XREGSRNAM, i);
dump_xregs(r1);
dump_xregs(r2);
for (i = 0; i < xregs_banks[bank].regs; i++) {
r1 = xregs1 + i * xregs_banks[bank].bytes;
r2 = xregs2 + i * xregs_banks[bank].bytes;
for (j = 0; j < xregs_banks[bank].bytes; j++) {
if (r1[j] != r2[j]) {
printf("%%%s%u\n", xregs_banks[bank].r_name, i);
dump_xregs(r1, bank);
dump_xregs(r2, bank);
break;
}
}
@ -142,28 +160,32 @@ my_pause(void)
}
static void *
worker_thread(void *arg __unused)
worker_thread(void *arg)
{
struct xregs xregs, xregs_cpu;
int bank = (uintptr_t)arg;
int sz = xregs_banks[bank].regs * xregs_banks[bank].bytes;
uint8_t xregs[sz], xregs_cpu[sz], zero_xregs[sz];
fill_xregs(&xregs);
memset(zero_xregs, 0, sz);
fill_xregs(xregs, bank);
for (;;) {
xregs_to_cpu(&xregs);
xregs_banks[bank].x2c(xregs);
my_pause();
cpu_to_xregs(&xregs_cpu);
if (memcmp(&xregs, &xregs_cpu, sizeof(struct xregs)) != 0) {
xregs_banks[bank].c2x(xregs_cpu);
if (memcmp(xregs, xregs_cpu, sz) != 0) {
pthread_mutex_lock(&show_lock);
show_diff(&xregs, &xregs_cpu);
show_diff(xregs, xregs_cpu, bank);
abort();
pthread_mutex_unlock(&show_lock);
}
xregs_to_cpu(&zero_xregs);
xregs_banks[bank].x2c(zero_xregs);
my_pause();
cpu_to_xregs(&xregs_cpu);
if (memcmp(&zero_xregs, &xregs_cpu, sizeof(struct xregs)) != 0) {
xregs_banks[bank].c2x(xregs_cpu);
if (memcmp(zero_xregs, xregs_cpu, sz) != 0) {
pthread_mutex_lock(&show_lock);
show_diff(&zero_xregs, &xregs_cpu);
show_diff(zero_xregs, xregs_cpu, bank);
abort();
pthread_mutex_unlock(&show_lock);
}
@ -175,7 +197,9 @@ int
main(void)
{
struct sigaction sa;
int error, i, ncpu;
int error, i, ncpu, bank;
max_bank_idx = 0;
bzero(&sa, sizeof(sa));
sa.sa_handler = sigalrm_handler;
@ -199,14 +223,22 @@ main(void)
}
ncpu = sysconf(_SC_NPROCESSORS_ONLN);
ncpu *= 2;
if (max_bank_idx == 0)
ncpu *= 2;
bank = 0;
pthread_t wt[ncpu];
nextbank:
printf("Starting %d threads for registers bank %s sized [%d][%d]\n", ncpu,
xregs_banks[bank].b_name, xregs_banks[bank].regs, xregs_banks[bank].bytes);
for (i = 0; i < ncpu; i++) {
error = pthread_create(&wt[i], NULL, worker_thread, NULL);
error = pthread_create(&wt[i], NULL, worker_thread,
(void *)(uintptr_t)bank);
if (error != 0) {
fprintf(stderr, "pthread_create %s\n", strerror(error));
}
}
if (++bank <= max_bank_idx)
goto nextbank;
alarm(TIMO);
for (;;) {