1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-12-19 10:53:58 +00:00

Kernel support for the Vector-Scalar eXtension (VSX) found on the POWER7
and POWER8. This instruction set unifies the 32 64-bit scalar floating
point registers with the 32 128-bit vector registers into a single bank
of 64 128-bit registers. Kernel support mostly amounts to saving and
restoring the wider version of the floating point registers and making
sure that both scalar FP and vector registers are enabled once a VSX
instruction is executed. get_mcontext() and friends currently cannot
see the extra 64 bits of each widened register; exposing them will
require a little more work.

As the system compiler (GCC 4.2) does not support VSX, making use of this
from userland requires either a newer GCC or clang.

Relnotes:	yes
Sponsored by:	FreeBSD Foundation
This commit is contained in:
Nathan Whitehorn 2015-02-22 21:40:27 +00:00
parent 4445af212c
commit 35f612b88a
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=279189
13 changed files with 138 additions and 61 deletions

View File

@ -116,6 +116,7 @@ static struct powerpc_exception powerpc_exceptions[] = {
{ 0x0e00, "floating-point assist" }, { 0x0e00, "floating-point assist" },
{ 0x0f00, "performance monitoring" }, { 0x0f00, "performance monitoring" },
{ 0x0f20, "altivec unavailable" }, { 0x0f20, "altivec unavailable" },
{ 0x0f40, "vsx unavailable" },
{ 0x1000, "instruction tlb miss" }, { 0x1000, "instruction tlb miss" },
{ 0x1100, "data load tlb miss" }, { 0x1100, "data load tlb miss" },
{ 0x1200, "data store tlb miss" }, { 0x1200, "data store tlb miss" },
@ -230,6 +231,17 @@ trap(struct trapframe *frame)
enable_vec(td); enable_vec(td);
break; break;
case EXC_VSX:
KASSERT((td->td_pcb->pcb_flags & PCB_VSX) != PCB_VSX,
("VSX already enabled for thread"));
if (!(td->td_pcb->pcb_flags & PCB_VEC))
enable_vec(td);
if (!(td->td_pcb->pcb_flags & PCB_FPU))
save_fpu(td);
td->td_pcb->pcb_flags |= PCB_VSX;
enable_fpu(td);
break;
case EXC_VECAST_G4: case EXC_VECAST_G4:
case EXC_VECAST_G5: case EXC_VECAST_G5:
/* /*
@ -709,7 +721,7 @@ fix_unaligned(struct thread *td, struct trapframe *frame)
case EXC_ALI_LFD: case EXC_ALI_LFD:
case EXC_ALI_STFD: case EXC_ALI_STFD:
reg = EXC_ALI_RST(frame->cpu.aim.dsisr); reg = EXC_ALI_RST(frame->cpu.aim.dsisr);
fpr = &td->td_pcb->pcb_fpu.fpr[reg]; fpr = &td->td_pcb->pcb_fpu.fpr[reg].fpr;
fputhread = PCPU_GET(fputhread); fputhread = PCPU_GET(fputhread);
/* Juggle the FPU to ensure that we've initialized /* Juggle the FPU to ensure that we've initialized

View File

@ -359,7 +359,7 @@ CNAME(trapcode):
li %r1,TRAP_GENTRAP li %r1,TRAP_GENTRAP
ld %r1,0(%r1) ld %r1,0(%r1)
mtlr %r1 mtlr %r1
li %r1, 0xA0 /* How to get the vector from LR */ li %r1, 0xe0 /* How to get the vector from LR */
blrl /* Branch to generictrap */ blrl /* Branch to generictrap */
CNAME(trapcodeend): CNAME(trapcodeend):

View File

@ -335,7 +335,7 @@ fpu_execute(struct trapframe *tf, struct fpemu *fe, union instr *insn)
if (ra != 0) if (ra != 0)
addr += tf->fixreg[ra]; addr += tf->fixreg[ra];
rt = instr.i_x.i_rt; rt = instr.i_x.i_rt;
a = (int *)&fs->fpreg[rt]; a = (int *)&fs->fpreg[rt].fpr;
DPRINTF(FPE_INSN, DPRINTF(FPE_INSN,
("fpu_execute: Store INT %x at %p\n", ("fpu_execute: Store INT %x at %p\n",
a[1], (void *)addr)); a[1], (void *)addr));
@ -402,7 +402,8 @@ fpu_execute(struct trapframe *tf, struct fpemu *fe, union instr *insn)
DPRINTF(FPE_INSN, DPRINTF(FPE_INSN,
("fpu_execute: Store DBL at %p\n", ("fpu_execute: Store DBL at %p\n",
(void *)addr)); (void *)addr));
if (copyout(&fs->fpreg[rt], (void *)addr, size)) if (copyout(&fs->fpreg[rt].fpr, (void *)addr,
size))
return (FAULT); return (FAULT);
} }
} else { } else {
@ -410,12 +411,13 @@ fpu_execute(struct trapframe *tf, struct fpemu *fe, union instr *insn)
FPU_EMU_EVCNT_INCR(fpload); FPU_EMU_EVCNT_INCR(fpload);
DPRINTF(FPE_INSN, ("fpu_execute: Load from %p\n", DPRINTF(FPE_INSN, ("fpu_execute: Load from %p\n",
(void *)addr)); (void *)addr));
if (copyin((const void *)addr, &fs->fpreg[rt], size)) if (copyin((const void *)addr, &fs->fpreg[rt].fpr,
size))
return (FAULT); return (FAULT);
if (type != FTYPE_DBL) { if (type != FTYPE_DBL) {
fpu_explode(fe, fp = &fe->fe_f1, type, rt); fpu_explode(fe, fp = &fe->fe_f1, type, rt);
fpu_implode(fe, fp, FTYPE_DBL, fpu_implode(fe, fp, FTYPE_DBL,
(u_int *)&fs->fpreg[rt]); (u_int *)&fs->fpreg[rt].fpr);
} }
} }
if (update) if (update)
@ -468,7 +470,7 @@ fpu_execute(struct trapframe *tf, struct fpemu *fe, union instr *insn)
DPRINTF(FPE_INSN, ("fpu_execute: FRSP\n")); DPRINTF(FPE_INSN, ("fpu_execute: FRSP\n"));
fpu_explode(fe, fp = &fe->fe_f1, FTYPE_DBL, rb); fpu_explode(fe, fp = &fe->fe_f1, FTYPE_DBL, rb);
fpu_implode(fe, fp, FTYPE_SNG, fpu_implode(fe, fp, FTYPE_SNG,
(u_int *)&fs->fpreg[rt]); (u_int *)&fs->fpreg[rt].fpr);
fpu_explode(fe, fp = &fe->fe_f1, FTYPE_SNG, rt); fpu_explode(fe, fp = &fe->fe_f1, FTYPE_SNG, rt);
type = FTYPE_DBL; type = FTYPE_DBL;
break; break;
@ -501,9 +503,9 @@ fpu_execute(struct trapframe *tf, struct fpemu *fe, union instr *insn)
case OPC63_FNEG: case OPC63_FNEG:
FPU_EMU_EVCNT_INCR(fnegabs); FPU_EMU_EVCNT_INCR(fnegabs);
DPRINTF(FPE_INSN, ("fpu_execute: FNEGABS\n")); DPRINTF(FPE_INSN, ("fpu_execute: FNEGABS\n"));
memcpy(&fs->fpreg[rt], &fs->fpreg[rb], memcpy(&fs->fpreg[rt].fpr, &fs->fpreg[rb].fpr,
sizeof(double)); sizeof(double));
a = (int *)&fs->fpreg[rt]; a = (int *)&fs->fpreg[rt].fpr;
*a ^= (1U << 31); *a ^= (1U << 31);
break; break;
case OPC63_MCRFS: case OPC63_MCRFS:
@ -531,7 +533,7 @@ fpu_execute(struct trapframe *tf, struct fpemu *fe, union instr *insn)
case OPC63_FMR: case OPC63_FMR:
FPU_EMU_EVCNT_INCR(fmr); FPU_EMU_EVCNT_INCR(fmr);
DPRINTF(FPE_INSN, ("fpu_execute: FMR\n")); DPRINTF(FPE_INSN, ("fpu_execute: FMR\n"));
memcpy(&fs->fpreg[rt], &fs->fpreg[rb], memcpy(&fs->fpreg[rt].fpr, &fs->fpreg[rb].fpr,
sizeof(double)); sizeof(double));
break; break;
case OPC63_MTFSFI: case OPC63_MTFSFI:
@ -548,23 +550,23 @@ fpu_execute(struct trapframe *tf, struct fpemu *fe, union instr *insn)
case OPC63_FNABS: case OPC63_FNABS:
FPU_EMU_EVCNT_INCR(fnabs); FPU_EMU_EVCNT_INCR(fnabs);
DPRINTF(FPE_INSN, ("fpu_execute: FABS\n")); DPRINTF(FPE_INSN, ("fpu_execute: FABS\n"));
memcpy(&fs->fpreg[rt], &fs->fpreg[rb], memcpy(&fs->fpreg[rt].fpr, &fs->fpreg[rb].fpr,
sizeof(double)); sizeof(double));
a = (int *)&fs->fpreg[rt]; a = (int *)&fs->fpreg[rt].fpr;
*a |= (1U << 31); *a |= (1U << 31);
break; break;
case OPC63_FABS: case OPC63_FABS:
FPU_EMU_EVCNT_INCR(fabs); FPU_EMU_EVCNT_INCR(fabs);
DPRINTF(FPE_INSN, ("fpu_execute: FABS\n")); DPRINTF(FPE_INSN, ("fpu_execute: FABS\n"));
memcpy(&fs->fpreg[rt], &fs->fpreg[rb], memcpy(&fs->fpreg[rt].fpr, &fs->fpreg[rb].fpr,
sizeof(double)); sizeof(double));
a = (int *)&fs->fpreg[rt]; a = (int *)&fs->fpreg[rt].fpr;
*a &= ~(1U << 31); *a &= ~(1U << 31);
break; break;
case OPC63_MFFS: case OPC63_MFFS:
FPU_EMU_EVCNT_INCR(mffs); FPU_EMU_EVCNT_INCR(mffs);
DPRINTF(FPE_INSN, ("fpu_execute: MFFS\n")); DPRINTF(FPE_INSN, ("fpu_execute: MFFS\n"));
memcpy(&fs->fpreg[rt], &fs->fpscr, memcpy(&fs->fpreg[rt].fpr, &fs->fpscr,
sizeof(fs->fpscr)); sizeof(fs->fpscr));
break; break;
case OPC63_MTFSF: case OPC63_MTFSF:
@ -579,7 +581,7 @@ fpu_execute(struct trapframe *tf, struct fpemu *fe, union instr *insn)
if (rt & (1<<ra)) if (rt & (1<<ra))
mask |= (0xf<<(4*ra)); mask |= (0xf<<(4*ra));
} }
a = (int *)&fs->fpreg[rt]; a = (int *)&fs->fpreg[rt].fpr;
fe->fe_cx = mask & a[1]; fe->fe_cx = mask & a[1];
fe->fe_fpscr = (fe->fe_fpscr&~mask) | fe->fe_fpscr = (fe->fe_fpscr&~mask) |
(fe->fe_cx); (fe->fe_cx);
@ -646,12 +648,12 @@ fpu_execute(struct trapframe *tf, struct fpemu *fe, union instr *insn)
case OPC63M_FSEL: case OPC63M_FSEL:
FPU_EMU_EVCNT_INCR(fsel); FPU_EMU_EVCNT_INCR(fsel);
DPRINTF(FPE_INSN, ("fpu_execute: FSEL\n")); DPRINTF(FPE_INSN, ("fpu_execute: FSEL\n"));
a = (int *)&fe->fe_fpstate->fpreg[ra]; a = (int *)&fe->fe_fpstate->fpreg[ra].fpr;
if ((*a & 0x80000000) && (*a & 0x7fffffff)) if ((*a & 0x80000000) && (*a & 0x7fffffff))
/* fra < 0 */ /* fra < 0 */
rc = rb; rc = rb;
DPRINTF(FPE_INSN, ("f%d => f%d\n", rc, rt)); DPRINTF(FPE_INSN, ("f%d => f%d\n", rc, rt));
memcpy(&fs->fpreg[rt], &fs->fpreg[rc], memcpy(&fs->fpreg[rt].fpr, &fs->fpreg[rc].fpr,
sizeof(double)); sizeof(double));
break; break;
case OPC59_FRES: case OPC59_FRES:
@ -660,7 +662,7 @@ fpu_execute(struct trapframe *tf, struct fpemu *fe, union instr *insn)
fpu_explode(fe, &fe->fe_f1, type, rb); fpu_explode(fe, &fe->fe_f1, type, rb);
fp = fpu_sqrt(fe); fp = fpu_sqrt(fe);
/* now we've gotta overwrite the dest reg */ /* now we've gotta overwrite the dest reg */
*((int *)&fe->fe_fpstate->fpreg[rt]) = 1; *((int *)&fe->fe_fpstate->fpreg[rt].fpr) = 1;
fpu_explode(fe, &fe->fe_f1, FTYPE_INT, rt); fpu_explode(fe, &fe->fe_f1, FTYPE_INT, rt);
fpu_div(fe); fpu_div(fe);
break; break;
@ -679,7 +681,7 @@ fpu_execute(struct trapframe *tf, struct fpemu *fe, union instr *insn)
fp = fpu_sqrt(fe); fp = fpu_sqrt(fe);
fe->fe_f2 = *fp; fe->fe_f2 = *fp;
/* now we've gotta overwrite the dest reg */ /* now we've gotta overwrite the dest reg */
*((int *)&fe->fe_fpstate->fpreg[rt]) = 1; *((int *)&fe->fe_fpstate->fpreg[rt].fpr) = 1;
fpu_explode(fe, &fe->fe_f1, FTYPE_INT, rt); fpu_explode(fe, &fe->fe_f1, FTYPE_INT, rt);
fpu_div(fe); fpu_div(fe);
break; break;
@ -735,7 +737,7 @@ fpu_execute(struct trapframe *tf, struct fpemu *fe, union instr *insn)
/* If the instruction was single precision, round */ /* If the instruction was single precision, round */
if (!(instr.i_any.i_opcd & 0x4)) { if (!(instr.i_any.i_opcd & 0x4)) {
fpu_implode(fe, fp, FTYPE_SNG, fpu_implode(fe, fp, FTYPE_SNG,
(u_int *)&fs->fpreg[rt]); (u_int *)&fs->fpreg[rt].fpr);
fpu_explode(fe, fp = &fe->fe_f1, FTYPE_SNG, rt); fpu_explode(fe, fp = &fe->fe_f1, FTYPE_SNG, rt);
} }
} }
@ -750,7 +752,7 @@ fpu_execute(struct trapframe *tf, struct fpemu *fe, union instr *insn)
* Otherwise set new current exceptions and accrue. * Otherwise set new current exceptions and accrue.
*/ */
if (fp) if (fp)
fpu_implode(fe, fp, type, (u_int *)&fs->fpreg[rt]); fpu_implode(fe, fp, type, (u_int *)&fs->fpreg[rt].fpr);
cx = fe->fe_cx; cx = fe->fe_cx;
fsr = fe->fe_fpscr; fsr = fe->fe_fpscr;
if (cx != 0) { if (cx != 0) {

View File

@ -211,9 +211,9 @@ fpu_explode(struct fpemu *fe, struct fpn *fp, int type, int reg)
u_int s, *space; u_int s, *space;
u_int64_t l, *xspace; u_int64_t l, *xspace;
xspace = (u_int64_t *)&fe->fe_fpstate->fpreg[reg]; xspace = (u_int64_t *)&fe->fe_fpstate->fpreg[reg].fpr;
l = xspace[0]; l = xspace[0];
space = (u_int *)&fe->fe_fpstate->fpreg[reg]; space = (u_int *)&fe->fe_fpstate->fpreg[reg].fpr;
s = space[0]; s = space[0];
fp->fp_sign = s >> 31; fp->fp_sign = s >> 31;
fp->fp_sticky = 0; fp->fp_sticky = 0;

View File

@ -55,10 +55,12 @@ extern int cpu_features;
#define PPC_FEATURE_HAS_FPU 0x08000000 #define PPC_FEATURE_HAS_FPU 0x08000000
#define PPC_FEATURE_HAS_MMU 0x04000000 #define PPC_FEATURE_HAS_MMU 0x04000000
#define PPC_FEATURE_UNIFIED_CACHE 0x01000000 #define PPC_FEATURE_UNIFIED_CACHE 0x01000000
#define PPC_FEATURE_HAS_VSX 0x00000080
#define PPC_FEATURE_BITMASK \ #define PPC_FEATURE_BITMASK \
"\20" \ "\20" \
"\040PPC32\037PPC64\035ALTIVEC\034FPU\033MMU\031UNIFIEDCACHE" "\040PPC32\037PPC64\035ALTIVEC\034FPU\033MMU\031UNIFIEDCACHE" \
"\010VSX"
#define TRAPF_USERMODE(frame) (((frame)->srr1 & PSL_PR) != 0) #define TRAPF_USERMODE(frame) (((frame)->srr1 & PSL_PR) != 0)
#define TRAPF_PC(frame) ((frame)->srr0) #define TRAPF_PC(frame) ((frame)->srr0)

View File

@ -50,8 +50,12 @@ struct pcb {
#define PCB_FPU 1 /* Process uses FPU */ #define PCB_FPU 1 /* Process uses FPU */
#define PCB_FPREGS 2 /* Process had FPU registers initialized */ #define PCB_FPREGS 2 /* Process had FPU registers initialized */
#define PCB_VEC 4 /* Process had Altivec initialized */ #define PCB_VEC 4 /* Process had Altivec initialized */
#define PCB_VSX 8 /* Process had VSX initialized */
struct fpu { struct fpu {
double fpr[32]; union {
double fpr;
uint32_t vsr[4];
} fpr[32];
double fpscr; /* FPSCR stored as double for easier access */ double fpscr; /* FPSCR stored as double for easier access */
} pcb_fpu; /* Floating point processor */ } pcb_fpu; /* Floating point processor */
unsigned int pcb_fpcpu; /* which CPU had our FPU unsigned int pcb_fpcpu; /* which CPU had our FPU

View File

@ -39,6 +39,7 @@
* Machine State Register (MSR) - All cores * Machine State Register (MSR) - All cores
*/ */
#define PSL_VEC 0x02000000UL /* AltiVec/SPE vector unit available */ #define PSL_VEC 0x02000000UL /* AltiVec/SPE vector unit available */
#define PSL_VSX 0x00800000UL /* Vector-Scalar unit available */
#define PSL_EE 0x00008000UL /* external interrupt enable */ #define PSL_EE 0x00008000UL /* external interrupt enable */
#define PSL_PR 0x00004000UL /* privilege mode (1 == user) */ #define PSL_PR 0x00004000UL /* privilege mode (1 == user) */
#define PSL_FP 0x00002000UL /* floating point enable */ #define PSL_FP 0x00002000UL /* floating point enable */

View File

@ -20,7 +20,10 @@ struct reg {
/* Must match pcb.pcb_fpu */ /* Must match pcb.pcb_fpu */
struct fpreg { struct fpreg {
double fpreg[32]; union {
double fpr;
uint64_t vsr[2];
} fpreg[32];
double fpscr; double fpscr;
}; };

View File

@ -74,6 +74,9 @@
#define EXC_DLMISS 0x1100 /* Data load translation miss */ #define EXC_DLMISS 0x1100 /* Data load translation miss */
#define EXC_DSMISS 0x1200 /* Data store translation miss */ #define EXC_DSMISS 0x1200 /* Data store translation miss */
/* Power ISA 2.06+: */
#define EXC_VSX 0x0f40 /* VSX Unavailable */
/* The following are available on 4xx and 85xx */ /* The following are available on 4xx and 85xx */
#define EXC_CRIT 0x0100 /* Critical Input Interrupt */ #define EXC_CRIT 0x0100 /* Critical Input Interrupt */
#define EXC_PIT 0x1000 /* Programmable Interval Timer */ #define EXC_PIT 0x1000 /* Programmable Interval Timer */

View File

@ -141,17 +141,17 @@ static const struct cputab models[] = {
PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU,
NULL }, NULL },
{ "IBM POWER7", IBMPOWER7, REVFMT_MAJMIN, { "IBM POWER7", IBMPOWER7, REVFMT_MAJMIN,
PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU |
NULL }, PPC_FEATURE_HAS_VSX, NULL },
{ "IBM POWER7+", IBMPOWER7PLUS, REVFMT_MAJMIN, { "IBM POWER7+", IBMPOWER7PLUS, REVFMT_MAJMIN,
PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU |
NULL }, PPC_FEATURE_HAS_VSX, NULL },
{ "IBM POWER8E", IBMPOWER8E, REVFMT_MAJMIN, { "IBM POWER8E", IBMPOWER8E, REVFMT_MAJMIN,
PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU |
NULL }, PPC_FEATURE_HAS_VSX, NULL },
{ "IBM POWER8", IBMPOWER8, REVFMT_MAJMIN, { "IBM POWER8", IBMPOWER8, REVFMT_MAJMIN,
PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, PPC_FEATURE_64 | PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU |
NULL }, PPC_FEATURE_HAS_VSX, NULL },
{ "Motorola PowerPC 7400", MPC7400, REVFMT_MAJMIN, { "Motorola PowerPC 7400", MPC7400, REVFMT_MAJMIN,
PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_6xx_setup }, PPC_FEATURE_HAS_ALTIVEC | PPC_FEATURE_HAS_FPU, cpu_6xx_setup },
{ "Motorola PowerPC 7410", MPC7410, REVFMT_MAJMIN, { "Motorola PowerPC 7410", MPC7410, REVFMT_MAJMIN,

View File

@ -252,6 +252,7 @@ db_backtrace(struct thread *td, db_addr_t fp, int count)
case EXC_FPU: trapstr = "FPU"; break; case EXC_FPU: trapstr = "FPU"; break;
case EXC_DECR: trapstr = "DECR"; break; case EXC_DECR: trapstr = "DECR"; break;
case EXC_PERF: trapstr = "PERF"; break; case EXC_PERF: trapstr = "PERF"; break;
case EXC_VSX: trapstr = "VSX"; break;
default: trapstr = NULL; break; default: trapstr = NULL; break;
} }
if (trapstr != NULL) { if (trapstr != NULL) {

View File

@ -373,6 +373,7 @@ static int
grab_mcontext(struct thread *td, mcontext_t *mcp, int flags) grab_mcontext(struct thread *td, mcontext_t *mcp, int flags)
{ {
struct pcb *pcb; struct pcb *pcb;
int i;
pcb = td->td_pcb; pcb = td->td_pcb;
@ -403,6 +404,9 @@ grab_mcontext(struct thread *td, mcontext_t *mcp, int flags)
mcp->mc_flags |= _MC_FP_VALID; mcp->mc_flags |= _MC_FP_VALID;
memcpy(&mcp->mc_fpscr, &pcb->pcb_fpu.fpscr, sizeof(double)); memcpy(&mcp->mc_fpscr, &pcb->pcb_fpu.fpscr, sizeof(double));
memcpy(mcp->mc_fpreg, pcb->pcb_fpu.fpr, 32*sizeof(double)); memcpy(mcp->mc_fpreg, pcb->pcb_fpu.fpr, 32*sizeof(double));
for (i = 0; i < 32; i++)
memcpy(&mcp->mc_fpreg[i], &pcb->pcb_fpu.fpr[i].fpr,
sizeof(double));
} }
/* /*
@ -421,6 +425,8 @@ grab_mcontext(struct thread *td, mcontext_t *mcp, int flags)
memcpy(mcp->mc_avec, pcb->pcb_vec.vr, sizeof(mcp->mc_avec)); memcpy(mcp->mc_avec, pcb->pcb_vec.vr, sizeof(mcp->mc_avec));
} }
/* XXX VSX context */
mcp->mc_len = sizeof(*mcp); mcp->mc_len = sizeof(*mcp);
return (0); return (0);
@ -447,6 +453,7 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
struct pcb *pcb; struct pcb *pcb;
struct trapframe *tf; struct trapframe *tf;
register_t tls; register_t tls;
int i;
pcb = td->td_pcb; pcb = td->td_pcb;
tf = td->td_frame; tf = td->td_frame;
@ -476,7 +483,10 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
/* enable_fpu() will happen lazily on a fault */ /* enable_fpu() will happen lazily on a fault */
pcb->pcb_flags |= PCB_FPREGS; pcb->pcb_flags |= PCB_FPREGS;
memcpy(&pcb->pcb_fpu.fpscr, &mcp->mc_fpscr, sizeof(double)); memcpy(&pcb->pcb_fpu.fpscr, &mcp->mc_fpscr, sizeof(double));
memcpy(pcb->pcb_fpu.fpr, mcp->mc_fpreg, 32*sizeof(double)); bzero(pcb->pcb_fpu.fpr, sizeof(pcb->pcb_fpu.fpr));
for (i = 0; i < 32; i++)
memcpy(&pcb->pcb_fpu.fpr[i].fpr, &mcp->mc_fpreg[i],
sizeof(double));
} }
if (mcp->mc_flags & _MC_AV_VALID) { if (mcp->mc_flags & _MC_AV_VALID) {
@ -490,6 +500,8 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
memcpy(pcb->pcb_vec.vr, mcp->mc_avec, sizeof(mcp->mc_avec)); memcpy(pcb->pcb_vec.vr, mcp->mc_avec, sizeof(mcp->mc_avec));
} }
/* XXX VSX context */
return (0); return (0);
} }

View File

@ -67,7 +67,10 @@ enable_fpu(struct thread *td)
* to indicate that the FPU is in use. * to indicate that the FPU is in use.
*/ */
pcb->pcb_flags |= PCB_FPU; pcb->pcb_flags |= PCB_FPU;
tf->srr1 |= PSL_FP; if (pcb->pcb_flags & PCB_VSX)
tf->srr1 |= PSL_FP | PSL_VSX;
else
tf->srr1 |= PSL_FP;
if (!(pcb->pcb_flags & PCB_FPREGS)) { if (!(pcb->pcb_flags & PCB_FPREGS)) {
memset(&pcb->pcb_fpu, 0, sizeof pcb->pcb_fpu); memset(&pcb->pcb_fpu, 0, sizeof pcb->pcb_fpu);
pcb->pcb_flags |= PCB_FPREGS; pcb->pcb_flags |= PCB_FPREGS;
@ -78,7 +81,10 @@ enable_fpu(struct thread *td)
* can be restored. * can be restored.
*/ */
msr = mfmsr(); msr = mfmsr();
mtmsr(msr | PSL_FP); if (pcb->pcb_flags & PCB_VSX)
mtmsr(msr | PSL_FP | PSL_VSX);
else
mtmsr(msr | PSL_FP);
isync(); isync();
/* /*
@ -89,17 +95,31 @@ enable_fpu(struct thread *td)
__asm __volatile ("lfd 0,0(%0); mtfsf 0xff,0" __asm __volatile ("lfd 0,0(%0); mtfsf 0xff,0"
:: "b"(&pcb->pcb_fpu.fpscr)); :: "b"(&pcb->pcb_fpu.fpscr));
#define LFP(n) __asm ("lfd " #n ", 0(%0)" \ if (pcb->pcb_flags & PCB_VSX) {
:: "b"(&pcb->pcb_fpu.fpr[n])); #define LFP(n) __asm ("lxvw4x " #n ", 0,%0" \
LFP(0); LFP(1); LFP(2); LFP(3); :: "b"(&pcb->pcb_fpu.fpr[n]));
LFP(4); LFP(5); LFP(6); LFP(7); LFP(0); LFP(1); LFP(2); LFP(3);
LFP(8); LFP(9); LFP(10); LFP(11); LFP(4); LFP(5); LFP(6); LFP(7);
LFP(12); LFP(13); LFP(14); LFP(15); LFP(8); LFP(9); LFP(10); LFP(11);
LFP(16); LFP(17); LFP(18); LFP(19); LFP(12); LFP(13); LFP(14); LFP(15);
LFP(20); LFP(21); LFP(22); LFP(23); LFP(16); LFP(17); LFP(18); LFP(19);
LFP(24); LFP(25); LFP(26); LFP(27); LFP(20); LFP(21); LFP(22); LFP(23);
LFP(28); LFP(29); LFP(30); LFP(31); LFP(24); LFP(25); LFP(26); LFP(27);
#undef LFP LFP(28); LFP(29); LFP(30); LFP(31);
#undef LFP
} else {
#define LFP(n) __asm ("lfd " #n ", 0(%0)" \
:: "b"(&pcb->pcb_fpu.fpr[n]));
LFP(0); LFP(1); LFP(2); LFP(3);
LFP(4); LFP(5); LFP(6); LFP(7);
LFP(8); LFP(9); LFP(10); LFP(11);
LFP(12); LFP(13); LFP(14); LFP(15);
LFP(16); LFP(17); LFP(18); LFP(19);
LFP(20); LFP(21); LFP(22); LFP(23);
LFP(24); LFP(25); LFP(26); LFP(27);
LFP(28); LFP(29); LFP(30); LFP(31);
#undef LFP
}
isync(); isync();
mtmsr(msr); mtmsr(msr);
@ -117,23 +137,40 @@ save_fpu(struct thread *td)
* Temporarily re-enable floating-point during the save * Temporarily re-enable floating-point during the save
*/ */
msr = mfmsr(); msr = mfmsr();
mtmsr(msr | PSL_FP); if (pcb->pcb_flags & PCB_VSX)
mtmsr(msr | PSL_FP | PSL_VSX);
else
mtmsr(msr | PSL_FP);
isync(); isync();
/* /*
* Save the floating-point registers and FPSCR to the PCB * Save the floating-point registers and FPSCR to the PCB
*/ */
#define SFP(n) __asm ("stfd " #n ", 0(%0)" \ if (pcb->pcb_flags & PCB_VSX) {
:: "b"(&pcb->pcb_fpu.fpr[n])); #define SFP(n) __asm ("stxvw4x " #n ", 0,%0" \
SFP(0); SFP(1); SFP(2); SFP(3); :: "b"(&pcb->pcb_fpu.fpr[n]));
SFP(4); SFP(5); SFP(6); SFP(7); SFP(0); SFP(1); SFP(2); SFP(3);
SFP(8); SFP(9); SFP(10); SFP(11); SFP(4); SFP(5); SFP(6); SFP(7);
SFP(12); SFP(13); SFP(14); SFP(15); SFP(8); SFP(9); SFP(10); SFP(11);
SFP(16); SFP(17); SFP(18); SFP(19); SFP(12); SFP(13); SFP(14); SFP(15);
SFP(20); SFP(21); SFP(22); SFP(23); SFP(16); SFP(17); SFP(18); SFP(19);
SFP(24); SFP(25); SFP(26); SFP(27); SFP(20); SFP(21); SFP(22); SFP(23);
SFP(28); SFP(29); SFP(30); SFP(31); SFP(24); SFP(25); SFP(26); SFP(27);
#undef SFP SFP(28); SFP(29); SFP(30); SFP(31);
#undef SFP
} else {
#define SFP(n) __asm ("stfd " #n ", 0(%0)" \
:: "b"(&pcb->pcb_fpu.fpr[n]));
SFP(0); SFP(1); SFP(2); SFP(3);
SFP(4); SFP(5); SFP(6); SFP(7);
SFP(8); SFP(9); SFP(10); SFP(11);
SFP(12); SFP(13); SFP(14); SFP(15);
SFP(16); SFP(17); SFP(18); SFP(19);
SFP(20); SFP(21); SFP(22); SFP(23);
SFP(24); SFP(25); SFP(26); SFP(27);
SFP(28); SFP(29); SFP(30); SFP(31);
#undef SFP
}
__asm __volatile ("mffs 0; stfd 0,0(%0)" :: "b"(&pcb->pcb_fpu.fpscr)); __asm __volatile ("mffs 0; stfd 0,0(%0)" :: "b"(&pcb->pcb_fpu.fpscr));
/* /*