mirror of
https://git.FreeBSD.org/src.git
synced 2024-10-19 02:29:40 +00:00
Implement guest vcpu pinning using 'pthread_setaffinity_np(3)'.
Prior to this change pinning was implemented via an ioctl (VM_SET_PINNING) that called 'sched_bind()' on behalf of the user thread. The ULE implementation of 'sched_bind()' bumps up 'td_pinned' which in turn runs afoul of the assertion '(td_pinned == 0)' in userret(). Using the cpuset affinity to implement pinning of the vcpu threads works with both 4BSD and ULE schedulers and has the happy side-effect of getting rid of a bunch of code in vmm.ko. Discussed with: grehan
This commit is contained in:
parent
f667ff300d
commit
485b3300cc
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=246686
@ -249,34 +249,6 @@ vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val)
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid)
|
||||
{
|
||||
int error;
|
||||
struct vm_pin vmpin;
|
||||
|
||||
bzero(&vmpin, sizeof(vmpin));
|
||||
vmpin.vm_cpuid = vcpu;
|
||||
|
||||
error = ioctl(ctx->fd, VM_GET_PINNING, &vmpin);
|
||||
*host_cpuid = vmpin.host_cpuid;
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid)
|
||||
{
|
||||
int error;
|
||||
struct vm_pin vmpin;
|
||||
|
||||
bzero(&vmpin, sizeof(vmpin));
|
||||
vmpin.vm_cpuid = vcpu;
|
||||
vmpin.host_cpuid = host_cpuid;
|
||||
|
||||
error = ioctl(ctx->fd, VM_SET_PINNING, &vmpin);
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit)
|
||||
{
|
||||
|
@ -56,8 +56,6 @@ int vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
|
||||
uint64_t *base, uint32_t *limit, uint32_t *access);
|
||||
int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val);
|
||||
int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval);
|
||||
int vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid);
|
||||
int vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid);
|
||||
int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip,
|
||||
struct vm_exit *ret_vmexit);
|
||||
int vm_apicid2vcpu(struct vmctx *ctx, int apicid);
|
||||
|
@ -102,8 +102,6 @@ int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
|
||||
struct seg_desc *ret_desc);
|
||||
int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
|
||||
struct seg_desc *desc);
|
||||
int vm_get_pinning(struct vm *vm, int vcpu, int *cpuid);
|
||||
int vm_set_pinning(struct vm *vm, int vcpu, int cpuid);
|
||||
int vm_run(struct vm *vm, struct vm_run *vmrun);
|
||||
int vm_inject_event(struct vm *vm, int vcpu, int type,
|
||||
int vector, uint32_t error_code, int error_code_valid);
|
||||
|
@ -51,11 +51,6 @@ struct vm_seg_desc { /* data or code segment */
|
||||
struct seg_desc desc;
|
||||
};
|
||||
|
||||
/*
 * Argument structure for the VM_SET_PINNING and VM_GET_PINNING ioctls.
 */
struct vm_pin {
	int	vm_cpuid;	/* guest vcpu being pinned or queried */
	int	host_cpuid;	/* -1 to unpin */
};
|
||||
|
||||
struct vm_run {
|
||||
int cpuid;
|
||||
uint64_t rip; /* start running here */
|
||||
@ -142,8 +137,6 @@ struct vm_x2apic {
|
||||
|
||||
enum {
|
||||
IOCNUM_RUN,
|
||||
IOCNUM_SET_PINNING,
|
||||
IOCNUM_GET_PINNING,
|
||||
IOCNUM_MAP_MEMORY,
|
||||
IOCNUM_GET_MEMORY_SEG,
|
||||
IOCNUM_SET_REGISTER,
|
||||
@ -168,10 +161,6 @@ enum {
|
||||
|
||||
#define VM_RUN \
|
||||
_IOWR('v', IOCNUM_RUN, struct vm_run)
|
||||
#define VM_SET_PINNING \
|
||||
_IOW('v', IOCNUM_SET_PINNING, struct vm_pin)
|
||||
#define VM_GET_PINNING \
|
||||
_IOWR('v', IOCNUM_GET_PINNING, struct vm_pin)
|
||||
#define VM_MAP_MEMORY \
|
||||
_IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment)
|
||||
#define VM_GET_MEMORY_SEG \
|
||||
|
@ -402,31 +402,6 @@ pptintr(void *arg)
|
||||
return (FILTER_HANDLED);
|
||||
}
|
||||
|
||||
/*
 * XXX
 * When we try to free the MSI resource the kernel will bind the thread
 * to the host cpu that was originally handling the MSI.  The function
 * freeing the MSI vector (apic_free_vector()) will panic the kernel if
 * the thread is already bound to a cpu.
 *
 * So, we temporarily unbind the vcpu thread before freeing the MSI
 * resource.
 */
static void
PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt)
{
	int hostcpu = -1;
	int was_pinned;

	vm_get_pinning(vm, vcpu, &hostcpu);
	was_pinned = (hostcpu >= 0);

	/* Drop the pin so apic_free_vector() can bind the thread itself. */
	if (was_pinned)
		vm_set_pinning(vm, vcpu, -1);

	ppt_teardown_msi(ppt);

	/* Restore the original pinning, if any. */
	if (was_pinned)
		vm_set_pinning(vm, vcpu, hostcpu);
}
|
||||
|
||||
int
|
||||
ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
|
||||
int destcpu, int vector, int numvec)
|
||||
@ -447,7 +422,7 @@ ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
|
||||
return (EBUSY);
|
||||
|
||||
/* Free any allocated resources */
|
||||
PPT_TEARDOWN_MSI(vm, vcpu, ppt);
|
||||
ppt_teardown_msi(ppt);
|
||||
|
||||
if (numvec == 0) /* nothing more to do */
|
||||
return (0);
|
||||
@ -513,7 +488,7 @@ ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
|
||||
}
|
||||
|
||||
if (i < numvec) {
|
||||
PPT_TEARDOWN_MSI(vm, vcpu, ppt);
|
||||
ppt_teardown_msi(ppt);
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
|
@ -70,7 +70,6 @@ struct vcpu {
|
||||
int flags;
|
||||
enum vcpu_state state;
|
||||
struct mtx mtx;
|
||||
int pincpu; /* host cpuid this vcpu is bound to */
|
||||
int hostcpu; /* host cpuid this vcpu last ran on */
|
||||
uint64_t guest_msrs[VMM_MSR_NUM];
|
||||
struct vlapic *vlapic;
|
||||
@ -81,18 +80,6 @@ struct vcpu {
|
||||
enum x2apic_state x2apic_state;
|
||||
int nmi_pending;
|
||||
};
|
||||
/* Set in vcpu->flags when the vcpu is pinned to a host cpu. */
#define VCPU_F_PINNED	0x0001

/*
 * Host cpu 'vcpuid' is pinned to, or -1 when not pinned.
 *
 * Fix over the original: every macro argument is now parenthesized in
 * the expansion (CERT PRE01-C), so passing expressions such as a
 * pointer cast for 'vm' or 'i + 1' for 'vcpuid' expands correctly.
 */
#define VCPU_PINCPU(vm, vcpuid) \
	(((vm)->vcpu[(vcpuid)].flags & VCPU_F_PINNED) ? \
	    (vm)->vcpu[(vcpuid)].pincpu : -1)

#define VCPU_UNPIN(vm, vcpuid) \
	((vm)->vcpu[(vcpuid)].flags &= ~VCPU_F_PINNED)

#define VCPU_PIN(vm, vcpuid, host_cpuid) \
do { \
	(vm)->vcpu[(vcpuid)].flags |= VCPU_F_PINNED; \
	(vm)->vcpu[(vcpuid)].pincpu = (host_cpuid); \
} while (0)
||||
|
||||
#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
|
||||
#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx))
|
||||
@ -594,52 +581,6 @@ vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
|
||||
return (VMSETDESC(vm->cookie, vcpu, reg, desc));
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_pinning(struct vm *vm, int vcpuid, int *cpuid)
|
||||
{
|
||||
|
||||
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
|
||||
return (EINVAL);
|
||||
|
||||
*cpuid = VCPU_PINCPU(vm, vcpuid);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
vm_set_pinning(struct vm *vm, int vcpuid, int host_cpuid)
|
||||
{
|
||||
struct thread *td;
|
||||
|
||||
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
|
||||
return (EINVAL);
|
||||
|
||||
td = curthread; /* XXXSMP only safe when muxing vcpus */
|
||||
|
||||
/* unpin */
|
||||
if (host_cpuid < 0) {
|
||||
VCPU_UNPIN(vm, vcpuid);
|
||||
thread_lock(td);
|
||||
sched_unbind(td);
|
||||
thread_unlock(td);
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (CPU_ABSENT(host_cpuid))
|
||||
return (EINVAL);
|
||||
|
||||
/*
|
||||
* XXX we should check that 'host_cpuid' has not already been pinned
|
||||
* by another vm.
|
||||
*/
|
||||
thread_lock(td);
|
||||
sched_bind(td, host_cpuid);
|
||||
thread_unlock(td);
|
||||
VCPU_PIN(vm, vcpuid, host_cpuid);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
restore_guest_fpustate(struct vcpu *vcpu)
|
||||
{
|
||||
|
@ -144,7 +144,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
|
||||
struct vm_memory_segment *seg;
|
||||
struct vm_register *vmreg;
|
||||
struct vm_seg_desc* vmsegdesc;
|
||||
struct vm_pin *vmpin;
|
||||
struct vm_run *vmrun;
|
||||
struct vm_event *vmevent;
|
||||
struct vm_lapic_irq *vmirq;
|
||||
@ -170,7 +169,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
|
||||
*/
|
||||
switch (cmd) {
|
||||
case VM_RUN:
|
||||
case VM_SET_PINNING:
|
||||
case VM_GET_REGISTER:
|
||||
case VM_SET_REGISTER:
|
||||
case VM_GET_SEGMENT_DESCRIPTOR:
|
||||
@ -301,16 +299,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
|
||||
vmirq = (struct vm_lapic_irq *)data;
|
||||
error = lapic_set_intr(sc->vm, vmirq->cpuid, vmirq->vector);
|
||||
break;
|
||||
case VM_SET_PINNING:
|
||||
vmpin = (struct vm_pin *)data;
|
||||
error = vm_set_pinning(sc->vm, vmpin->vm_cpuid,
|
||||
vmpin->host_cpuid);
|
||||
break;
|
||||
case VM_GET_PINNING:
|
||||
vmpin = (struct vm_pin *)data;
|
||||
error = vm_get_pinning(sc->vm, vmpin->vm_cpuid,
|
||||
&vmpin->host_cpuid);
|
||||
break;
|
||||
case VM_MAP_MEMORY:
|
||||
seg = (struct vm_memory_segment *)data;
|
||||
error = vm_malloc(sc->vm, seg->gpa, seg->len);
|
||||
|
@ -520,13 +520,17 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
|
||||
static void
|
||||
vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
|
||||
{
|
||||
cpuset_t mask;
|
||||
int error, rc, prevcpu;
|
||||
|
||||
if (guest_vcpu_mux)
|
||||
setup_timeslice();
|
||||
|
||||
if (pincpu >= 0) {
|
||||
error = vm_set_pinning(ctx, vcpu, pincpu + vcpu);
|
||||
CPU_ZERO(&mask);
|
||||
CPU_SET(pincpu + vcpu, &mask);
|
||||
error = pthread_setaffinity_np(pthread_self(),
|
||||
sizeof(mask), &mask);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
|
@ -183,8 +183,6 @@ usage(void)
|
||||
" [--get-vmcs-exit-interruption-info]\n"
|
||||
" [--get-vmcs-exit-interruption-error]\n"
|
||||
" [--get-vmcs-interruptibility]\n"
|
||||
" [--set-pinning=<host_cpuid>]\n"
|
||||
" [--get-pinning]\n"
|
||||
" [--set-x2apic-state=<state>]\n"
|
||||
" [--get-x2apic-state]\n"
|
||||
" [--set-lowmem=<memory below 4GB in units of MB>]\n"
|
||||
@ -218,7 +216,6 @@ static int set_desc_tr, get_desc_tr;
|
||||
static int set_desc_ldtr, get_desc_ldtr;
|
||||
static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr;
|
||||
static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr;
|
||||
static int set_pinning, get_pinning, pincpu;
|
||||
static int set_x2apic_state, get_x2apic_state;
|
||||
enum x2apic_state x2apic_state;
|
||||
static int run;
|
||||
@ -374,7 +371,6 @@ enum {
|
||||
SET_SS,
|
||||
SET_TR,
|
||||
SET_LDTR,
|
||||
SET_PINNING,
|
||||
SET_X2APIC_STATE,
|
||||
SET_VMCS_EXCEPTION_BITMAP,
|
||||
SET_VMCS_ENTRY_INTERRUPTION_INFO,
|
||||
@ -423,7 +419,6 @@ main(int argc, char *argv[])
|
||||
{ "set-ss", REQ_ARG, 0, SET_SS },
|
||||
{ "set-tr", REQ_ARG, 0, SET_TR },
|
||||
{ "set-ldtr", REQ_ARG, 0, SET_LDTR },
|
||||
{ "set-pinning",REQ_ARG, 0, SET_PINNING },
|
||||
{ "set-x2apic-state",REQ_ARG, 0, SET_X2APIC_STATE },
|
||||
{ "set-vmcs-exception-bitmap",
|
||||
REQ_ARG, 0, SET_VMCS_EXCEPTION_BITMAP },
|
||||
@ -552,7 +547,6 @@ main(int argc, char *argv[])
|
||||
NO_ARG, &get_vmcs_exit_interruption_error, 1},
|
||||
{ "get-vmcs-interruptibility",
|
||||
NO_ARG, &get_vmcs_interruptibility, 1 },
|
||||
{ "get-pinning",NO_ARG, &get_pinning, 1 },
|
||||
{ "get-x2apic-state",NO_ARG, &get_x2apic_state, 1 },
|
||||
{ "get-all", NO_ARG, &get_all, 1 },
|
||||
{ "run", NO_ARG, &run, 1 },
|
||||
@ -659,10 +653,6 @@ main(int argc, char *argv[])
|
||||
ldtr = strtoul(optarg, NULL, 0);
|
||||
set_ldtr = 1;
|
||||
break;
|
||||
case SET_PINNING:
|
||||
pincpu = strtol(optarg, NULL, 0);
|
||||
set_pinning = 1;
|
||||
break;
|
||||
case SET_X2APIC_STATE:
|
||||
x2apic_state = strtol(optarg, NULL, 0);
|
||||
set_x2apic_state = 1;
|
||||
@ -812,9 +802,6 @@ main(int argc, char *argv[])
|
||||
if (!error && set_ldtr)
|
||||
error = vm_set_register(ctx, vcpu, VM_REG_GUEST_LDTR, ldtr);
|
||||
|
||||
if (!error && set_pinning)
|
||||
error = vm_set_pinning(ctx, vcpu, pincpu);
|
||||
|
||||
if (!error && set_x2apic_state)
|
||||
error = vm_set_x2apic_state(ctx, vcpu, x2apic_state);
|
||||
|
||||
@ -1135,16 +1122,6 @@ main(int argc, char *argv[])
|
||||
printf("ldtr[%d]\t\t0x%04lx\n", vcpu, ldtr);
|
||||
}
|
||||
|
||||
if (!error && (get_pinning || get_all)) {
|
||||
error = vm_get_pinning(ctx, vcpu, &pincpu);
|
||||
if (error == 0) {
|
||||
if (pincpu < 0)
|
||||
printf("pincpu[%d]\tunpinned\n", vcpu);
|
||||
else
|
||||
printf("pincpu[%d]\t%d\n", vcpu, pincpu);
|
||||
}
|
||||
}
|
||||
|
||||
if (!error && (get_x2apic_state || get_all)) {
|
||||
error = vm_get_x2apic_state(ctx, vcpu, &x2apic_state);
|
||||
if (error == 0)
|
||||
|
Loading…
Reference in New Issue
Block a user