mirror of
https://git.FreeBSD.org/src.git
synced 2024-10-19 02:29:40 +00:00
vmm: Make pmap_invalidate_ept() wait synchronously for guest exits
Currently EPT TLB invalidation is done by incrementing a generation counter and issuing an IPI to all CPUs currently running vCPU threads. The VMM inner loop caches the most recently observed generation on each host CPU and invalidates TLB entries before executing the VM if the cached generation number is not the most recent value. pmap_invalidate_ept() issues IPIs to force each vCPU to stop executing guest instructions and reload the generation number. However, it does not actually wait for vCPUs to exit, potentially creating a window where guests may continue to reference stale TLB entries. Fix the problem by bracketing guest execution with an SMR read section which is entered before loading the invalidation generation. Then, pmap_invalidate_ept() increments the current write sequence before loading pm_active and sending IPIs, and polls readers to ensure that all vCPUs potentially operating with stale TLB entries have exited before pmap_invalidate_ept() returns. Also ensure that unsynchronized loads of the generation counter are wrapped with atomic(9), and stop (inconsistently) updating the invalidation counter and pm_active bitmask with acquire semantics. Reviewed by: grehan, kib Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D26910
This commit is contained in:
parent
a4848c103c
commit
6f5a960678
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=367593
@ -125,6 +125,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/rangeset.h>
|
||||
#include <sys/rwlock.h>
|
||||
#include <sys/sbuf.h>
|
||||
#include <sys/smr.h>
|
||||
#include <sys/sx.h>
|
||||
#include <sys/turnstile.h>
|
||||
#include <sys/vmem.h>
|
||||
@ -2647,7 +2648,7 @@ pmap_update_pde_store(pmap_t pmap, pd_entry_t *pde, pd_entry_t newpde)
|
||||
* "all" host cpus and force any vcpu context to exit as a
|
||||
* side-effect.
|
||||
*/
|
||||
atomic_add_acq_long(&pmap->pm_eptgen, 1);
|
||||
atomic_add_long(&pmap->pm_eptgen, 1);
|
||||
break;
|
||||
default:
|
||||
panic("pmap_update_pde_store: bad pm_type %d", pmap->pm_type);
|
||||
@ -2722,6 +2723,7 @@ pmap_update_pde_invalidate(pmap_t pmap, vm_offset_t va, pd_entry_t newpde)
|
||||
static __inline void
|
||||
pmap_invalidate_ept(pmap_t pmap)
|
||||
{
|
||||
smr_seq_t goal;
|
||||
int ipinum;
|
||||
|
||||
sched_pin();
|
||||
@ -2742,8 +2744,17 @@ pmap_invalidate_ept(pmap_t pmap)
|
||||
* Each vcpu keeps a cache of this counter and compares it
|
||||
* just before a vmresume. If the counter is out-of-date an
|
||||
* invept will be done to flush stale mappings from the TLB.
|
||||
*
|
||||
* To ensure that all vCPU threads have observed the new counter
|
||||
* value before returning, we use SMR. Ordering is important here:
|
||||
* the VMM enters an SMR read section before loading the counter
|
||||
* and after updating the pm_active bit set. Thus, pm_active is
|
||||
* a superset of active readers, and any reader that has observed
|
||||
* the goal has observed the new counter value.
|
||||
*/
|
||||
atomic_add_acq_long(&pmap->pm_eptgen, 1);
|
||||
atomic_add_long(&pmap->pm_eptgen, 1);
|
||||
|
||||
goal = smr_advance(pmap->pm_eptsmr);
|
||||
|
||||
/*
|
||||
* Force the vcpu to exit and trap back into the hypervisor.
|
||||
@ -2751,6 +2762,12 @@ pmap_invalidate_ept(pmap_t pmap)
|
||||
ipinum = pmap->pm_flags & PMAP_NESTED_IPIMASK;
|
||||
ipi_selected(pmap->pm_active, ipinum);
|
||||
sched_unpin();
|
||||
|
||||
/*
|
||||
* Ensure that all active vCPUs will observe the new generation counter
|
||||
* value before executing any more guest instructions.
|
||||
*/
|
||||
smr_wait(pmap->pm_eptsmr, goal);
|
||||
}
|
||||
|
||||
static cpuset_t
|
||||
@ -4086,7 +4103,8 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
|
||||
* address space.
|
||||
* Install minimal kernel mappings in PTI case.
|
||||
*/
|
||||
if (pm_type == PT_X86) {
|
||||
switch (pm_type) {
|
||||
case PT_X86:
|
||||
pmap->pm_cr3 = pmltop_phys;
|
||||
if (pmap_is_la57(pmap))
|
||||
pmap_pinit_pml5(pmltop_pg);
|
||||
@ -4107,6 +4125,11 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
|
||||
rangeset_init(&pmap->pm_pkru, pkru_dup_range,
|
||||
pkru_free_range, pmap, M_NOWAIT);
|
||||
}
|
||||
break;
|
||||
case PT_EPT:
|
||||
case PT_RVI:
|
||||
pmap->pm_eptsmr = smr_create("pmap", 0, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
pmap->pm_root.rt_root = 0;
|
||||
|
@ -270,6 +270,7 @@
|
||||
#include <sys/_mutex.h>
|
||||
#include <sys/_pctrie.h>
|
||||
#include <sys/_rangeset.h>
|
||||
#include <sys/_smr.h>
|
||||
|
||||
#include <vm/_vm_radix.h>
|
||||
|
||||
@ -371,6 +372,7 @@ struct pmap {
|
||||
struct pmap_statistics pm_stats; /* pmap statistics */
|
||||
struct vm_radix pm_root; /* spare page table pages */
|
||||
long pm_eptgen; /* EPT pmap generation id */
|
||||
smr_t pm_eptsmr;
|
||||
int pm_flags;
|
||||
struct pmap_pcids pm_pcids[MAXCPU];
|
||||
struct rangeset pm_pkru;
|
||||
|
@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/pcpu.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/smr.h>
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
@ -1800,15 +1801,17 @@ restore_host_tss(void)
|
||||
}
|
||||
|
||||
static void
|
||||
check_asid(struct svm_softc *sc, int vcpuid, pmap_t pmap, u_int thiscpu)
|
||||
svm_pmap_activate(struct svm_softc *sc, int vcpuid, pmap_t pmap)
|
||||
{
|
||||
struct svm_vcpu *vcpustate;
|
||||
struct vmcb_ctrl *ctrl;
|
||||
long eptgen;
|
||||
int cpu;
|
||||
bool alloc_asid;
|
||||
|
||||
KASSERT(CPU_ISSET(thiscpu, &pmap->pm_active), ("%s: nested pmap not "
|
||||
"active on cpu %u", __func__, thiscpu));
|
||||
cpu = curcpu;
|
||||
CPU_SET_ATOMIC(cpu, &pmap->pm_active);
|
||||
smr_enter(pmap->pm_eptsmr);
|
||||
|
||||
vcpustate = svm_get_vcpu(sc, vcpuid);
|
||||
ctrl = svm_get_vmcb_ctrl(sc, vcpuid);
|
||||
@ -1849,10 +1852,10 @@ check_asid(struct svm_softc *sc, int vcpuid, pmap_t pmap, u_int thiscpu)
|
||||
*/
|
||||
|
||||
alloc_asid = false;
|
||||
eptgen = pmap->pm_eptgen;
|
||||
eptgen = atomic_load_long(&pmap->pm_eptgen);
|
||||
ctrl->tlb_ctrl = VMCB_TLB_FLUSH_NOTHING;
|
||||
|
||||
if (vcpustate->asid.gen != asid[thiscpu].gen) {
|
||||
if (vcpustate->asid.gen != asid[cpu].gen) {
|
||||
alloc_asid = true; /* (c) and (d) */
|
||||
} else if (vcpustate->eptgen != eptgen) {
|
||||
if (flush_by_asid())
|
||||
@ -1869,10 +1872,10 @@ check_asid(struct svm_softc *sc, int vcpuid, pmap_t pmap, u_int thiscpu)
|
||||
}
|
||||
|
||||
if (alloc_asid) {
|
||||
if (++asid[thiscpu].num >= nasid) {
|
||||
asid[thiscpu].num = 1;
|
||||
if (++asid[thiscpu].gen == 0)
|
||||
asid[thiscpu].gen = 1;
|
||||
if (++asid[cpu].num >= nasid) {
|
||||
asid[cpu].num = 1;
|
||||
if (++asid[cpu].gen == 0)
|
||||
asid[cpu].gen = 1;
|
||||
/*
|
||||
* If this cpu does not support "flush-by-asid"
|
||||
* then flush the entire TLB on a generation
|
||||
@ -1882,8 +1885,8 @@ check_asid(struct svm_softc *sc, int vcpuid, pmap_t pmap, u_int thiscpu)
|
||||
if (!flush_by_asid())
|
||||
ctrl->tlb_ctrl = VMCB_TLB_FLUSH_ALL;
|
||||
}
|
||||
vcpustate->asid.gen = asid[thiscpu].gen;
|
||||
vcpustate->asid.num = asid[thiscpu].num;
|
||||
vcpustate->asid.gen = asid[cpu].gen;
|
||||
vcpustate->asid.num = asid[cpu].num;
|
||||
|
||||
ctrl->asid = vcpustate->asid.num;
|
||||
svm_set_dirty(sc, vcpuid, VMCB_CACHE_ASID);
|
||||
@ -1902,6 +1905,13 @@ check_asid(struct svm_softc *sc, int vcpuid, pmap_t pmap, u_int thiscpu)
|
||||
("ASID mismatch: %u/%u", ctrl->asid, vcpustate->asid.num));
|
||||
}
|
||||
|
||||
static void
|
||||
svm_pmap_deactivate(pmap_t pmap)
|
||||
{
|
||||
smr_exit(pmap->pm_eptsmr);
|
||||
CPU_CLR_ATOMIC(curcpu, &pmap->pm_active);
|
||||
}
|
||||
|
||||
static __inline void
|
||||
disable_gintr(void)
|
||||
{
|
||||
@ -2083,14 +2093,11 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
|
||||
|
||||
svm_inj_interrupts(svm_sc, vcpu, vlapic);
|
||||
|
||||
/* Activate the nested pmap on 'curcpu' */
|
||||
CPU_SET_ATOMIC_ACQ(curcpu, &pmap->pm_active);
|
||||
|
||||
/*
|
||||
* Check the pmap generation and the ASID generation to
|
||||
* ensure that the vcpu does not use stale TLB mappings.
|
||||
*/
|
||||
check_asid(svm_sc, vcpu, pmap, curcpu);
|
||||
svm_pmap_activate(svm_sc, vcpu, pmap);
|
||||
|
||||
ctrl->vmcb_clean = vmcb_clean & ~vcpustate->dirty;
|
||||
vcpustate->dirty = 0;
|
||||
@ -2102,7 +2109,7 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
|
||||
svm_launch(vmcb_pa, gctx, get_pcpu());
|
||||
svm_dr_leave_guest(gctx);
|
||||
|
||||
CPU_CLR_ATOMIC(curcpu, &pmap->pm_active);
|
||||
svm_pmap_deactivate(pmap);
|
||||
|
||||
/*
|
||||
* The host GDTR and IDTR is saved by VMRUN and restored
|
||||
|
@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/pcpu.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/smr.h>
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
@ -1273,7 +1274,7 @@ vmx_invvpid(struct vmx *vmx, int vcpu, pmap_t pmap, int running)
|
||||
* Note also that this will invalidate mappings tagged with 'vpid'
|
||||
* for "all" EP4TAs.
|
||||
*/
|
||||
if (pmap->pm_eptgen == vmx->eptgen[curcpu]) {
|
||||
if (atomic_load_long(&pmap->pm_eptgen) == vmx->eptgen[curcpu]) {
|
||||
invvpid_desc._res1 = 0;
|
||||
invvpid_desc._res2 = 0;
|
||||
invvpid_desc.vpid = vmxstate->vpid;
|
||||
@ -2948,6 +2949,7 @@ vmx_pmap_activate(struct vmx *vmx, pmap_t pmap)
|
||||
cpu = curcpu;
|
||||
|
||||
CPU_SET_ATOMIC(cpu, &pmap->pm_active);
|
||||
smr_enter(pmap->pm_eptsmr);
|
||||
eptgen = atomic_load_long(&pmap->pm_eptgen);
|
||||
if (eptgen != vmx->eptgen[cpu]) {
|
||||
vmx->eptgen[cpu] = eptgen;
|
||||
@ -2959,6 +2961,7 @@ vmx_pmap_activate(struct vmx *vmx, pmap_t pmap)
|
||||
static __inline void
|
||||
vmx_pmap_deactivate(struct vmx *vmx, pmap_t pmap)
|
||||
{
|
||||
smr_exit(pmap->pm_eptsmr);
|
||||
CPU_CLR_ATOMIC(curcpu, &pmap->pm_active);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user