From 1c8e7232b428413cd0a933f432d5eb6f5755f555 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sat, 18 Apr 2015 21:23:16 +0000 Subject: [PATCH] Remove lazy pmap switch code from i386. Naive benchmark with md(4) shows no difference with the code removed. On both amd64 and i386, assert that a released pmap is not active. Proposed and reviewed by: alc Discussed with: Svatopluk Kraus , peter Sponsored by: The FreeBSD Foundation MFC after: 2 weeks --- sys/amd64/amd64/pmap.c | 2 + sys/i386/i386/apic_vector.s | 15 ---- sys/i386/i386/db_trace.c | 3 +- sys/i386/i386/mp_machdep.c | 7 -- sys/i386/i386/pmap.c | 115 ++----------------------------- sys/i386/i386/swtch.s | 6 -- sys/i386/include/smp.h | 4 +- sys/i386/xen/mp_machdep.c | 11 +-- sys/i386/xen/pmap.c | 104 +--------------------------- sys/x86/include/acpica_machdep.h | 1 + sys/x86/xen/xen_apic.c | 19 ----- 11 files changed, 13 insertions(+), 274 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 29158e8ad984..dc823fa355ca 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -2532,6 +2532,8 @@ pmap_release(pmap_t pmap) pmap->pm_stats.resident_count)); KASSERT(vm_radix_is_empty(&pmap->pm_root), ("pmap_release: pmap has reserved page table page(s)")); + KASSERT(CPU_EMPTY(&pmap->pm_active), + ("releasing active pmap %p", pmap)); if (pmap_pcid_enabled) { /* diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s index 7cae22013a4d..316bc7a189c2 100644 --- a/sys/i386/i386/apic_vector.s +++ b/sys/i386/i386/apic_vector.s @@ -320,19 +320,4 @@ IDTVEC(rendezvous) POP_FRAME iret -/* - * Clean up when we lose out on the lazy context switch optimization. - * ie: when we are about to release a PTD but a cpu is still borrowing it. - */ - SUPERALIGN_TEXT -IDTVEC(lazypmap) - PUSH_FRAME - SET_KERNEL_SREGS - cld - - call pmap_lazyfix_action - - call as_lapic_eoi - POP_FRAME - iret #endif /* SMP */ diff --git a/sys/i386/i386/db_trace.c b/sys/i386/i386/db_trace.c index 822cc56666bb..f79765674bb6 100644 --- a/sys/i386/i386/db_trace.c +++ b/sys/i386/i386/db_trace.c @@ -316,8 +316,7 @@ db_nextframe(struct i386_frame **fp, db_addr_t *ip, struct thread *td) frame_type = TRAP_TIMERINT; else if (strcmp(name, "Xcpustop") == 0 || strcmp(name, "Xrendezvous") == 0 || - strcmp(name, "Xipi_intr_bitmap_handler") == 0 || - strcmp(name, "Xlazypmap") == 0) + strcmp(name, "Xipi_intr_bitmap_handler") == 0) frame_type = TRAP_INTERRUPT; } diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index 083b28381005..b39675226f98 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -163,7 +163,6 @@ u_long *ipi_invlrng_counts[MAXCPU]; u_long *ipi_invlpg_counts[MAXCPU]; u_long *ipi_invlcache_counts[MAXCPU]; u_long *ipi_rendezvous_counts[MAXCPU]; -u_long *ipi_lazypmap_counts[MAXCPU]; static u_long *ipi_hardclock_counts[MAXCPU]; #endif @@ -576,10 +575,6 @@ cpu_mp_start(void) setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - /* Install an inter-CPU IPI for lazy pmap release */ - setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), - SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - /* Install an inter-CPU IPI for all-CPU rendezvous */ setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); @@ -1672,8 +1667,6 @@ mp_ipi_intrcnt(void *dummy) intrcnt_add(buf, &ipi_ast_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i); intrcnt_add(buf, &ipi_rendezvous_counts[i]); - snprintf(buf, sizeof(buf), "cpu%d:lazypmap", i); - 
intrcnt_add(buf, &ipi_lazypmap_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:hardclock", i); intrcnt_add(buf, &ipi_hardclock_counts[i]); } diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index faddd1e2bdb0..029a277b5bea 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -1248,18 +1248,14 @@ pmap_invalidate_cache_pages(vm_page_t *pages, int count) } /* - * Are we current address space or kernel? N.B. We return FALSE when - * a pmap's page table is in use because a kernel thread is borrowing - * it. The borrowed page table can change spontaneously, making any - * dependence on its continued use subject to a race condition. + * Are we current address space or kernel? */ static __inline int pmap_is_current(pmap_t pmap) { - return (pmap == kernel_pmap || - (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) && - (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME))); + return (pmap == kernel_pmap || pmap == + vmspace_pmap(curthread->td_proc->p_vmspace)); } /* @@ -1923,108 +1919,6 @@ pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags) * Pmap allocation/deallocation routines. ***************************************************/ -#ifdef SMP -/* - * Deal with a SMP shootdown of other users of the pmap that we are - * trying to dispose of. This can be a bit hairy. - */ -static cpuset_t *lazymask; -static u_int lazyptd; -static volatile u_int lazywait; - -void pmap_lazyfix_action(void); - -void -pmap_lazyfix_action(void) -{ - -#ifdef COUNT_IPIS - (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++; -#endif - if (rcr3() == lazyptd) - load_cr3(curpcb->pcb_cr3); - CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask); - atomic_store_rel_int(&lazywait, 1); -} - -static void -pmap_lazyfix_self(u_int cpuid) -{ - - if (rcr3() == lazyptd) - load_cr3(curpcb->pcb_cr3); - CPU_CLR_ATOMIC(cpuid, lazymask); -} - - -static void -pmap_lazyfix(pmap_t pmap) -{ - cpuset_t mymask, mask; - u_int cpuid, spins; - int lsb; - - mask = pmap->pm_active; - while (!CPU_EMPTY(&mask)) { - spins = 50000000; - - /* Find least significant set bit. */ - lsb = CPU_FFS(&mask); - MPASS(lsb != 0); - lsb--; - CPU_SETOF(lsb, &mask); - mtx_lock_spin(&smp_ipi_mtx); -#if defined(PAE) || defined(PAE_TABLES) - lazyptd = vtophys(pmap->pm_pdpt); -#else - lazyptd = vtophys(pmap->pm_pdir); -#endif - cpuid = PCPU_GET(cpuid); - - /* Use a cpuset just for having an easy check. */ - CPU_SETOF(cpuid, &mymask); - if (!CPU_CMP(&mask, &mymask)) { - lazymask = &pmap->pm_active; - pmap_lazyfix_self(cpuid); - } else { - atomic_store_rel_int((u_int *)&lazymask, - (u_int)&pmap->pm_active); - atomic_store_rel_int(&lazywait, 0); - ipi_selected(mask, IPI_LAZYPMAP); - while (lazywait == 0) { - ia32_pause(); - if (--spins == 0) - break; - } - } - mtx_unlock_spin(&smp_ipi_mtx); - if (spins == 0) - printf("pmap_lazyfix: spun for 50000000\n"); - mask = pmap->pm_active; - } -} - -#else /* SMP */ - -/* - * Cleaning up on uniprocessor is easy. For various reasons, we're - * unlikely to have to even execute this code, including the fact - * that the cleanup is deferred until the parent does a wait(2), which - * means that another userland process has run. - */ -static void -pmap_lazyfix(pmap_t pmap) -{ - u_int cr3; - - cr3 = vtophys(pmap->pm_pdir); - if (cr3 == rcr3()) { - load_cr3(curpcb->pcb_cr3); - CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active); - } -} -#endif /* SMP */ - /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. 
@@ -2041,8 +1935,9 @@ pmap_release(pmap_t pmap) pmap->pm_stats.resident_count)); KASSERT(vm_radix_is_empty(&pmap->pm_root), ("pmap_release: pmap has reserved page table page(s)")); + KASSERT(CPU_EMPTY(&pmap->pm_active), + ("releasing active pmap %p", pmap)); - pmap_lazyfix(pmap); mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s index 2859ec7cb4f6..26bfd3b1e120 100644 --- a/sys/i386/i386/swtch.s +++ b/sys/i386/i386/swtch.s @@ -174,12 +174,6 @@ ENTRY(cpu_switch) /* switch address space */ movl PCB_CR3(%edx),%eax -#if defined(PAE) || defined(PAE_TABLES) - cmpl %eax,IdlePDPT /* Kernel address space? */ -#else - cmpl %eax,IdlePTD /* Kernel address space? */ -#endif - je sw0 READ_CR3(%ebx) /* The same address space? */ cmpl %ebx,%eax je sw0 diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h index cc9a8eaf9a8e..0abbca4158ff 100644 --- a/sys/i386/include/smp.h +++ b/sys/i386/include/smp.h @@ -42,7 +42,6 @@ extern u_long *ipi_invlrng_counts[MAXCPU]; extern u_long *ipi_invlpg_counts[MAXCPU]; extern u_long *ipi_invlcache_counts[MAXCPU]; extern u_long *ipi_rendezvous_counts[MAXCPU]; -extern u_long *ipi_lazypmap_counts[MAXCPU]; #endif /* IPI handlers */ @@ -54,8 +53,7 @@ inthand_t IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */ IDTVEC(cpustop), /* CPU stops & waits to be restarted */ IDTVEC(cpususpend), /* CPU suspends & waits to be resumed */ - IDTVEC(rendezvous), /* handle CPU rendezvous */ - IDTVEC(lazypmap); /* handle lazy pmap release */ + IDTVEC(rendezvous); /* handle CPU rendezvous */ /* functions in mp_machdep.c */ void cpu_add(u_int apic_id, char boot_cpu); diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c index e391981d060f..329bba11ef65 100644 --- a/sys/i386/xen/mp_machdep.c +++ b/sys/i386/xen/mp_machdep.c @@ -96,7 +96,6 @@ extern struct pcpu __pcpu[]; extern void Xhypervisor_callback(void); extern void failsafe_callback(void); -extern void pmap_lazyfix_action(void); /*--------------------------- Forward Declarations ---------------------------*/ static driver_filter_t smp_reschedule_interrupt; @@ -370,24 +369,16 @@ iv_invlcache(uintptr_t a, uintptr_t b) atomic_add_int(&smp_tlb_wait, 1); } -static void -iv_lazypmap(uintptr_t a, uintptr_t b) -{ - pmap_lazyfix_action(); - atomic_add_int(&smp_tlb_wait, 1); -} - /* * These start from "IPI offset" APIC_IPI_INTS */ -static call_data_func_t *ipi_vectors[6] = +static call_data_func_t *ipi_vectors[5] = { iv_rendezvous, iv_invltlb, iv_invlpg, iv_invlrng, iv_invlcache, - iv_lazypmap, }; /* diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c index 1dc50b2a0a16..757fc36bb3d4 100644 --- a/sys/i386/xen/pmap.c +++ b/sys/i386/xen/pmap.c @@ -1652,107 +1652,6 @@ pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags) * Pmap allocation/deallocation routines. ***************************************************/ -#ifdef SMP -/* - * Deal with a SMP shootdown of other users of the pmap that we are - * trying to dispose of. This can be a bit hairy. 
- */ -static cpuset_t *lazymask; -static u_int lazyptd; -static volatile u_int lazywait; - -void pmap_lazyfix_action(void); - -void -pmap_lazyfix_action(void) -{ - -#ifdef COUNT_IPIS - (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++; -#endif - if (rcr3() == lazyptd) - load_cr3(PCPU_GET(curpcb)->pcb_cr3); - CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask); - atomic_store_rel_int(&lazywait, 1); -} - -static void -pmap_lazyfix_self(u_int cpuid) -{ - - if (rcr3() == lazyptd) - load_cr3(PCPU_GET(curpcb)->pcb_cr3); - CPU_CLR_ATOMIC(cpuid, lazymask); -} - - -static void -pmap_lazyfix(pmap_t pmap) -{ - cpuset_t mymask, mask; - u_int cpuid, spins; - int lsb; - - mask = pmap->pm_active; - while (!CPU_EMPTY(&mask)) { - spins = 50000000; - - /* Find least significant set bit. */ - lsb = CPU_FFS(&mask); - MPASS(lsb != 0); - lsb--; - CPU_SETOF(lsb, &mask); - mtx_lock_spin(&smp_ipi_mtx); -#ifdef PAE - lazyptd = vtophys(pmap->pm_pdpt); -#else - lazyptd = vtophys(pmap->pm_pdir); -#endif - cpuid = PCPU_GET(cpuid); - - /* Use a cpuset just for having an easy check. */ - CPU_SETOF(cpuid, &mymask); - if (!CPU_CMP(&mask, &mymask)) { - lazymask = &pmap->pm_active; - pmap_lazyfix_self(cpuid); - } else { - atomic_store_rel_int((u_int *)&lazymask, - (u_int)&pmap->pm_active); - atomic_store_rel_int(&lazywait, 0); - ipi_selected(mask, IPI_LAZYPMAP); - while (lazywait == 0) { - ia32_pause(); - if (--spins == 0) - break; - } - } - mtx_unlock_spin(&smp_ipi_mtx); - if (spins == 0) - printf("pmap_lazyfix: spun for 50000000\n"); - mask = pmap->pm_active; - } -} - -#else /* SMP */ - -/* - * Cleaning up on uniprocessor is easy. For various reasons, we're - * unlikely to have to even execute this code, including the fact - * that the cleanup is deferred until the parent does a wait(2), which - * means that another userland process has run. - */ -static void -pmap_lazyfix(pmap_t pmap) -{ - u_int cr3; - - cr3 = vtophys(pmap->pm_pdir); - if (cr3 == rcr3()) { - load_cr3(PCPU_GET(curpcb)->pcb_cr3); - CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active); - } -} -#endif /* SMP */ /* * Release any resources held by the given physical map. 
@@ -1780,7 +1679,8 @@ pmap_release(pmap_t pmap) mtx_lock(&createdelete_lock); #endif - pmap_lazyfix(pmap); + KASSERT(CPU_EMPTY(&pmap->pm_active), + ("releasing active pmap %p", pmap)); mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); diff --git a/sys/x86/include/acpica_machdep.h b/sys/x86/include/acpica_machdep.h index 46080c0e8faf..136285caa699 100644 --- a/sys/x86/include/acpica_machdep.h +++ b/sys/x86/include/acpica_machdep.h @@ -74,6 +74,7 @@ enum intr_polarity; void acpi_SetDefaultIntrModel(int model); void acpi_cpu_c1(void); +void acpi_cpu_idle_mwait(uint32_t mwait_hint); void *acpi_map_table(vm_paddr_t pa, const char *sig); void acpi_unmap_table(void *table); vm_paddr_t acpi_find_table(const char *sig); diff --git a/sys/x86/xen/xen_apic.c b/sys/x86/xen/xen_apic.c index 8e831842ee6a..c742920d90e1 100644 --- a/sys/x86/xen/xen_apic.c +++ b/sys/x86/xen/xen_apic.c @@ -68,9 +68,6 @@ static driver_filter_t xen_invltlb; static driver_filter_t xen_invlpg; static driver_filter_t xen_invlrng; static driver_filter_t xen_invlcache; -#ifdef __i386__ -static driver_filter_t xen_lazypmap; -#endif static driver_filter_t xen_ipi_bitmap_handler; static driver_filter_t xen_cpustop_handler; static driver_filter_t xen_cpususpend_handler; @@ -79,9 +76,6 @@ static driver_filter_t xen_cpustophard_handler; /*---------------------------- Extern Declarations ---------------------------*/ /* Variables used by mp_machdep to perform the MMU related IPIs */ -#ifdef __i386__ -extern void pmap_lazyfix_action(void); -#endif #ifdef __amd64__ extern int pmap_pcid_enabled; #endif @@ -104,9 +98,6 @@ static struct xen_ipi_handler xen_ipis[] = [IPI_TO_IDX(IPI_INVLPG)] = { xen_invlpg, "ipg" }, [IPI_TO_IDX(IPI_INVLRNG)] = { xen_invlrng, "irg" }, [IPI_TO_IDX(IPI_INVLCACHE)] = { xen_invlcache, "ic" }, -#ifdef __i386__ - [IPI_TO_IDX(IPI_LAZYPMAP)] = { xen_lazypmap, "lp" }, -#endif [IPI_TO_IDX(IPI_BITMAP_VECTOR)] = { xen_ipi_bitmap_handler, "b" }, [IPI_TO_IDX(IPI_STOP)] = { xen_cpustop_handler, "st" }, [IPI_TO_IDX(IPI_SUSPEND)] = { xen_cpususpend_handler, "sp" }, @@ -474,16 +465,6 @@ xen_invlcache(void *arg) return (FILTER_HANDLED); } -#ifdef __i386__ -static int -xen_lazypmap(void *arg) -{ - - pmap_lazyfix_action(); - return (FILTER_HANDLED); -} -#endif - static int xen_cpustop_handler(void *arg) {
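
As background for the KASSERTs this patch adds to pmap_release() on amd64, i386, and i386/xen: the invariant being asserted is that, by the time a pmap is released, no CPU still has it marked in pm_active. The user-space sketch below restates that invariant with simplified stand-ins for cpuset_t, CPU_EMPTY(), CPU_SET(), CPU_CLR(), and struct pmap; it is illustrative only and does not use the kernel's actual definitions.

	/*
	 * Minimal user-space sketch of the invariant asserted in pmap_release():
	 * a pmap being released must not be active on any CPU.  The types and
	 * macros below are simplified stand-ins, not the kernel's cpuset(9) API.
	 */
	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	typedef uint64_t cpuset_t;		/* one bit per CPU, up to 64 CPUs */
	#define	CPU_EMPTY(set)		(*(set) == 0)
	#define	CPU_SET(cpu, set)	(*(set) |= (1ULL << (cpu)))
	#define	CPU_CLR(cpu, set)	(*(set) &= ~(1ULL << (cpu)))

	struct pmap {
		cpuset_t pm_active;	/* CPUs currently using this pmap */
	};

	static void
	pmap_release(struct pmap *pmap)
	{
		/* Mirrors the KASSERT added by the patch. */
		assert(CPU_EMPTY(&pmap->pm_active) && "releasing active pmap");
		/* ... free page table pages, drop from the allpmaps list, ... */
	}

	int
	main(void)
	{
		struct pmap pm = { .pm_active = 0 };

		CPU_SET(2, &pm.pm_active);	/* CPU 2 switches to this pmap */
		CPU_CLR(2, &pm.pm_active);	/* CPU 2 switches away again */
		pmap_release(&pm);		/* OK: no CPU still active */
		printf("released idle pmap\n");
		return (0);
	}

With the lazy-switch code removed, a CPU always clears its bit in pm_active when it switches away from a user pmap, so the assertion above is expected to hold whenever pmap_release() runs; previously, pmap_lazyfix() had to chase down CPUs that were still borrowing the page tables before the pmap could be torn down.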