Implement superpages for PowerPC64 (HPT)

This change adds support for transparent superpages for PowerPC64
systems using Hashed Page Tables (HPT). All pmap operations are
supported.

The changes were inspired by the RISC-V implementation of superpages,
by @markj (r344106), but heavily adapted to fit the PPC64 HPT architecture
and the existing MMU OEA64 code.

Until these changes are better tested, superpage support is disabled by
default. To enable it, set vm.pmap.superpages_enabled=1.
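
A minimal usage sketch (the tunable name is from this commit; treating it as a
boot-time loader tunable that is also readable via sysctl is an assumption
based on how similar knobs work on other architectures):

# /boot/loader.conf -- set before boot
vm.pmap.superpages_enabled=1

# after boot, confirm the setting
sysctl vm.pmap.superpages_enabled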

In this initial implementation, when superpages are disabled, system
performance stays at the same level as without these changes. When
superpages are enabled, buildworld time increases slightly (~2%). However,
for workloads that put heavy pressure on the TLB, the performance boost
is much bigger (see the HPC Challenge and pgbench results on D25237).

Reviewed by:	jhibbits
Sponsored by:	Eldorado Research Institute (eldorado.org.br)
Differential Revision:	https://reviews.freebsd.org/D25237
Leandro Lupori 2020-11-06 14:12:45 +00:00
parent 5d0e861910
commit e2d6c417e3
11 changed files with 1588 additions and 165 deletions

File diff suppressed because it is too large

View File

@ -82,12 +82,18 @@ int64_t moea64_pte_insert(struct pvo_entry *);
int64_t moea64_pte_unset(struct pvo_entry *);
int64_t moea64_pte_clear(struct pvo_entry *, uint64_t);
int64_t moea64_pte_synch(struct pvo_entry *);
int64_t moea64_pte_insert_sp(struct pvo_entry *);
int64_t moea64_pte_unset_sp(struct pvo_entry *);
int64_t moea64_pte_replace_sp(struct pvo_entry *);
typedef int64_t (*moea64_pte_replace_t)(struct pvo_entry *, int);
typedef int64_t (*moea64_pte_insert_t)(struct pvo_entry *);
typedef int64_t (*moea64_pte_unset_t)(struct pvo_entry *);
typedef int64_t (*moea64_pte_clear_t)(struct pvo_entry *, uint64_t);
typedef int64_t (*moea64_pte_synch_t)(struct pvo_entry *);
typedef int64_t (*moea64_pte_insert_sp_t)(struct pvo_entry *);
typedef int64_t (*moea64_pte_unset_sp_t)(struct pvo_entry *);
typedef int64_t (*moea64_pte_replace_sp_t)(struct pvo_entry *);
struct moea64_funcs {
moea64_pte_replace_t pte_replace;
@ -95,6 +101,9 @@ struct moea64_funcs {
moea64_pte_unset_t pte_unset;
moea64_pte_clear_t pte_clear;
moea64_pte_synch_t pte_synch;
moea64_pte_insert_sp_t pte_insert_sp;
moea64_pte_unset_sp_t pte_unset_sp;
moea64_pte_replace_sp_t pte_replace_sp;
};
extern struct moea64_funcs *moea64_ops;
@ -128,5 +137,6 @@ extern uint64_t moea64_large_page_mask;
extern u_long moea64_pteg_count;
extern u_long moea64_pteg_mask;
extern int n_slbs;
extern bool moea64_has_lp_4k_16m;
#endif /* _POWERPC_AIM_MMU_OEA64_H */
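
The three *_sp prototypes and the matching moea64_funcs members wire the
superpage operations into the existing MMU-ops indirection: each back end
(native HPT, PHYP) fills in moea64_ops, and the common OEA64 pmap dispatches
through it. A plausible shape for one of the wrappers declared above (a sketch
based on that pattern, not code taken from this commit):

/* Plausible dispatch body for the prototype above (sketch, not the diff). */
int64_t
moea64_pte_insert_sp(struct pvo_entry *pvo)
{
        /* Hand off to the active back end (native HPT or PHYP). */
        return (moea64_ops->pte_insert_sp(pvo));
}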

View File

@ -132,11 +132,32 @@ __FBSDID("$FreeBSD$");
/* POWER9 only permits a 64k partition table size. */
#define PART_SIZE 0x10000
/* Actual page sizes (to be used with tlbie, when L=0) */
#define AP_4K 0x00
#define AP_16M 0x80
#define LPTE_KERNEL_VSID_BIT (KERNEL_VSID_BIT << \
(16 - (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)))
/* Abbreviated Virtual Address Page - high bits */
#define LPTE_AVA_PGNHI_MASK 0x0000000000000F80ULL
#define LPTE_AVA_PGNHI_SHIFT 7
/* Effective Address Page - low bits */
#define EA_PAGELO_MASK 0x7ffULL
#define EA_PAGELO_SHIFT 11
static bool moea64_crop_tlbie;
static bool moea64_need_lock;
/*
* The tlbie instruction has two forms: an old one used by PowerISA
* 2.03 and prior, and a newer one used by PowerISA 2.06 and later.
* We need to support both.
*/
static __inline void
TLBIE(uint64_t vpn) {
TLBIE(uint64_t vpn, uint64_t oldptehi)
{
#ifndef __powerpc64__
register_t vpn_hi, vpn_lo;
register_t msr;
@ -153,11 +174,31 @@ TLBIE(uint64_t vpn) {
while (!atomic_cmpset_int(&tlbie_lock, 0, 1));
isync(); /* Flush instruction queue once lock acquired */
if (moea64_crop_tlbie)
if (moea64_crop_tlbie) {
vpn &= ~(0xffffULL << 48);
#ifdef __powerpc64__
if ((oldptehi & LPTE_BIG) != 0)
__asm __volatile("tlbie %0, 1" :: "r"(vpn) :
"memory");
else
__asm __volatile("tlbie %0, 0" :: "r"(vpn) :
"memory");
__asm __volatile("eieio; tlbsync; ptesync" :::
"memory");
goto done;
#endif
}
}
#ifdef __powerpc64__
/*
* If this page has LPTE_BIG set and is from userspace, then
* it must be a superpage with 4KB base/16MB actual page size.
*/
if ((oldptehi & LPTE_BIG) != 0 &&
(oldptehi & LPTE_KERNEL_VSID_BIT) == 0)
vpn |= AP_16M;
/*
* Explicitly clobber r0. The tlbie instruction has two forms: an old
* one used by PowerISA 2.03 and prior, and a newer one used by PowerISA
@ -168,7 +209,7 @@ TLBIE(uint64_t vpn) {
* in the newer form is in the same position as the L(page size) bit of
the old form, so as long as RS is 0, we're good on both sides.
*/
__asm __volatile("li 0, 0 \n tlbie %0" :: "r"(vpn) : "r0", "memory");
__asm __volatile("li 0, 0 \n tlbie %0, 0" :: "r"(vpn) : "r0", "memory");
__asm __volatile("eieio; tlbsync; ptesync" ::: "memory");
#else
vpn_hi = (uint32_t)(vpn >> 32);
@ -194,6 +235,7 @@ TLBIE(uint64_t vpn) {
intr_restore(intr);
#endif
done:
/* No barriers or special ops -- taken care of by ptesync above */
if (need_lock)
tlbie_lock = 0;
@ -224,6 +266,9 @@ static int64_t moea64_pte_synch_native(struct pvo_entry *);
static int64_t moea64_pte_clear_native(struct pvo_entry *, uint64_t);
static int64_t moea64_pte_replace_native(struct pvo_entry *, int);
static int64_t moea64_pte_unset_native(struct pvo_entry *);
static int64_t moea64_pte_insert_sp_native(struct pvo_entry *);
static int64_t moea64_pte_unset_sp_native(struct pvo_entry *);
static int64_t moea64_pte_replace_sp_native(struct pvo_entry *);
/*
* Utility routines.
@ -245,10 +290,13 @@ static struct pmap_funcs moea64_native_methods = {
static struct moea64_funcs moea64_native_funcs = {
.pte_synch = moea64_pte_synch_native,
.pte_clear = moea64_pte_clear_native,
.pte_unset = moea64_pte_unset_native,
.pte_replace = moea64_pte_replace_native,
.pte_insert = moea64_pte_insert_native,
.pte_clear = moea64_pte_clear_native,
.pte_unset = moea64_pte_unset_native,
.pte_replace = moea64_pte_replace_native,
.pte_insert = moea64_pte_insert_native,
.pte_insert_sp = moea64_pte_insert_sp_native,
.pte_unset_sp = moea64_pte_unset_sp_native,
.pte_replace_sp = moea64_pte_replace_sp_native,
};
MMU_DEF_INHERIT(oea64_mmu_native, MMU_TYPE_G5, moea64_native_methods, oea64_mmu);
@ -321,7 +369,7 @@ moea64_pte_clear_native(struct pvo_entry *pvo, uint64_t ptebit)
rw_runlock(&moea64_eviction_lock);
critical_enter();
TLBIE(pvo->pvo_vpn);
TLBIE(pvo->pvo_vpn, properpt.pte_hi);
critical_exit();
} else {
rw_runlock(&moea64_eviction_lock);
@ -332,21 +380,10 @@ moea64_pte_clear_native(struct pvo_entry *pvo, uint64_t ptebit)
return (ptelo & (LPTE_REF | LPTE_CHG));
}
static int64_t
moea64_pte_unset_native(struct pvo_entry *pvo)
static __always_inline int64_t
moea64_pte_unset_locked(volatile struct lpte *pt, uint64_t vpn)
{
volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot;
uint64_t ptelo, pvo_ptevpn;
pvo_ptevpn = moea64_pte_vpn_from_pvo_vpn(pvo);
rw_rlock(&moea64_eviction_lock);
if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != pvo_ptevpn) {
/* Evicted */
STAT_MOEA64(moea64_pte_overflow--);
rw_runlock(&moea64_eviction_lock);
return (-1);
}
uint64_t ptelo;
/*
* Invalidate the pte, briefly locking it to collect RC bits. No
@ -356,11 +393,10 @@ moea64_pte_unset_native(struct pvo_entry *pvo)
critical_enter();
pt->pte_hi = htobe64((be64toh(pt->pte_hi) & ~LPTE_VALID) | LPTE_LOCKED);
PTESYNC();
TLBIE(pvo->pvo_vpn);
TLBIE(vpn, pt->pte_hi);
ptelo = be64toh(pt->pte_lo);
*((volatile int32_t *)(&pt->pte_hi) + 1) = 0; /* Release lock */
critical_exit();
rw_runlock(&moea64_eviction_lock);
/* Keep statistics */
STAT_MOEA64(moea64_pte_valid--);
@ -368,6 +404,29 @@ moea64_pte_unset_native(struct pvo_entry *pvo)
return (ptelo & (LPTE_CHG | LPTE_REF));
}
static int64_t
moea64_pte_unset_native(struct pvo_entry *pvo)
{
volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot;
int64_t ret;
uint64_t pvo_ptevpn;
pvo_ptevpn = moea64_pte_vpn_from_pvo_vpn(pvo);
rw_rlock(&moea64_eviction_lock);
if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != pvo_ptevpn) {
/* Evicted */
STAT_MOEA64(moea64_pte_overflow--);
ret = -1;
} else
ret = moea64_pte_unset_locked(pt, pvo->pvo_vpn);
rw_runlock(&moea64_eviction_lock);
return (ret);
}
static int64_t
moea64_pte_replace_inval_native(struct pvo_entry *pvo,
volatile struct lpte *pt)
@ -394,7 +453,7 @@ moea64_pte_replace_inval_native(struct pvo_entry *pvo,
critical_enter();
pt->pte_hi = htobe64((be64toh(pt->pte_hi) & ~LPTE_VALID) | LPTE_LOCKED);
PTESYNC();
TLBIE(pvo->pvo_vpn);
TLBIE(pvo->pvo_vpn, pt->pte_hi);
ptelo = be64toh(pt->pte_lo);
EIEIO();
pt->pte_lo = htobe64(properpt.pte_lo);
@ -734,7 +793,7 @@ moea64_insert_to_pteg_native(struct lpte *pvo_pt, uintptr_t slotbase,
va |= (oldptehi & LPTE_AVPN_MASK) <<
(ADDR_API_SHFT64 - ADDR_PIDX_SHFT);
PTESYNC();
TLBIE(va);
TLBIE(va, oldptehi);
STAT_MOEA64(moea64_pte_valid--);
STAT_MOEA64(moea64_pte_overflow++);
}
@ -754,26 +813,18 @@ moea64_insert_to_pteg_native(struct lpte *pvo_pt, uintptr_t slotbase,
return (k);
}
static int64_t
moea64_pte_insert_native(struct pvo_entry *pvo)
static __always_inline int64_t
moea64_pte_insert_locked(struct pvo_entry *pvo, struct lpte *insertpt,
uint64_t mask)
{
struct lpte insertpt;
uintptr_t slot;
/* Initialize PTE */
moea64_pte_from_pvo(pvo, &insertpt);
/* Make sure further insertion is locked out during evictions */
rw_rlock(&moea64_eviction_lock);
/*
* First try primary hash.
*/
pvo->pvo_pte.slot &= ~7ULL; /* Base slot address */
slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot,
LPTE_VALID | LPTE_WIRED | LPTE_LOCKED);
slot = moea64_insert_to_pteg_native(insertpt, pvo->pvo_pte.slot,
mask | LPTE_WIRED | LPTE_LOCKED);
if (slot != -1) {
rw_runlock(&moea64_eviction_lock);
pvo->pvo_pte.slot = slot;
return (0);
}
@ -782,52 +833,54 @@ moea64_pte_insert_native(struct pvo_entry *pvo)
* Now try secondary hash.
*/
pvo->pvo_vaddr ^= PVO_HID;
insertpt.pte_hi ^= LPTE_HID;
insertpt->pte_hi ^= LPTE_HID;
pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);
slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot,
LPTE_VALID | LPTE_WIRED | LPTE_LOCKED);
slot = moea64_insert_to_pteg_native(insertpt, pvo->pvo_pte.slot,
mask | LPTE_WIRED | LPTE_LOCKED);
if (slot != -1) {
rw_runlock(&moea64_eviction_lock);
pvo->pvo_pte.slot = slot;
return (0);
}
/*
* Out of luck. Find a PTE to sacrifice.
*/
/* Lock out all insertions for a bit */
if (!rw_try_upgrade(&moea64_eviction_lock)) {
rw_runlock(&moea64_eviction_lock);
rw_wlock(&moea64_eviction_lock);
}
slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot,
LPTE_WIRED | LPTE_LOCKED);
if (slot != -1) {
rw_wunlock(&moea64_eviction_lock);
pvo->pvo_pte.slot = slot;
return (0);
}
/* Try other hash table. Now we're getting desperate... */
pvo->pvo_vaddr ^= PVO_HID;
insertpt.pte_hi ^= LPTE_HID;
pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);
slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot,
LPTE_WIRED | LPTE_LOCKED);
if (slot != -1) {
rw_wunlock(&moea64_eviction_lock);
pvo->pvo_pte.slot = slot;
return (0);
}
/* No freeable slots in either PTEG? We're hosed. */
rw_wunlock(&moea64_eviction_lock);
panic("moea64_pte_insert: overflow");
return (-1);
}
static int64_t
moea64_pte_insert_native(struct pvo_entry *pvo)
{
struct lpte insertpt;
int64_t ret;
/* Initialize PTE */
moea64_pte_from_pvo(pvo, &insertpt);
/* Make sure further insertion is locked out during evictions */
rw_rlock(&moea64_eviction_lock);
pvo->pvo_pte.slot &= ~7ULL; /* Base slot address */
ret = moea64_pte_insert_locked(pvo, &insertpt, LPTE_VALID);
if (ret == -1) {
/*
* Out of luck. Find a PTE to sacrifice.
*/
/* Lock out all insertions for a bit */
if (!rw_try_upgrade(&moea64_eviction_lock)) {
rw_runlock(&moea64_eviction_lock);
rw_wlock(&moea64_eviction_lock);
}
/* Don't evict large pages */
ret = moea64_pte_insert_locked(pvo, &insertpt, LPTE_BIG);
rw_wunlock(&moea64_eviction_lock);
/* No freeable slots in either PTEG? We're hosed. */
if (ret == -1)
panic("moea64_pte_insert: overflow");
} else
rw_runlock(&moea64_eviction_lock);
return (0);
}
static void *
moea64_dump_pmap_native(void *ctx, void *buf, u_long *nbytes)
{
@ -846,3 +899,134 @@ moea64_dump_pmap_native(void *ctx, void *buf, u_long *nbytes)
dctx->ptex = ptex_end;
return (__DEVOLATILE(struct lpte *, moea64_pteg_table) + ptex);
}
static __always_inline uint64_t
moea64_vpn_from_pte(uint64_t ptehi, uintptr_t slot)
{
uint64_t pgn, pgnlo, vsid;
vsid = (ptehi & LPTE_AVA_MASK) >> LPTE_VSID_SHIFT;
if ((ptehi & LPTE_HID) != 0)
slot ^= (moea64_pteg_mask << 3);
pgnlo = ((vsid & VSID_HASH_MASK) ^ (slot >> 3)) & EA_PAGELO_MASK;
pgn = ((ptehi & LPTE_AVA_PGNHI_MASK) << (EA_PAGELO_SHIFT -
LPTE_AVA_PGNHI_SHIFT)) | pgnlo;
return ((vsid << 16) | pgn);
}
static __always_inline int64_t
moea64_pte_unset_sp_locked(struct pvo_entry *pvo)
{
volatile struct lpte *pt;
uint64_t ptehi, refchg, vpn;
vm_offset_t eva;
pmap_t pm;
pm = pvo->pvo_pmap;
refchg = 0;
eva = PVO_VADDR(pvo) + HPT_SP_SIZE;
for (; pvo != NULL && PVO_VADDR(pvo) < eva;
pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
pt = moea64_pteg_table + pvo->pvo_pte.slot;
ptehi = be64toh(pt->pte_hi);
if ((ptehi & LPTE_AVPN_MASK) !=
moea64_pte_vpn_from_pvo_vpn(pvo)) {
/* Evicted: invalidate new entry */
STAT_MOEA64(moea64_pte_overflow--);
vpn = moea64_vpn_from_pte(ptehi, pvo->pvo_pte.slot);
CTR1(KTR_PMAP, "Evicted page in pte_unset_sp: vpn=%jx",
(uintmax_t)vpn);
/* Assume evicted page was modified */
refchg |= LPTE_CHG;
} else
vpn = pvo->pvo_vpn;
refchg |= moea64_pte_unset_locked(pt, vpn);
}
return (refchg);
}
static int64_t
moea64_pte_unset_sp_native(struct pvo_entry *pvo)
{
uint64_t refchg;
PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0,
("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo)));
rw_rlock(&moea64_eviction_lock);
refchg = moea64_pte_unset_sp_locked(pvo);
rw_runlock(&moea64_eviction_lock);
return (refchg);
}
static __always_inline int64_t
moea64_pte_insert_sp_locked(struct pvo_entry *pvo)
{
struct lpte insertpt;
int64_t ret;
vm_offset_t eva;
pmap_t pm;
pm = pvo->pvo_pmap;
eva = PVO_VADDR(pvo) + HPT_SP_SIZE;
for (; pvo != NULL && PVO_VADDR(pvo) < eva;
pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
moea64_pte_from_pvo(pvo, &insertpt);
pvo->pvo_pte.slot &= ~7ULL; /* Base slot address */
ret = moea64_pte_insert_locked(pvo, &insertpt, LPTE_VALID);
if (ret == -1) {
/* Lock out all insertions for a bit */
if (!rw_try_upgrade(&moea64_eviction_lock)) {
rw_runlock(&moea64_eviction_lock);
rw_wlock(&moea64_eviction_lock);
}
/* Don't evict large pages */
ret = moea64_pte_insert_locked(pvo, &insertpt,
LPTE_BIG);
rw_downgrade(&moea64_eviction_lock);
/* No freeable slots in either PTEG? We're hosed. */
if (ret == -1)
panic("moea64_pte_insert_sp: overflow");
}
}
return (0);
}
static int64_t
moea64_pte_insert_sp_native(struct pvo_entry *pvo)
{
PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0,
("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo)));
rw_rlock(&moea64_eviction_lock);
moea64_pte_insert_sp_locked(pvo);
rw_runlock(&moea64_eviction_lock);
return (0);
}
static int64_t
moea64_pte_replace_sp_native(struct pvo_entry *pvo)
{
uint64_t refchg;
PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0,
("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo)));
rw_rlock(&moea64_eviction_lock);
refchg = moea64_pte_unset_sp_locked(pvo);
moea64_pte_insert_sp_locked(pvo);
rw_runlock(&moea64_eviction_lock);
return (refchg);
}

View File

@ -148,8 +148,8 @@ RB_PROTOTYPE(pvo_tree, pvo_entry, pvo_plink, pvo_vaddr_compare);
#define PVO_MANAGED 0x020UL /* PVO entry is managed */
#define PVO_BOOTSTRAP 0x080UL /* PVO entry allocated during
bootstrap */
#define PVO_DEAD 0x100UL /* waiting to be deleted */
#define PVO_LARGE 0x200UL /* large page */
#define PVO_DEAD 0x100UL /* waiting to be deleted */
#define PVO_LARGE 0x200UL /* large page */
#define PVO_VADDR(pvo) ((pvo)->pvo_vaddr & ~ADDR_POFF)
#define PVO_PTEGIDX_GET(pvo) ((pvo)->pvo_vaddr & PVO_PTEGIDX_MASK)
#define PVO_PTEGIDX_ISSET(pvo) ((pvo)->pvo_vaddr & PVO_PTEGIDX_VALID)

View File

@ -111,6 +111,7 @@ typedef struct lpte lpte_t;
/* High quadword: */
#define LPTE_VSID_SHIFT 12
#define LPTE_AVPN_MASK 0xFFFFFFFFFFFFFF80ULL
#define LPTE_AVA_MASK 0x3FFFFFFFFFFFFF80ULL
#define LPTE_API 0x0000000000000F80ULL
#define LPTE_SWBITS 0x0000000000000078ULL
#define LPTE_WIRED 0x0000000000000010ULL
@ -120,8 +121,13 @@ typedef struct lpte lpte_t;
#define LPTE_VALID 0x0000000000000001ULL
/* Low quadword: */
#define LP_4K_16M 0x38 /* 4KB base, 16MB actual page size */
#define EXTEND_PTE(x) UINT64_C(x) /* make constants 64-bit */
#define LPTE_RPGN 0xfffffffffffff000ULL
#define LPTE_LP_MASK 0x00000000000ff000ULL
#define LPTE_LP_SHIFT 12
#define LPTE_LP_4K_16M ((unsigned long long)(LP_4K_16M) << LPTE_LP_SHIFT)
#define LPTE_REF EXTEND_PTE( PTE_REF )
#define LPTE_CHG EXTEND_PTE( PTE_CHG )
#define LPTE_WIMG EXTEND_PTE( PTE_WIMG )
@ -139,6 +145,12 @@ typedef struct lpte lpte_t;
#define LPTE_RW LPTE_BW
#define LPTE_RO LPTE_BR
/* HPT superpage definitions */
#define HPT_SP_SHIFT (VM_LEVEL_0_ORDER + PAGE_SHIFT)
#define HPT_SP_SIZE (1 << HPT_SP_SHIFT)
#define HPT_SP_MASK (HPT_SP_SIZE - 1)
#define HPT_SP_PAGES (1 << VM_LEVEL_0_ORDER)
/* POWER ISA 3.0 Radix Table Definitions */
#define RPTE_VALID 0x8000000000000000ULL
#define RPTE_LEAF 0x4000000000000000ULL /* is a PTE: always 1 */
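
To make the HPT superpage geometry above concrete, here is a worked expansion
(not part of the diff), using the HPT value of VM_LEVEL_0_ORDER — 12, per the
vmparam.h and pmap.c hunks below — and 4 KB base pages:

/*
 * HPT_SP_SHIFT = VM_LEVEL_0_ORDER + PAGE_SHIFT = 12 + 12 = 24
 * HPT_SP_SIZE  = 1 << 24 = 16 MB
 * HPT_SP_MASK  = HPT_SP_SIZE - 1 = 0x00ffffff
 * HPT_SP_PAGES = 1 << 12 = 4096 base pages per 16 MB superpage
 */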

View File

@ -64,6 +64,14 @@
#define SLBE_ESID_MASK 0xfffffffff0000000UL /* Effective segment ID mask */
#define SLBE_ESID_SHIFT 28
/*
* SLB page sizes encoding, as present in property ibm,segment-page-sizes
* of CPU device tree node.
*
* See LoPAPR: CPU Node Properties, section C.6.1.4.
*/
#define SLB_PGSZ_4K_4K 0
/* Virtual real-mode VSID in LPARs */
#define VSID_VRMA 0x1ffffff

View File

@ -185,31 +185,34 @@ struct pmap_physseg {
#define VM_NFREELIST 1
#define VM_FREELIST_DEFAULT 0
/*
* The largest allocation size is 4MB.
*/
#ifdef __powerpc64__
/* The largest allocation size is 16MB. */
#define VM_NFREEORDER 13
#else
/* The largest allocation size is 4MB. */
#define VM_NFREEORDER 11
#endif
#ifndef VM_NRESERVLEVEL
#ifdef __powerpc64__
/* Enable superpage reservations: 1 level. */
#define VM_NRESERVLEVEL 1
#else
/*
* Disable superpage reservations.
*/
/* Disable superpage reservations. */
#define VM_NRESERVLEVEL 0
#endif
#endif
/*
* Level 0 reservations consist of 512 pages.
*/
#ifndef VM_LEVEL_0_ORDER
#define VM_LEVEL_0_ORDER 9
/* Level 0 reservations consist of 512 (RPT) or 4096 (HPT) pages. */
#define VM_LEVEL_0_ORDER vm_level_0_order
#ifndef __ASSEMBLER__
extern int vm_level_0_order;
#endif
#endif
#ifndef VM_LEVEL_0_ORDER_MAX
#define VM_LEVEL_0_ORDER_MAX 12
#endif
#ifdef __powerpc64__
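
The jump from VM_NFREEORDER 11 to 13 follows from the 16 MB superpage: the
largest buddy-allocator chunk has order VM_NFREEORDER - 1, and an order-12
chunk of 4 KB pages is exactly 16 MB (a quick check, not part of the diff):

/* 2^(VM_NFREEORDER - 1) * PAGE_SIZE = 2^12 * 4 KB = 16 MB */
_Static_assert((1u << 12) * 4096u == 16u * 1024 * 1024,
    "order-12 chunk of 4 KB pages is 16 MB");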

View File

@ -141,6 +141,7 @@ powernv_attach(platform_t plat)
phandle_t opal;
int res, len, idx;
register_t msr;
bool has_lp;
/* Ping OPAL again just to make sure */
opal_check();
@ -228,6 +229,7 @@ powernv_attach(platform_t plat)
sizeof(arr));
len /= 4;
idx = 0;
has_lp = false;
while (len > 0) {
shift = arr[idx];
slb_encoding = arr[idx + 1];
@ -238,17 +240,21 @@ powernv_attach(platform_t plat)
lp_size = arr[idx];
lp_encoding = arr[idx+1];
if (slb_encoding == SLBV_L && lp_encoding == 0)
break;
has_lp = true;
if (slb_encoding == SLB_PGSZ_4K_4K &&
lp_encoding == LP_4K_16M)
moea64_has_lp_4k_16m = true;
idx += 2;
len -= 2;
nptlp--;
}
if (nptlp && slb_encoding == SLBV_L && lp_encoding == 0)
if (has_lp && moea64_has_lp_4k_16m)
break;
}
if (len == 0)
if (!has_lp)
panic("Standard large pages (SLB[L] = 1, PTE[LP] = 0) "
"not supported by this system.");

View File

@ -77,6 +77,8 @@ vm_offset_t virtual_end;
caddr_t crashdumpmap;
int pmap_bootstrapped;
/* Default level 0 reservations consist of 512 pages (2MB superpage). */
int vm_level_0_order = 9;
#ifdef AIM
int
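
The default order of 9 (512 pages) matches the 2 MB Radix superpage; for HPT,
the pmap is expected to raise it to 12 so that one reservation covers a 16 MB
superpage (HPT_SP_PAGES above). A hypothetical sketch of that bootstrap step
(the real initialization is not in the hunks shown here; only
vm_level_0_order and moea64_has_lp_4k_16m are names from this commit):

/*
 * Hypothetical: size level-0 reservations to one 16 MB superpage when
 * 4K-base/16M pages are supported (16 MB / 4 KB = 4096 = 1 << 12).
 */
if (moea64_has_lp_4k_16m)
        vm_level_0_order = 12;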

View File

@ -82,6 +82,9 @@ static int64_t mphyp_pte_synch(struct pvo_entry *pvo);
static int64_t mphyp_pte_clear(struct pvo_entry *pvo, uint64_t ptebit);
static int64_t mphyp_pte_unset(struct pvo_entry *pvo);
static int64_t mphyp_pte_insert(struct pvo_entry *pvo);
static int64_t mphyp_pte_unset_sp(struct pvo_entry *pvo);
static int64_t mphyp_pte_insert_sp(struct pvo_entry *pvo);
static int64_t mphyp_pte_replace_sp(struct pvo_entry *pvo);
static struct pmap_funcs mphyp_methods = {
.install = mphyp_install,
@ -95,6 +98,9 @@ static struct moea64_funcs mmu_phyp_funcs = {
.pte_clear = mphyp_pte_clear,
.pte_unset = mphyp_pte_unset,
.pte_insert = mphyp_pte_insert,
.pte_unset_sp = mphyp_pte_unset_sp,
.pte_insert_sp = mphyp_pte_insert_sp,
.pte_replace_sp = mphyp_pte_replace_sp,
};
MMU_DEF_INHERIT(pseries_mmu, "mmu_phyp", mphyp_methods, oea64_mmu);
@ -135,6 +141,7 @@ mphyp_bootstrap(vm_offset_t kernelstart, vm_offset_t kernelend)
uint64_t vsid;
phandle_t dev, node, root;
int idx, len, res;
bool has_lp;
rm_init(&mphyp_eviction_lock, "pte eviction");
@ -199,6 +206,7 @@ mphyp_bootstrap(vm_offset_t kernelstart, vm_offset_t kernelend)
sizeof(arr));
len /= 4;
idx = 0;
has_lp = false;
while (len > 0) {
shift = arr[idx];
slb_encoding = arr[idx + 1];
@ -220,18 +228,22 @@ mphyp_bootstrap(vm_offset_t kernelstart, vm_offset_t kernelend)
lp_encoding);
if (slb_encoding == SLBV_L && lp_encoding == 0)
break;
has_lp = true;
if (slb_encoding == SLB_PGSZ_4K_4K &&
lp_encoding == LP_4K_16M)
moea64_has_lp_4k_16m = true;
idx += 2;
len -= 2;
nptlp--;
}
dprintf("\n");
if (nptlp && slb_encoding == SLBV_L && lp_encoding == 0)
if (has_lp && moea64_has_lp_4k_16m)
break;
}
if (len > 0) {
if (has_lp) {
moea64_large_page_shift = shift;
moea64_large_page_size = 1ULL << lp_size;
moea64_large_page_mask = moea64_large_page_size - 1;
@ -393,7 +405,7 @@ mphyp_pte_spillable_ident(uintptr_t ptegbase, struct lpte *to_evict)
phyp_pft_hcall(H_READ, 0, slot, 0, 0, &pt.pte_hi,
&pt.pte_lo, &junk);
if (pt.pte_hi & LPTE_WIRED)
if ((pt.pte_hi & (LPTE_WIRED | LPTE_BIG)) != 0)
continue;
/* This is a candidate, so remember it */
@ -414,68 +426,61 @@ mphyp_pte_spillable_ident(uintptr_t ptegbase, struct lpte *to_evict)
return (k);
}
static int64_t
mphyp_pte_insert(struct pvo_entry *pvo)
static __inline int64_t
mphyp_pte_insert_locked(struct pvo_entry *pvo, struct lpte *pte)
{
struct rm_priotracker track;
struct lpte evicted;
uint64_t index, junk;
int64_t result;
struct lpte evicted, pte;
uint64_t index, junk, lastptelo;
PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
/* Initialize PTE */
moea64_pte_from_pvo(pvo, &pte);
evicted.pte_hi = 0;
/* Make sure further insertion is locked out during evictions */
rm_rlock(&mphyp_eviction_lock, &track);
/*
* First try primary hash.
*/
pvo->pvo_pte.slot &= ~7UL; /* Base slot address */
result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, pte.pte_hi,
pte.pte_lo, &index, &evicted.pte_lo, &junk);
result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, pte->pte_hi,
pte->pte_lo, &index, &evicted.pte_lo, &junk);
if (result == H_SUCCESS) {
rm_runlock(&mphyp_eviction_lock, &track);
pvo->pvo_pte.slot = index;
return (0);
}
KASSERT(result == H_PTEG_FULL, ("Page insertion error: %ld "
"(ptegidx: %#zx/%#lx, PTE %#lx/%#lx", result, pvo->pvo_pte.slot,
moea64_pteg_count, pte.pte_hi, pte.pte_lo));
moea64_pteg_count, pte->pte_hi, pte->pte_lo));
/*
* Next try secondary hash.
*/
pvo->pvo_vaddr ^= PVO_HID;
pte.pte_hi ^= LPTE_HID;
pte->pte_hi ^= LPTE_HID;
pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);
result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot,
pte.pte_hi, pte.pte_lo, &index, &evicted.pte_lo, &junk);
pte->pte_hi, pte->pte_lo, &index, &evicted.pte_lo, &junk);
if (result == H_SUCCESS) {
rm_runlock(&mphyp_eviction_lock, &track);
pvo->pvo_pte.slot = index;
return (0);
}
KASSERT(result == H_PTEG_FULL, ("Secondary page insertion error: %ld",
result));
/*
* Out of luck. Find a PTE to sacrifice.
*/
return (-1);
}
/* Lock out all insertions for a bit */
rm_runlock(&mphyp_eviction_lock, &track);
rm_wlock(&mphyp_eviction_lock);
static __inline int64_t
mphyp_pte_evict_and_insert_locked(struct pvo_entry *pvo, struct lpte *pte)
{
struct lpte evicted;
uint64_t index, junk, lastptelo;
int64_t result;
evicted.pte_hi = 0;
index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted);
if (index == -1L) {
/* Try other hash table? */
pvo->pvo_vaddr ^= PVO_HID;
pte.pte_hi ^= LPTE_HID;
pte->pte_hi ^= LPTE_HID;
pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);
index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted);
}
@ -500,18 +505,50 @@ mphyp_pte_insert(struct pvo_entry *pvo)
/*
* Set the new PTE.
*/
result = phyp_pft_hcall(H_ENTER, H_EXACT, index, pte.pte_hi,
pte.pte_lo, &index, &evicted.pte_lo, &junk);
rm_wunlock(&mphyp_eviction_lock); /* All clear */
result = phyp_pft_hcall(H_ENTER, H_EXACT, index, pte->pte_hi,
pte->pte_lo, &index, &evicted.pte_lo, &junk);
pvo->pvo_pte.slot = index;
if (result == H_SUCCESS)
return (0);
rm_wunlock(&mphyp_eviction_lock);
panic("Page replacement error: %ld", result);
return (result);
}
static int64_t
mphyp_pte_insert(struct pvo_entry *pvo)
{
struct rm_priotracker track;
int64_t ret;
struct lpte pte;
PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
/* Initialize PTE */
moea64_pte_from_pvo(pvo, &pte);
/* Make sure further insertion is locked out during evictions */
rm_rlock(&mphyp_eviction_lock, &track);
ret = mphyp_pte_insert_locked(pvo, &pte);
rm_runlock(&mphyp_eviction_lock, &track);
if (ret == -1) {
/*
* Out of luck. Find a PTE to sacrifice.
*/
/* Lock out all insertions for a bit */
rm_wlock(&mphyp_eviction_lock);
ret = mphyp_pte_evict_and_insert_locked(pvo, &pte);
rm_wunlock(&mphyp_eviction_lock); /* All clear */
}
return (ret);
}
static void *
mphyp_dump_pmap(void *ctx, void *buf, u_long *nbytes)
{
@ -541,3 +578,91 @@ mphyp_dump_pmap(void *ctx, void *buf, u_long *nbytes)
dctx->ptex = ptex;
return (buf);
}
static int64_t
mphyp_pte_unset_sp(struct pvo_entry *pvo)
{
struct lpte pte;
uint64_t junk, refchg;
int err;
vm_offset_t eva;
pmap_t pm;
pm = pvo->pvo_pmap;
PMAP_LOCK_ASSERT(pm, MA_OWNED);
KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0,
("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo)));
refchg = 0;
eva = PVO_VADDR(pvo) + HPT_SP_SIZE;
for (; pvo != NULL && PVO_VADDR(pvo) < eva;
pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
moea64_pte_from_pvo(pvo, &pte);
err = phyp_pft_hcall(H_REMOVE, H_AVPN, pvo->pvo_pte.slot,
pte.pte_hi & LPTE_AVPN_MASK, 0, &pte.pte_hi, &pte.pte_lo,
&junk);
KASSERT(err == H_SUCCESS || err == H_NOT_FOUND,
("Error removing page: %d", err));
if (err == H_NOT_FOUND)
STAT_MOEA64(moea64_pte_overflow--);
refchg |= pte.pte_lo & (LPTE_REF | LPTE_CHG);
}
return (refchg);
}
static int64_t
mphyp_pte_insert_sp(struct pvo_entry *pvo)
{
struct rm_priotracker track;
int64_t ret;
struct lpte pte;
vm_offset_t eva;
pmap_t pm;
pm = pvo->pvo_pmap;
PMAP_LOCK_ASSERT(pm, MA_OWNED);
KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0,
("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo)));
eva = PVO_VADDR(pvo) + HPT_SP_SIZE;
/* Make sure further insertion is locked out during evictions */
rm_rlock(&mphyp_eviction_lock, &track);
for (; pvo != NULL && PVO_VADDR(pvo) < eva;
pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
/* Initialize PTE */
moea64_pte_from_pvo(pvo, &pte);
ret = mphyp_pte_insert_locked(pvo, &pte);
if (ret == -1) {
/*
* Out of luck. Find a PTE to sacrifice.
*/
/* Lock out all insertions for a bit */
rm_runlock(&mphyp_eviction_lock, &track);
rm_wlock(&mphyp_eviction_lock);
mphyp_pte_evict_and_insert_locked(pvo, &pte);
rm_wunlock(&mphyp_eviction_lock); /* All clear */
rm_rlock(&mphyp_eviction_lock, &track);
}
}
rm_runlock(&mphyp_eviction_lock, &track);
return (0);
}
static int64_t
mphyp_pte_replace_sp(struct pvo_entry *pvo)
{
int64_t refchg;
refchg = mphyp_pte_unset_sp(pvo);
mphyp_pte_insert_sp(pvo);
return (refchg);
}

View File

@ -542,7 +542,8 @@ vm_fault_populate(struct faultstate *fs)
pidx += npages, m = vm_page_next(&m[npages - 1])) {
vaddr = fs->entry->start + IDX_TO_OFF(pidx) - fs->entry->offset;
#if defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \
__ARM_ARCH >= 6) || defined(__i386__) || defined(__riscv)
__ARM_ARCH >= 6) || defined(__i386__) || defined(__riscv) || \
defined(__powerpc64__)
psind = m->psind;
if (psind > 0 && ((vaddr & (pagesizes[psind] - 1)) != 0 ||
pidx + OFF_TO_IDX(pagesizes[psind]) - 1 > pager_last ||