eliminate locking surrounding ui_vmsize and swap reserve by using atomics

Change swap_reserved and swap_total to be in units of pages so that
swap reservations can be done using only atomics, instead of a single
global mutex for swap_reserved and a single mutex shared by all
processes running under the same uid for the per-uid accounting.

Results in an mmap() speedup and a 70% increase in brk() calls per second.

Reviewed by:	alc@, markj@, kib@
Approved by:	re (delphij@)
Differential Revision:	https://reviews.freebsd.org/D16273
Matt Macy
2018-10-05 05:50:56 +00:00
commit e8bb589d56 (parent 9bc603bd20)
3 changed files with 63 additions and 59 deletions
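The heart of the change is the pattern now visible in swap_reserve_by_cred()
below: optimistically atomic_fetchadd() the page count into the counter, test
the limit against the old value the fetchadd returned, and fetchadd the count
back out if the test fails. The counter may transiently overshoot the limit
between the add and the rollback, which is harmless for a pure accounting
value. The same pattern is applied twice, once to the global swap_reserved and
once to the per-uid ui_vmsize. Keeping the counters in pages rather than bytes
is also what makes the atomics portable: vm_ooffset_t is 64 bits wide and not
every platform provides 64-bit atomics, while a word-sized u_long page count
works with atomic_fetchadd_long() everywhere and still spans far more swap
than a 32-bit byte count could. What follows is a minimal userspace sketch of
the reserve/rollback pattern, using C11 atomics in place of the kernel
primitives; reserve_pages(), release_pages(), swap_limit, and the 4 KB
PAGE_SHIFT are hypothetical stand-ins for illustration, not kernel interfaces.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define	PAGE_SHIFT	12			/* assumption: 4 KB pages */

static atomic_ulong swap_reserved;		/* pages, like the new u_long */
static unsigned long swap_limit = 1UL << 20;	/* hypothetical cap, in pages */

/*
 * Optimistically add, test the limit against the old value returned by
 * the fetch-add, and undo with a second fetch-add on failure.  No mutex
 * is ever taken.
 */
static bool
reserve_pages(unsigned long pincr)
{
	unsigned long prev;

	prev = atomic_fetch_add(&swap_reserved, pincr);
	if (prev + pincr <= swap_limit)
		return (true);
	prev = atomic_fetch_sub(&swap_reserved, pincr);	/* roll back */
	if (prev < pincr)
		fprintf(stderr, "reservation underflow\n");
	return (false);
}

static void
release_pages(unsigned long pdecr)
{
	unsigned long prev;

	prev = atomic_fetch_sub(&swap_reserved, pdecr);
	if (prev < pdecr)
		fprintf(stderr, "negative reservation\n");
}

int
main(void)
{
	/* Reserve 16 pages, e.g. one 64 KB anonymous mapping. */
	if (reserve_pages(16))
		printf("reserved, %lu pages (%lu bytes) accounted\n",
		    atomic_load(&swap_reserved),
		    atomic_load(&swap_reserved) << PAGE_SHIFT);
	release_pages(16);
	return (0);
}

The sysctl_page_shift() handler added in the diff covers the reporting side of
the same decision: the counters live in pages, so the handler shifts by
PAGE_SHIFT and vm.swap_reserved / vm.swap_total keep reporting bytes to
userland.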

sys/kern/kern_resource.c

@@ -1276,7 +1276,6 @@ uifind(uid_t uid)
 		racct_create(&new_uip->ui_racct);
 		refcount_init(&new_uip->ui_ref, 1);
 		new_uip->ui_uid = uid;
-		mtx_init(&new_uip->ui_vmsize_mtx, "ui_vmsize", NULL, MTX_DEF);
 		rw_wlock(&uihashtbl_lock);
 		/*
@@ -1291,7 +1290,6 @@ uifind(uid_t uid)
 	} else {
 		rw_wunlock(&uihashtbl_lock);
 		racct_destroy(&new_uip->ui_racct);
-		mtx_destroy(&new_uip->ui_vmsize_mtx);
 		free(new_uip, M_UIDINFO);
 	}
 	return (uip);
@@ -1352,7 +1350,6 @@ uifree(struct uidinfo *uip)
 	if (uip->ui_vmsize != 0)
 		printf("freeing uidinfo: uid = %d, swapuse = %lld\n",
 		    uip->ui_uid, (unsigned long long)uip->ui_vmsize);
-	mtx_destroy(&uip->ui_vmsize_mtx);
 	free(uip, M_UIDINFO);
 }

sys/sys/resourcevar.h

@@ -93,12 +93,10 @@ struct racct;
  * (a)	Constant from inception
  * (b)	Lockless, updated using atomics
  * (c)	Locked by global uihashtbl_lock
- * (d)	Locked by the ui_vmsize_mtx
  */
 struct uidinfo {
 	LIST_ENTRY(uidinfo) ui_hash;	/* (c) hash chain of uidinfos */
-	struct mtx ui_vmsize_mtx;
-	vm_ooffset_t ui_vmsize;		/* (d) swap reservation by uid */
+	u_long	ui_vmsize;		/* (b) pages of swap reservation by uid */
 	long	ui_sbsize;		/* (b) socket buffer space consumed */
 	long	ui_proccnt;		/* (b) number of processes */
 	long	ui_ptscnt;		/* (b) number of pseudo-terminals */

sys/vm/swap_pager.c

@@ -151,12 +151,16 @@ static int nswapdev;	/* Number of swap devices */
 int swap_pager_avail;
 static struct sx swdev_syscall_lock;	/* serialize swap(on|off) */
 
-static vm_ooffset_t swap_total;
-SYSCTL_QUAD(_vm, OID_AUTO, swap_total, CTLFLAG_RD, &swap_total, 0,
-    "Total amount of available swap storage.");
-static vm_ooffset_t swap_reserved;
-SYSCTL_QUAD(_vm, OID_AUTO, swap_reserved, CTLFLAG_RD, &swap_reserved, 0,
+static u_long swap_reserved;
+static u_long swap_total;
+static int sysctl_page_shift(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_vm, OID_AUTO, swap_reserved, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    &swap_reserved, 0, sysctl_page_shift, "A",
     "Amount of swap storage needed to back all allocated anonymous memory.");
+SYSCTL_PROC(_vm, OID_AUTO, swap_total, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    &swap_total, 0, sysctl_page_shift, "A",
+    "Total amount of available swap storage.");
 
 static int overcommit = 0;
 SYSCTL_INT(_vm, VM_OVERCOMMIT, overcommit, CTLFLAG_RW, &overcommit, 0,
     "Configure virtual memory overcommit behavior. See tuning(7) "
@@ -173,6 +177,16 @@ SYSCTL_ULONG(_vm, OID_AUTO, swap_maxpages, CTLFLAG_RD, &swap_maxpages, 0,
 #define	SWAP_RESERVE_RLIMIT_ON		(1 << 1)
 #define	SWAP_RESERVE_ALLOW_NONWIRED	(1 << 2)
 
+static int
+sysctl_page_shift(SYSCTL_HANDLER_ARGS)
+{
+	uint64_t newval;
+	u_long value = *(u_long *)arg1;
+
+	newval = ((uint64_t)value) << PAGE_SHIFT;
+	return (sysctl_handle_64(oidp, &newval, 0, req));
+}
+
 int
 swap_reserve(vm_ooffset_t incr)
 {
@@ -183,7 +197,7 @@ swap_reserve(vm_ooffset_t incr)
 int
 swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred)
 {
-	vm_ooffset_t r, s;
+	u_long r, s, prev, pincr;
 	int res, error;
 	static int curfail;
 	static struct timeval lastfail;
@@ -191,8 +205,8 @@ swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred)
 	uip = cred->cr_ruidinfo;
-	if (incr & PAGE_MASK)
-		panic("swap_reserve: & PAGE_MASK");
+	KASSERT((incr & PAGE_MASK) == 0, ("%s: incr: %ju & PAGE_MASK", __func__,
+	    (uintmax_t)incr));
 
 #ifdef RACCT
 	if (racct_enable) {
@@ -204,36 +218,33 @@ swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred)
 	}
 #endif
 
+	pincr = atop(incr);
 	res = 0;
-	mtx_lock(&sw_dev_mtx);
-	r = swap_reserved + incr;
+	prev = atomic_fetchadd_long(&swap_reserved, pincr);
+	r = prev + pincr;
 	if (overcommit & SWAP_RESERVE_ALLOW_NONWIRED) {
 		s = vm_cnt.v_page_count - vm_cnt.v_free_reserved -
 		    vm_wire_count();
-		s *= PAGE_SIZE;
 	} else
 		s = 0;
 	s += swap_total;
 	if ((overcommit & SWAP_RESERVE_FORCE_ON) == 0 || r <= s ||
 	    (error = priv_check(curthread, PRIV_VM_SWAP_NOQUOTA)) == 0) {
 		res = 1;
-		swap_reserved = r;
+	} else {
+		prev = atomic_fetchadd_long(&swap_reserved, -pincr);
+		if (prev < pincr)
+			panic("swap_reserved < incr on overcommit fail");
 	}
-	mtx_unlock(&sw_dev_mtx);
 
 	if (res) {
-		UIDINFO_VMSIZE_LOCK(uip);
+		prev = atomic_fetchadd_long(&uip->ui_vmsize, pincr);
 		if ((overcommit & SWAP_RESERVE_RLIMIT_ON) != 0 &&
-		    uip->ui_vmsize + incr > lim_cur(curthread, RLIMIT_SWAP) &&
-		    priv_check(curthread, PRIV_VM_SWAP_NORLIMIT))
+		    prev + pincr > lim_cur(curthread, RLIMIT_SWAP) &&
+		    priv_check(curthread, PRIV_VM_SWAP_NORLIMIT)) {
 			res = 0;
-		else
-			uip->ui_vmsize += incr;
-		UIDINFO_VMSIZE_UNLOCK(uip);
-		if (!res) {
-			mtx_lock(&sw_dev_mtx);
-			swap_reserved -= incr;
-			mtx_unlock(&sw_dev_mtx);
+			prev = atomic_fetchadd_long(&uip->ui_vmsize, -pincr);
+			if (prev < pincr)
+				panic("uip->ui_vmsize < incr on overcommit fail");
 		}
 	}
 
 	if (!res && ppsratecheck(&lastfail, &curfail, 1)) {
@@ -242,7 +253,7 @@ swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred)
 	}
 
 #ifdef RACCT
-	if (!res) {
+	if (racct_enable && !res) {
 		PROC_LOCK(curproc);
 		racct_sub(curproc, RACCT_SWAP, incr);
 		PROC_UNLOCK(curproc);
@@ -256,22 +267,20 @@ void
 swap_reserve_force(vm_ooffset_t incr)
 {
 	struct uidinfo *uip;
+	u_long pincr;
 
-	mtx_lock(&sw_dev_mtx);
-	swap_reserved += incr;
-	mtx_unlock(&sw_dev_mtx);
+	KASSERT((incr & PAGE_MASK) == 0, ("%s: incr: %ju & PAGE_MASK", __func__,
+	    (uintmax_t)incr));
 
+	PROC_LOCK(curproc);
 #ifdef RACCT
-	PROC_LOCK(curproc);
-	racct_add_force(curproc, RACCT_SWAP, incr);
-	PROC_UNLOCK(curproc);
+	if (racct_enable)
+		racct_add_force(curproc, RACCT_SWAP, incr);
 #endif
 
-	uip = curthread->td_ucred->cr_ruidinfo;
-	PROC_LOCK(curproc);
-	UIDINFO_VMSIZE_LOCK(uip);
-	uip->ui_vmsize += incr;
-	UIDINFO_VMSIZE_UNLOCK(uip);
+	pincr = atop(incr);
+	atomic_add_long(&swap_reserved, pincr);
+	uip = curproc->p_ucred->cr_ruidinfo;
+	atomic_add_long(&uip->ui_vmsize, pincr);
 	PROC_UNLOCK(curproc);
 }
@@ -281,7 +290,7 @@ swap_release(vm_ooffset_t decr)
 	struct ucred *cred;
 
 	PROC_LOCK(curproc);
-	cred = curthread->td_ucred;
+	cred = curproc->p_ucred;
 	swap_release_by_cred(decr, cred);
 	PROC_UNLOCK(curproc);
 }
@@ -289,26 +298,26 @@ swap_release(vm_ooffset_t decr)
 void
 swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred)
 {
+	u_long prev, pdecr;
 	struct uidinfo *uip;
 
 	uip = cred->cr_ruidinfo;
-	if (decr & PAGE_MASK)
-		panic("swap_release: & PAGE_MASK");
+	KASSERT((decr & PAGE_MASK) == 0, ("%s: decr: %ju & PAGE_MASK", __func__,
+	    (uintmax_t)decr));
 
-	mtx_lock(&sw_dev_mtx);
-	if (swap_reserved < decr)
+	pdecr = atop(decr);
+	prev = atomic_fetchadd_long(&swap_reserved, -pdecr);
+	if (prev < pdecr)
 		panic("swap_reserved < decr");
-	swap_reserved -= decr;
-	mtx_unlock(&sw_dev_mtx);
 
-	UIDINFO_VMSIZE_LOCK(uip);
-	if (uip->ui_vmsize < decr)
+	prev = atomic_fetchadd_long(&uip->ui_vmsize, -pdecr);
+	if (prev < pdecr)
 		printf("negative vmsize for uid = %d\n", uip->ui_uid);
-	uip->ui_vmsize -= decr;
-	UIDINFO_VMSIZE_UNLOCK(uip);
-	racct_sub_cred(cred, RACCT_SWAP, decr);
+#ifdef RACCT
+	if (racct_enable)
+		racct_sub_cred(cred, RACCT_SWAP, decr);
+#endif
 }
 
 #define	SWM_POP		0x01	/* pop out */
@@ -2176,7 +2185,7 @@ swapon_check_swzone(void)
 {
 	unsigned long maxpages, npages;
 
-	npages = swap_total / PAGE_SIZE;
+	npages = swap_total;
 	/* absolute maximum we can handle assuming 100% efficiency */
 	maxpages = uma_zone_get_max(swblk_zone) * SWAP_META_PAGES;
@@ -2254,7 +2263,7 @@ swaponsomething(struct vnode *vp, void *id, u_long nblks,
 	TAILQ_INSERT_TAIL(&swtailq, sp, sw_list);
 	nswapdev++;
 	swap_pager_avail += nblks - 2;
-	swap_total += (vm_ooffset_t)nblks * PAGE_SIZE;
+	swap_total += nblks;
 	swapon_check_swzone();
 	swp_sizecheck();
 	mtx_unlock(&sw_dev_mtx);
@@ -2351,7 +2360,7 @@ swapoff_one(struct swdevt *sp, struct ucred *cred)
 	mtx_lock(&sw_dev_mtx);
 	sp->sw_flags |= SW_CLOSING;
 	swap_pager_avail -= blist_fill(sp->sw_blist, 0, nblks);
-	swap_total -= (vm_ooffset_t)nblks * PAGE_SIZE;
+	swap_total -= nblks;
 	mtx_unlock(&sw_dev_mtx);
 
 	/*