1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-12-24 11:29:10 +00:00

mnt_vnode_next_active: use conventional lock order when trylock fails.

Previously, when the VI_TRYLOCK failed, we would spin under the mutex
that protects the vnode active list until we either succeeded or
noticed that we had hogged the CPU. Since we were violating the lock
order, this would guarantee that we would become a hog under any
deadlock condition (e.g. a race with vdrop(9) on the same vnode). In
the presence of many concurrent threads in sync(2) or vdrop etc, the
victim could hang for a long time.

Now, avoid spinning by dropping and reacquiring the locks in the
conventional lock order when the trylock fails. This requires a dance
with the vnode hold count.

Submitted by:	Tom Rix <trix@juniper.net>
Tested by:	pho
Differential revision:	https://reviews.freebsd.org/D10692
This commit is contained in:
Konstantin Belousov 2017-05-15 10:02:45 +00:00
parent 344add7203
commit 391aba32e6
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=318285

View File

@ -5362,6 +5362,84 @@ mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp)
*mvp = NULL;
}
/*
* Relock the mp mount vnode list lock with the vp vnode interlock in the
* conventional lock order during mnt_vnode_next_active iteration.
*
* On entry, the mount vnode list lock is held and the vnode interlock is not.
* The list lock is dropped and reacquired. On success, both locks are held.
* On failure, the mount vnode list lock is held but the vnode interlock is
* not, and the procedure may have yielded.
*/
static bool
mnt_vnode_next_active_relock(struct vnode *mvp, struct mount *mp,
struct vnode *vp)
{
const struct vnode *tmp;
bool held, ret;
VNASSERT(mvp->v_mount == mp && mvp->v_type == VMARKER &&
TAILQ_NEXT(mvp, v_actfreelist) != NULL, mvp,
("%s: bad marker", __func__));
VNASSERT(vp->v_mount == mp && vp->v_type != VMARKER, vp,
("%s: inappropriate vnode", __func__));
ASSERT_VI_UNLOCKED(vp, __func__);
mtx_assert(&mp->mnt_listmtx, MA_OWNED);
ret = false;
TAILQ_REMOVE(&mp->mnt_activevnodelist, mvp, v_actfreelist);
TAILQ_INSERT_BEFORE(vp, mvp, v_actfreelist);
/*
* Use a hold to prevent vp from disappearing while the mount vnode
* list lock is dropped and reacquired. Normally a hold would be
* acquired with vhold(), but that might try to acquire the vnode
* interlock, which would be a LOR with the mount vnode list lock.
*/
held = vfs_refcount_acquire_if_not_zero(&vp->v_holdcnt);
mtx_unlock(&mp->mnt_listmtx);
if (!held)
goto abort;
VI_LOCK(vp);
if (!vfs_refcount_release_if_not_last(&vp->v_holdcnt)) {
vdropl(vp);
goto abort;
}
mtx_lock(&mp->mnt_listmtx);
/*
* Determine whether the vnode is still the next one after the marker,
* excepting any other markers. If the vnode has not been doomed by
* vgone() then the hold should have ensured that it remained on the
* active list. If it has been doomed but is still on the active list,
* don't abort, but rather skip over it (avoid spinning on doomed
* vnodes).
*/
tmp = mvp;
do {
tmp = TAILQ_NEXT(tmp, v_actfreelist);
} while (tmp != NULL && tmp->v_type == VMARKER);
if (tmp != vp) {
mtx_unlock(&mp->mnt_listmtx);
VI_UNLOCK(vp);
goto abort;
}
ret = true;
goto out;
abort:
maybe_yield();
mtx_lock(&mp->mnt_listmtx);
out:
if (ret)
ASSERT_VI_LOCKED(vp, __func__);
else
ASSERT_VI_UNLOCKED(vp, __func__);
mtx_assert(&mp->mnt_listmtx, MA_OWNED);
return (ret);
}
static struct vnode *
mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
{
@ -5371,22 +5449,19 @@ mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
restart:
vp = TAILQ_NEXT(*mvp, v_actfreelist);
TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
while (vp != NULL) {
if (vp->v_type == VMARKER) {
vp = TAILQ_NEXT(vp, v_actfreelist);
continue;
}
if (!VI_TRYLOCK(vp)) {
if (mp_ncpus == 1 || should_yield()) {
TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist);
mtx_unlock(&mp->mnt_listmtx);
pause("vnacti", 1);
mtx_lock(&mp->mnt_listmtx);
goto restart;
}
continue;
}
/*
* Try-lock because this is the wrong lock order. If that does
* not succeed, drop the mount vnode list lock and try to
* reacquire it and the vnode interlock in the right order.
*/
if (!VI_TRYLOCK(vp) &&
!mnt_vnode_next_active_relock(*mvp, mp, vp))
goto restart;
KASSERT(vp->v_type != VMARKER, ("locked marker %p", vp));
KASSERT(vp->v_mount == mp || vp->v_mount == NULL,
("alien vnode on the active list %p %p", vp, mp));
@ -5396,6 +5471,7 @@ mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
VI_UNLOCK(vp);
vp = nvp;
}
TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
/* Check if we are done */
if (vp == NULL) {