diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 9edce97fc009..2ca95a8bbb34 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -5362,6 +5362,84 @@ mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp)
 	*mvp = NULL;
 }
 
+/*
+ * Relock the mp mount vnode list lock with the vp vnode interlock in the
+ * conventional lock order during mnt_vnode_next_active iteration.
+ *
+ * On entry, the mount vnode list lock is held and the vnode interlock is not.
+ * The list lock is dropped and reacquired.  On success, both locks are held.
+ * On failure, the mount vnode list lock is held but the vnode interlock is
+ * not, and the procedure may have yielded.
+ */
+static bool
+mnt_vnode_next_active_relock(struct vnode *mvp, struct mount *mp,
+    struct vnode *vp)
+{
+	const struct vnode *tmp;
+	bool held, ret;
+
+	VNASSERT(mvp->v_mount == mp && mvp->v_type == VMARKER &&
+	    TAILQ_NEXT(mvp, v_actfreelist) != NULL, mvp,
+	    ("%s: bad marker", __func__));
+	VNASSERT(vp->v_mount == mp && vp->v_type != VMARKER, vp,
+	    ("%s: inappropriate vnode", __func__));
+	ASSERT_VI_UNLOCKED(vp, __func__);
+	mtx_assert(&mp->mnt_listmtx, MA_OWNED);
+
+	ret = false;
+
+	TAILQ_REMOVE(&mp->mnt_activevnodelist, mvp, v_actfreelist);
+	TAILQ_INSERT_BEFORE(vp, mvp, v_actfreelist);
+
+	/*
+	 * Use a hold to prevent vp from disappearing while the mount vnode
+	 * list lock is dropped and reacquired.  Normally a hold would be
+	 * acquired with vhold(), but that might try to acquire the vnode
+	 * interlock, which would be a LOR with the mount vnode list lock.
+	 */
+	held = vfs_refcount_acquire_if_not_zero(&vp->v_holdcnt);
+	mtx_unlock(&mp->mnt_listmtx);
+	if (!held)
+		goto abort;
+	VI_LOCK(vp);
+	if (!vfs_refcount_release_if_not_last(&vp->v_holdcnt)) {
+		vdropl(vp);
+		goto abort;
+	}
+	mtx_lock(&mp->mnt_listmtx);
+
+	/*
+	 * Determine whether the vnode is still the next one after the marker,
+	 * excepting any other markers.  If the vnode has not been doomed by
+	 * vgone() then the hold should have ensured that it remained on the
+	 * active list.  If it has been doomed but is still on the active list,
+	 * don't abort, but rather skip over it (avoid spinning on doomed
+	 * vnodes).
+	 */
+	tmp = mvp;
+	do {
+		tmp = TAILQ_NEXT(tmp, v_actfreelist);
+	} while (tmp != NULL && tmp->v_type == VMARKER);
+	if (tmp != vp) {
+		mtx_unlock(&mp->mnt_listmtx);
+		VI_UNLOCK(vp);
+		goto abort;
+	}
+
+	ret = true;
+	goto out;
+abort:
+	maybe_yield();
+	mtx_lock(&mp->mnt_listmtx);
+out:
+	if (ret)
+		ASSERT_VI_LOCKED(vp, __func__);
+	else
+		ASSERT_VI_UNLOCKED(vp, __func__);
+	mtx_assert(&mp->mnt_listmtx, MA_OWNED);
+	return (ret);
+}
+
 static struct vnode *
 mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
 {
@@ -5371,22 +5449,19 @@ mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
 	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 restart:
 	vp = TAILQ_NEXT(*mvp, v_actfreelist);
-	TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
 	while (vp != NULL) {
 		if (vp->v_type == VMARKER) {
 			vp = TAILQ_NEXT(vp, v_actfreelist);
 			continue;
 		}
-		if (!VI_TRYLOCK(vp)) {
-			if (mp_ncpus == 1 || should_yield()) {
-				TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist);
-				mtx_unlock(&mp->mnt_listmtx);
-				pause("vnacti", 1);
-				mtx_lock(&mp->mnt_listmtx);
-				goto restart;
-			}
-			continue;
-		}
+		/*
+		 * Try-lock because this is the wrong lock order.  If that does
+		 * not succeed, drop the mount vnode list lock and try to
+		 * reacquire it and the vnode interlock in the right order.
+		 */
+		if (!VI_TRYLOCK(vp) &&
+		    !mnt_vnode_next_active_relock(*mvp, mp, vp))
+			goto restart;
 		KASSERT(vp->v_type != VMARKER, ("locked marker %p", vp));
 		KASSERT(vp->v_mount == mp || vp->v_mount == NULL,
 		    ("alien vnode on the active list %p %p", vp, mp));
@@ -5396,6 +5471,7 @@ mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
 		VI_UNLOCK(vp);
 		vp = nvp;
 	}
+	TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
 
 	/* Check if we are done */
 	if (vp == NULL) {
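A note on the hold idiom in mnt_vnode_next_active_relock(): vhold() may itself
take the vnode interlock, which is exactly the lock the routine cannot take
while still holding the list lock, so the hold is taken by bumping v_holdcnt
directly with vfs_refcount_acquire_if_not_zero() and later undone with
vfs_refcount_release_if_not_last(); when the release would drop the last hold,
the code instead calls vdropl() with the interlock already held.  The sketch
below is a minimal userspace rendering of those two primitives, modeled on the
refcount(9) helpers of the same names, using C11 atomics; it illustrates the
idiom and is not the kernel implementation.

#include <stdatomic.h>
#include <stdbool.h>

/*
 * Acquire a reference only if the count is already nonzero.  A zero
 * count means the object may already be going away; refusing to
 * resurrect it is what makes the acquire safe without the interlock.
 */
static bool
refcount_acquire_if_not_zero(atomic_uint *count)
{
	unsigned int old;

	old = atomic_load(count);
	for (;;) {
		if (old == 0)
			return (false);
		/* On failure, old is refreshed with the current value. */
		if (atomic_compare_exchange_weak(count, &old, old + 1))
			return (true);
	}
}

/*
 * Release a reference only if it is not the last one.  Dropping the
 * last reference needs the full teardown path (vdropl() in the patch),
 * which requires the interlock.
 */
static bool
refcount_release_if_not_last(atomic_uint *count)
{
	unsigned int old;

	old = atomic_load(count);
	for (;;) {
		if (old == 1)
			return (false);
		if (atomic_compare_exchange_weak(count, &old, old - 1))
			return (true);
	}
}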
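For context on how the iterator is consumed: callers do not invoke
mnt_vnode_next_active() directly but walk the active list through the
MNT_VNODE_FOREACH_ACTIVE() macro from sys/mount.h, which hands back each vnode
with its interlock held.  A hypothetical caller might look like the following;
the loop body is a placeholder, not part of the patch.

#include <sys/param.h>
#include <sys/mount.h>
#include <sys/vnode.h>

/* Hypothetical example caller, not from the patch. */
static void
scan_active_vnodes(struct mount *mp)
{
	struct vnode *vp, *mvp;

	MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) {
		/*
		 * Each vp arrives with its interlock held; the body must
		 * release it, directly or via vget()/vdropl().
		 */
		VI_UNLOCK(vp);
	}
	/*
	 * A body that breaks out of the loop early must call
	 * MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp) to remove and free the
	 * marker; running to completion cleans it up automatically.
	 */
}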