diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c index 58cde434b8f7..c8b0684d1951 100644 --- a/sys/kern/vfs_init.c +++ b/sys/kern/vfs_init.c @@ -200,6 +200,17 @@ vfs_root_sigdefer(struct mount *mp, int flags, struct vnode **vpp) return (rc); } +static int +vfs_cachedroot_sigdefer(struct mount *mp, int flags, struct vnode **vpp) +{ + int prev_stops, rc; + + prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); /* undone by sigallowstop() below */ + rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_cachedroot)(mp, flags, vpp); /* forward to the wrapped method */ + sigallowstop(prev_stops); + return (rc); +} + static int vfs_quotactl_sigdefer(struct mount *mp, int cmd, uid_t uid, void *arg) { @@ -343,6 +354,7 @@ static struct vfsops vfsops_sigdefer = { .vfs_mount = vfs_mount_sigdefer, .vfs_unmount = vfs_unmount_sigdefer, .vfs_root = vfs_root_sigdefer, + .vfs_cachedroot = vfs_cachedroot_sigdefer, .vfs_quotactl = vfs_quotactl_sigdefer, .vfs_statfs = vfs_statfs_sigdefer, .vfs_sync = vfs_sync_sigdefer, diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index 258b40a69859..8c77d70877d6 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -134,6 +134,7 @@ mount_init(void *mem, int size, int flags) M_WAITOK | M_ZERO); mp->mnt_ref = 0; mp->mnt_vfs_ops = 1; + mp->mnt_rootvnode = NULL; /* no cached root vnode yet */ return (0); } @@ -582,6 +583,10 @@ vfs_mount_destroy(struct mount *mp) panic("%s: vfs_ops should be 1 but %d found\n", __func__, mp->mnt_vfs_ops); + if (mp->mnt_rootvnode != NULL) /* the root vnode cache must be cleared first */ + panic("%s: mount point still has a root vnode %p\n", __func__, + mp->mnt_rootvnode); + if (mp->mnt_vnodecovered != NULL) vrele(mp->mnt_vnodecovered); #ifdef MAC @@ -1034,6 +1039,7 @@ vfs_domount_update( ) { struct export_args export; + struct vnode *rootvp; void *bufp; struct mount *mp; int error, export_error, len; @@ -1099,7 +1105,10 @@ vfs_domount_update( MNT_SNAPSHOT | MNT_ROOTFS | MNT_UPDATEMASK | MNT_RDONLY); if ((mp->mnt_flag & MNT_ASYNC) == 0) mp->mnt_kern_flag &= ~MNTK_ASYNC; + rootvp = vfs_cache_root_clear(mp); /* vrele()d after MNT_IUNLOCK() */ MNT_IUNLOCK(mp); + if (rootvp != NULL) + vrele(rootvp); 
mp->mnt_optnew = *optlist; vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt); @@ -1582,7 +1591,7 @@ vfs_mount_fetch_counter(struct mount *mp, enum mount_counter which) int dounmount(struct mount *mp, int flags, struct thread *td) { - struct vnode *coveredvp; + struct vnode *coveredvp, *rootvp; int error; uint64_t async_flag; int mnt_gen_r; @@ -1630,12 +1639,15 @@ dounmount(struct mount *mp, int flags, struct thread *td) return (EBUSY); } mp->mnt_kern_flag |= MNTK_UNMOUNT; + rootvp = vfs_cache_root_clear(mp); /* detach the cached root; vrele()d below */ if (flags & MNT_NONBUSY) { MNT_IUNLOCK(mp); error = vfs_check_usecounts(mp); MNT_ILOCK(mp); if (error != 0) { dounmount_cleanup(mp, coveredvp, MNTK_UNMOUNT); + if (rootvp != NULL) + vrele(rootvp); return (error); } } @@ -1664,6 +1676,9 @@ dounmount(struct mount *mp, int flags, struct thread *td) ("%s: invalid return value for msleep in the drain path @ %s:%d", __func__, __FILE__, __LINE__)); + if (rootvp != NULL) + vrele(rootvp); + if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 741f18fa7c65..acee7c15cc1b 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -5699,6 +5699,121 @@ vfs_unixify_accmode(accmode_t *accmode) return (0); } +/* + * Slow path of vfs_cache_root(): replace a doomed cached root vnode (if any) + * unless the fs is being unmounted. Return the root vnode in *vpp or an error. + */ +static int __noinline +vfs_cache_root_fallback(struct mount *mp, int flags, struct vnode **vpp) +{ + struct vnode *vp; + int error; + +restart: + if (mp->mnt_rootvnode != NULL) { /* unlocked peek, rechecked below */ + MNT_ILOCK(mp); + vp = mp->mnt_rootvnode; + if (vp != NULL) { + if ((vp->v_iflag & VI_DOOMED) == 0) { + vrefact(vp); + MNT_IUNLOCK(mp); + error = vn_lock(vp, flags); + if (error == 0) { + *vpp = vp; + return (0); + } + vrele(vp); + goto restart; /* vn_lock() failed, retry */ + } + /* + * Clear the old one. + */ + mp->mnt_rootvnode = NULL; + } + MNT_IUNLOCK(mp); + if (vp != NULL) { + /* + * Paired with a fence in vfs_op_thread_exit(). 
+ */ + atomic_thread_fence_acq(); + vfs_op_barrier_wait(mp); + vrele(vp); + } + } + error = VFS_CACHEDROOT(mp, flags, vpp); /* ask the filesystem */ + if (error != 0) + return (error); + if (mp->mnt_vfs_ops == 0) { /* rechecked under MNT_ILOCK() */ + MNT_ILOCK(mp); + if (mp->mnt_vfs_ops != 0) { + MNT_IUNLOCK(mp); + return (0); + } + if (mp->mnt_rootvnode == NULL) { + vrefact(*vpp); /* reference owned by the cache */ + mp->mnt_rootvnode = *vpp; + } else { + if (mp->mnt_rootvnode != *vpp) { + if ((mp->mnt_rootvnode->v_iflag & VI_DOOMED) == 0) { + panic("%s: mismatch between vnode returned " + " by VFS_CACHEDROOT and the one cached " + " (%p != %p)", + __func__, *vpp, mp->mnt_rootvnode); /* NOTE(review): the adjacent literals join with doubled spaces */ + } + } + } + MNT_IUNLOCK(mp); + } + return (0); +} + +int +vfs_cache_root(struct mount *mp, int flags, struct vnode **vpp) +{ + struct vnode *vp; + int error; + + if (!vfs_op_thread_enter(mp)) /* else take the slow path */ + return (vfs_cache_root_fallback(mp, flags, vpp)); + vp = (struct vnode *)atomic_load_ptr(&mp->mnt_rootvnode); + if (vp == NULL || (vp->v_iflag & VI_DOOMED)) { + vfs_op_thread_exit(mp); + return (vfs_cache_root_fallback(mp, flags, vpp)); + } + vrefact(vp); /* keep vp referenced past vfs_op_thread_exit() */ + vfs_op_thread_exit(mp); + error = vn_lock(vp, flags); + if (error != 0) { + vrele(vp); + return (vfs_cache_root_fallback(mp, flags, vpp)); + } + *vpp = vp; + return (0); +} + +struct vnode * +vfs_cache_root_clear(struct mount *mp) +{ + struct vnode *vp; + + /* + * mnt_vfs_ops > 0 guarantees there is nobody who can see this vnode. + */ + MPASS(mp->mnt_vfs_ops > 0); + vp = mp->mnt_rootvnode; + mp->mnt_rootvnode = NULL; + return (vp); /* callers vrele() it when not NULL */ +} + +void +vfs_cache_root_set(struct mount *mp, struct vnode *vp) +{ + + MPASS(mp->mnt_vfs_ops > 0); + vrefact(vp); /* reference held by the cache */ + mp->mnt_rootvnode = vp; +} + /* * These are helper functions for filesystems to traverse all * their vnodes. See MNT_VNODE_FOREACH_ALL() in sys/mount.h. 
diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 4b60055ca6dd..e9cdd63e37fc 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -231,6 +231,7 @@ struct mount { int *mnt_ref_pcpu; int *mnt_lockref_pcpu; int *mnt_writeopcount_pcpu; + struct vnode *mnt_rootvnode; /* cached root vnode or NULL */ }; /* @@ -702,6 +703,7 @@ struct vfsops { vfs_cmount_t *vfs_cmount; vfs_unmount_t *vfs_unmount; vfs_root_t *vfs_root; + vfs_root_t *vfs_cachedroot; vfs_quotactl_t *vfs_quotactl; vfs_statfs_t *vfs_statfs; vfs_sync_t *vfs_sync; @@ -741,6 +743,12 @@ vfs_statfs_t __vfs_statfs; _rc = (*(MP)->mnt_op->vfs_root)(MP, FLAGS, VPP); \ _rc; }) +#define VFS_CACHEDROOT(MP, FLAGS, VPP) ({ \ + int _rc; \ + \ + _rc = (*(MP)->mnt_op->vfs_cachedroot)(MP, FLAGS, VPP); \ + _rc; }) + #define VFS_QUOTACTL(MP, C, U, A) ({ \ int _rc; \ \ @@ -950,6 +958,9 @@ vfs_sysctl_t vfs_stdsysctl; void syncer_suspend(void); void syncer_resume(void); +struct vnode *vfs_cache_root_clear(struct mount *); +void vfs_cache_root_set(struct mount *, struct vnode *); + void vfs_op_barrier_wait(struct mount *); void vfs_op_enter(struct mount *); void vfs_op_exit_locked(struct mount *); diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 967b1c066c95..2649a63fad54 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -746,6 +746,7 @@ int vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize, rangelock_trywlock(&(vp)->v_rl, (start), (end), VI_MTX(vp)) int vfs_cache_lookup(struct vop_lookup_args *ap); +int vfs_cache_root(struct mount *mp, int flags, struct vnode **vpp); void vfs_timestamp(struct timespec *); void vfs_write_resume(struct mount *mp, int flags); int vfs_write_suspend(struct mount *mp, int flags);