From d2222aa0e949cbab3c322b4c2956130acb01e7c2 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sun, 8 Mar 2020 00:23:36 +0000 Subject: [PATCH] fd: use smr for managing struct pwd This has a side effect of eliminating filedesc slock/sunlock during path lookup, which in turn removes contention vs concurrent modifications to the fd table. Reviewed by: markj, kib Differential Revision: https://reviews.freebsd.org/D23889 --- lib/libprocstat/libprocstat.c | 9 ++++--- sys/kern/kern_descrip.c | 51 ++++++++++++++++++++++------------- sys/kern/kern_linker.c | 22 ++++++++++++--- sys/sys/filedesc.h | 47 +++++++++++++++++++++++++++++--- sys/ufs/ffs/ffs_alloc.c | 4 ++- 5 files changed, 103 insertions(+), 30 deletions(-) diff --git a/lib/libprocstat/libprocstat.c b/lib/libprocstat/libprocstat.c index de3b674be7c8..cf51270dcedd 100644 --- a/lib/libprocstat/libprocstat.c +++ b/lib/libprocstat/libprocstat.c @@ -460,6 +460,7 @@ procstat_getfiles_kvm(struct procstat *procstat, struct kinfo_proc *kp, int mmap struct file file; struct filedesc filed; struct pwd pwd; + unsigned long pwd_addr; struct vm_map_entry vmentry; struct vm_object object; struct vmspace vmspace; @@ -488,10 +489,10 @@ procstat_getfiles_kvm(struct procstat *procstat, struct kinfo_proc *kp, int mmap return (NULL); } haspwd = false; - if (filed.fd_pwd != NULL) { - if (!kvm_read_all(kd, (unsigned long)filed.fd_pwd, &pwd, - sizeof(pwd))) { - warnx("can't read fd_pwd at %p", (void *)filed.fd_pwd); + pwd_addr = (unsigned long)(FILEDESC_KVM_LOAD_PWD(&filed)); + if (pwd_addr != 0) { + if (!kvm_read_all(kd, pwd_addr, &pwd, sizeof(pwd))) { + warnx("can't read fd_pwd at %p", (void *)pwd_addr); return (NULL); } haspwd = true; diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index a40e1961fcc8..0b6a3c187b98 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -101,6 +102,8 @@ MALLOC_DECLARE(M_FADVISE); static __read_mostly uma_zone_t file_zone; static __read_mostly uma_zone_t filedesc0_zone; +static __read_mostly uma_zone_t pwd_zone; +static __read_mostly smr_t pwd_smr; static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders); @@ -1985,6 +1988,7 @@ fdinit(struct filedesc *fdp, bool prepfiles) { struct filedesc0 *newfdp0; struct filedesc *newfdp; + struct pwd *newpwd; newfdp0 = uma_zalloc(filedesc0_zone, M_WAITOK | M_ZERO); newfdp = &newfdp0->fd_fd; @@ -2000,7 +2004,8 @@ fdinit(struct filedesc *fdp, bool prepfiles) newfdp->fd_files->fdt_nfiles = NDFILE; if (fdp == NULL) { - newfdp->fd_pwd = pwd_alloc(); + newpwd = pwd_alloc(); + smr_serialized_store(&newfdp->fd_pwd, newpwd, true); return (newfdp); } @@ -2008,7 +2013,8 @@ fdinit(struct filedesc *fdp, bool prepfiles) fdgrowtable(newfdp, fdp->fd_lastfile + 1); FILEDESC_SLOCK(fdp); - newfdp->fd_pwd = pwd_hold_filedesc(fdp); + newpwd = pwd_hold_filedesc(fdp); + smr_serialized_store(&newfdp->fd_pwd, newpwd, true); if (!prepfiles) { FILEDESC_SUNLOCK(fdp); @@ -2328,7 +2334,7 @@ fdescfree(struct thread *td) return; FILEDESC_XLOCK(fdp); - pwd = fdp->fd_pwd; + pwd = FILEDESC_XLOCKED_LOAD_PWD(fdp); pwd_set(fdp, NULL); FILEDESC_XUNLOCK(fdp); @@ -2341,7 +2347,7 @@ void fdescfree_remapped(struct filedesc *fdp) { - pwd_drop(fdp->fd_pwd); + pwd_drop(smr_serialized_load(&fdp->fd_pwd, true)); fdescfree_fds(curthread, fdp, 0); } @@ -3277,7 +3283,7 @@ pwd_hold_filedesc(struct filedesc *fdp) struct pwd *pwd; FILEDESC_LOCK_ASSERT(fdp); - pwd = fdp->fd_pwd; + pwd = FILEDESC_LOCKED_LOAD_PWD(fdp); if (pwd != NULL) refcount_acquire(&pwd->pwd_refcount); return (pwd); @@ -3291,11 +3297,14 @@ pwd_hold(struct thread *td) fdp = td->td_proc->p_fd; - FILEDESC_SLOCK(fdp); - pwd = fdp->fd_pwd; - MPASS(pwd != NULL); - refcount_acquire(&pwd->pwd_refcount); - FILEDESC_SUNLOCK(fdp); + smr_enter(pwd_smr); + for (;;) { + pwd = smr_entered_load(&fdp->fd_pwd, pwd_smr); + MPASS(pwd != NULL); + if (refcount_acquire_if_not_zero(&pwd->pwd_refcount)) + break; + } + smr_exit(pwd_smr); return (pwd); } @@ -3304,7 +3313,8 @@ pwd_alloc(void) { struct pwd *pwd; - pwd = malloc(sizeof(*pwd), M_PWD, M_WAITOK | M_ZERO); + pwd = uma_zalloc_smr(pwd_zone, M_WAITOK); + bzero(pwd, sizeof(*pwd)); refcount_init(&pwd->pwd_refcount, 1); return (pwd); } @@ -3322,7 +3332,7 @@ pwd_drop(struct pwd *pwd) vrele(pwd->pwd_rdir); if (pwd->pwd_jdir != NULL) vrele(pwd->pwd_jdir); - free(pwd, M_PWD); + uma_zfree_smr(pwd_zone, pwd); } /* @@ -3340,7 +3350,7 @@ pwd_chroot(struct thread *td, struct vnode *vp) fdp = td->td_proc->p_fd; newpwd = pwd_alloc(); FILEDESC_XLOCK(fdp); - oldpwd = fdp->fd_pwd; + oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp); if (chroot_allow_open_directories == 0 || (chroot_allow_open_directories == 1 && oldpwd->pwd_rdir != rootvnode)) { @@ -3376,7 +3386,7 @@ pwd_chdir(struct thread *td, struct vnode *vp) newpwd = pwd_alloc(); fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); - oldpwd = fdp->fd_pwd; + oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp); newpwd->pwd_cdir = vp; pwd_fill(oldpwd, newpwd); pwd_set(fdp, newpwd); @@ -3392,7 +3402,7 @@ pwd_ensure_dirs(void) fdp = curproc->p_fd; FILEDESC_XLOCK(fdp); - oldpwd = fdp->fd_pwd; + oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp); if (oldpwd->pwd_cdir != NULL && oldpwd->pwd_rdir != NULL) { FILEDESC_XUNLOCK(fdp); return; @@ -3401,7 +3411,7 @@ pwd_ensure_dirs(void) newpwd = pwd_alloc(); FILEDESC_XLOCK(fdp); - oldpwd = fdp->fd_pwd; + oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp); pwd_fill(oldpwd, newpwd); if (newpwd->pwd_cdir == NULL) { vrefact(rootvnode); @@ -3441,7 +3451,7 @@ mountcheckdirs(struct vnode *olddp, struct vnode *newdp) if (fdp == NULL) continue; FILEDESC_XLOCK(fdp); - oldpwd = fdp->fd_pwd; + oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp); if (oldpwd == NULL || (oldpwd->pwd_cdir != olddp && oldpwd->pwd_rdir != olddp && @@ -4074,6 +4084,7 @@ int kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen) { struct filedesc *fdp; + struct pwd *pwd; struct export_fd_buf *efbuf; struct vnode *cdir; int error; @@ -4091,7 +4102,8 @@ kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen) efbuf->remainder = maxlen; FILEDESC_SLOCK(fdp); - cdir = fdp->fd_pwd->pwd_cdir; + pwd = FILEDESC_LOCKED_LOAD_PWD(fdp); + cdir = pwd->pwd_cdir; if (cdir == NULL) { error = EINVAL; } else { @@ -4279,6 +4291,9 @@ filelistinit(void *dummy) NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); filedesc0_zone = uma_zcreate("filedesc0", sizeof(struct filedesc0), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + pwd_zone = uma_zcreate("PWD", sizeof(struct pwd), NULL, NULL, + NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR); + pwd_smr = uma_zone_get_smr(pwd_zone); mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); } SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL); diff --git a/sys/kern/kern_linker.c b/sys/kern/kern_linker.c index 593540ef3003..889a1d934ac2 100644 --- a/sys/kern/kern_linker.c +++ b/sys/kern/kern_linker.c @@ -2063,6 +2063,22 @@ linker_hwpmc_list_objects(void) } #endif +/* check if root file system is not mounted */ +static bool +linker_root_mounted(void) +{ + struct pwd *pwd; + bool ret; + + if (rootvnode == NULL) + return (false); + + pwd = pwd_hold(curthread); + ret = pwd->pwd_rdir != NULL; + pwd_drop(pwd); + return (ret); +} + /* * Find a file which contains given module and load it, if "parent" is not * NULL, register a reference to it. @@ -2084,15 +2100,13 @@ linker_load_module(const char *kldname, const char *modname, */ KASSERT(verinfo == NULL, ("linker_load_module: verinfo" " is not NULL")); - /* check if root file system is not mounted */ - if (rootvnode == NULL || curproc->p_fd->fd_pwd->pwd_rdir == NULL) + if (!linker_root_mounted()) return (ENXIO); pathname = linker_search_kld(kldname); } else { if (modlist_lookup2(modname, verinfo) != NULL) return (EEXIST); - /* check if root file system is not mounted */ - if (rootvnode == NULL || curproc->p_fd->fd_pwd->pwd_rdir == NULL) + if (!linker_root_mounted()) return (ENXIO); if (kldname != NULL) pathname = strdup(kldname, M_LINKER); diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index 116649757de4..aa0a5770630f 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -42,6 +42,8 @@ #include #include #include +#include +#include #include @@ -76,16 +78,23 @@ struct fdescenttbl { */ #define NDSLOTTYPE u_long +/* + * This struct is copy-on-write and allocated from an SMR zone. + * All fields are constant after initialization apart from the reference count. + * + * Check pwd_* routines for usage. + */ struct pwd { volatile u_int pwd_refcount; struct vnode *pwd_cdir; /* current directory */ struct vnode *pwd_rdir; /* root directory */ struct vnode *pwd_jdir; /* jail root directory */ }; +typedef SMR_POINTER(struct pwd *) smrpwd_t; struct filedesc { struct fdescenttbl *fd_files; /* open files table */ - struct pwd *fd_pwd; /* directories */ + smrpwd_t fd_pwd; /* directories */ NDSLOTTYPE *fd_map; /* bitmap of free fds */ int fd_lastfile; /* high-water mark of fd_ofiles */ int fd_freefile; /* approx. next free file */ @@ -141,6 +150,38 @@ struct filedesc_to_leader { SX_NOTRECURSED) #define FILEDESC_UNLOCK_ASSERT(fdp) sx_assert(&(fdp)->fd_sx, SX_UNLOCKED) +#define FILEDESC_LOCKED_LOAD_PWD(fdp) ({ \ + struct filedesc *_fdp = (fdp); \ + struct pwd *_pwd; \ + _pwd = smr_serialized_load(&(_fdp)->fd_pwd, \ + (FILEDESC_LOCK_ASSERT(_fdp), true)); \ + _pwd; \ +}) + +#define FILEDESC_XLOCKED_LOAD_PWD(fdp) ({ \ + struct filedesc *_fdp = (fdp); \ + struct pwd *_pwd; \ + _pwd = smr_serialized_load(&(_fdp)->fd_pwd, \ + (FILEDESC_XLOCK_ASSERT(_fdp), true)); \ + _pwd; \ +}) + +#else + +/* + * Accessor for libkvm et al. + */ +#define FILEDESC_KVM_LOAD_PWD(fdp) ({ \ + struct filedesc *_fdp = (fdp); \ + struct pwd *_pwd; \ + _pwd = smr_kvm_load(&(_fdp)->fd_pwd); \ + _pwd; \ +}) + +#endif + +#ifdef _KERNEL + /* Operation types for kern_dup(). */ enum { FDDUP_NORMAL, /* dup() behavior. */ @@ -265,8 +306,8 @@ static inline void pwd_set(struct filedesc *fdp, struct pwd *newpwd) { - FILEDESC_XLOCK_ASSERT(fdp); - fdp->fd_pwd = newpwd; + smr_serialized_store(&fdp->fd_pwd, newpwd, + (FILEDESC_XLOCK_ASSERT(fdp), true)); } #endif /* _KERNEL */ diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index 0b8068db0da7..b0d6f6d0bfaa 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -3590,6 +3590,7 @@ buffered_write(fp, uio, active_cred, flags, td) int flags; struct thread *td; { + struct pwd *pwd; struct vnode *devvp, *vp; struct inode *ip; struct buf *bp; @@ -3610,7 +3611,8 @@ buffered_write(fp, uio, active_cred, flags, td) return (EINVAL); fdp = td->td_proc->p_fd; FILEDESC_SLOCK(fdp); - vp = fdp->fd_pwd->pwd_cdir; + pwd = FILEDESC_LOCKED_LOAD_PWD(fdp); + vp = pwd->pwd_cdir; vref(vp); FILEDESC_SUNLOCK(fdp); vn_lock(vp, LK_SHARED | LK_RETRY);