From 426da3bcfb40b0b0733f0be4dea9e7659f442368 Mon Sep 17 00:00:00 2001 From: Alfred Perlstein Date: Sun, 13 Jan 2002 11:58:06 +0000 Subject: [PATCH] SMP Lock struct file, filedesc and the global file list. Seigo Tanimura (tanimura) posted the initial delta. I've polished it quite a bit, reducing the need for locking and adapting it for KSE. Locks: 1 mutex in each filedesc protects all the fields. It also protects "struct file" initialization: while a struct file is being changed from &badfileops -> &pipeops or something, the filedesc should be locked. 1 mutex in each struct file protects the refcount fields. It doesn't protect anything else. The flags used for garbage collection have been moved to f_gcflag, which was the FILLER short; this doesn't need locking because the garbage collection is a single threaded container. Could likely be made to use a pool mutex. 1 sx lock for the global filelist. struct file * fhold(struct file *fp); /* increments reference count on a file */ struct file * fhold_locked(struct file *fp); /* like fhold but expects file to be locked */ struct file * ffind_hold(struct thread *, int fd); /* finds the struct file in thread, adds one reference and returns it unlocked */ struct file * ffind_lock(struct thread *, int fd); /* ffind_hold, but returns file locked */ I still have to smp-safe the fget cruft; I'll get to that asap. 
--- sys/alpha/osf1/osf1_misc.c | 7 +- sys/alpha/osf1/osf1_mount.c | 4 +- sys/compat/linux/linux_file.c | 34 ++-- sys/compat/linux/linux_ioctl.c | 278 +++++++++++++++++--------- sys/compat/linux/linux_stats.c | 15 +- sys/compat/svr4/svr4_fcntl.c | 39 +++- sys/compat/svr4/svr4_filio.c | 11 +- sys/compat/svr4/svr4_ioctl.c | 21 +- sys/compat/svr4/svr4_misc.c | 37 +++- sys/compat/svr4/svr4_stream.c | 79 +++++--- sys/dev/aac/aac.c | 9 +- sys/dev/streams/streams.c | 26 ++- sys/dev/tdfx/tdfx_pci.c | 6 +- sys/fs/fdescfs/fdesc_vfsops.c | 2 + sys/fs/fdescfs/fdesc_vnops.c | 26 ++- sys/fs/fifofs/fifo_vnops.c | 17 +- sys/fs/portalfs/portal_vfsops.c | 4 +- sys/fs/portalfs/portal_vnops.c | 8 +- sys/fs/unionfs/union_subr.c | 7 +- sys/fs/unionfs/union_vfsops.c | 2 + sys/i386/ibcs2/ibcs2_fcntl.c | 8 +- sys/i386/ibcs2/ibcs2_ioctl.c | 165 ++++++++++------ sys/i386/ibcs2/ibcs2_misc.c | 25 ++- sys/i386/ibcs2/ibcs2_stat.c | 4 +- sys/kern/init_main.c | 3 + sys/kern/kern_acl.c | 4 + sys/kern/kern_descrip.c | 341 +++++++++++++++++++++++++++----- sys/kern/kern_event.c | 94 ++++++--- sys/kern/kern_exec.c | 5 +- sys/kern/kern_fork.c | 12 +- sys/kern/subr_acl_posix1e.c | 4 + sys/kern/sys_generic.c | 133 +++++++++---- sys/kern/sys_pipe.c | 24 ++- sys/kern/sys_socket.c | 7 +- sys/kern/uipc_syscalls.c | 36 +++- sys/kern/uipc_usrreq.c | 70 +++++-- sys/kern/vfs_acl.c | 4 + sys/kern/vfs_cache.c | 17 ++ sys/kern/vfs_extattr.c | 189 ++++++++++++++---- sys/kern/vfs_lookup.c | 2 + sys/kern/vfs_syscalls.c | 189 ++++++++++++++---- sys/kern/vfs_vnops.c | 2 +- sys/netgraph/ng_socket.c | 9 +- sys/netsmb/smb_dev.c | 27 ++- sys/sys/fcntl.h | 5 +- sys/sys/file.h | 105 ++++++---- sys/sys/filedesc.h | 29 ++- sys/ufs/ffs/ffs_alloc.c | 3 + sys/vm/vm_mmap.c | 19 +- 49 files changed, 1625 insertions(+), 542 deletions(-) diff --git a/sys/alpha/osf1/osf1_misc.c b/sys/alpha/osf1/osf1_misc.c index 84526e209c58..46b20d9f8020 100644 --- a/sys/alpha/osf1/osf1_misc.c +++ b/sys/alpha/osf1/osf1_misc.c @@ -667,18 +667,17 @@ 
osf1_fstat(td, uap) struct thread *td; register struct osf1_fstat_args *uap; { - register struct filedesc *fdp; register struct file *fp; struct stat ub; struct osf1_stat oub; int error; - fdp = td->td_proc->p_fd; - if ((unsigned)SCARG(uap, fd) >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL) + fp = ffind_hold(td, uap->fd); + if (fp == NULL) return (EBADF); error = fo_stat(fp, &ub, td); + fdrop(fp, td); cvtstat2osf1(&ub, &oub); if (error == 0) error = copyout((caddr_t)&oub, (caddr_t)SCARG(uap, sb), diff --git a/sys/alpha/osf1/osf1_mount.c b/sys/alpha/osf1/osf1_mount.c index 2e88b17989f6..4244da36d8f2 100644 --- a/sys/alpha/osf1/osf1_mount.c +++ b/sys/alpha/osf1/osf1_mount.c @@ -154,7 +154,9 @@ osf1_fstatfs(td, uap) return (error); mp = ((struct vnode *)fp->f_data)->v_mount; sp = &mp->mnt_stat; - if ((error = VFS_STATFS(mp, sp, td))) + error = VFS_STATFS(mp, sp, td); + fdrop(fp, td); + if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; bsd2osf_statfs(sp, &osfs); diff --git a/sys/compat/linux/linux_file.c b/sys/compat/linux/linux_file.c index 4609a1b84b34..9e1156c9aeb7 100644 --- a/sys/compat/linux/linux_file.c +++ b/sys/compat/linux/linux_file.c @@ -135,12 +135,13 @@ linux_open(struct thread *td, struct linux_open_args *args) PROC_LOCK(p); if (!error && !(bsd_open_args.flags & O_NOCTTY) && SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) { - struct filedesc *fdp = p->p_fd; - struct file *fp = fdp->fd_ofiles[td->td_retval[0]]; + struct file *fp; + fp = ffind_hold(td, td->td_retval[0]); PROC_UNLOCK(p); if (fp->f_type == DTYPE_VNODE) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0, td); + fdrop(fp, td); } else PROC_UNLOCK(p); #ifdef DEBUG @@ -270,21 +271,29 @@ getdents_common(struct thread *td, struct linux_getdents64_args *args, if ((error = getvnode(td->td_proc->p_fd, args->fd, &fp)) != 0) return (error); - if ((fp->f_flag & FREAD) == 0) + if ((fp->f_flag & FREAD) == 0) { + fdrop(fp, td); return (EBADF); + } vp = (struct vnode *) 
fp->f_data; - if (vp->v_type != VDIR) + if (vp->v_type != VDIR) { + fdrop(fp, td); return (EINVAL); + } - if ((error = VOP_GETATTR(vp, &va, td->td_proc->p_ucred, td))) + if ((error = VOP_GETATTR(vp, &va, td->td_proc->p_ucred, td))) { + fdrop(fp, td); return (error); + } nbytes = args->count; if (nbytes == 1) { /* readdir(2) case. Always struct dirent. */ - if (is64bit) + if (is64bit) { + fdrop(fp, td); return (EINVAL); + } nbytes = sizeof(linux_dirent); justone = 1; } else @@ -435,6 +444,7 @@ getdents_common(struct thread *td, struct linux_getdents64_args *args, free(cookies, M_TEMP); VOP_UNLOCK(vp, 0, td); + fdrop(fp, td); free(buf, M_TEMP); return (error); } @@ -987,12 +997,14 @@ fcntl_common(struct thread *td, struct linux_fcntl64_args *args) * significant effect for pipes (SIGIO is not delivered for * pipes under Linux-2.2.35 at least). */ - fdp = td->td_proc->p_fd; - if ((u_int)args->fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[args->fd]) == NULL) - return (EBADF); - if (fp->f_type == DTYPE_PIPE) + fp = ffind_hold(td, args->fd); + if (fp == NULL) + return EBADF; + if (fp->f_type == DTYPE_PIPE) { + fdrop(fp, td); return (EINVAL); + } + fdrop(fp, td); fcntl_args.cmd = F_SETOWN; fcntl_args.arg = args->arg; diff --git a/sys/compat/linux/linux_ioctl.c b/sys/compat/linux/linux_ioctl.c index f5ab820f6db9..baafb5b28ebd 100644 --- a/sys/compat/linux/linux_ioctl.c +++ b/sys/compat/linux/linux_ioctl.c @@ -105,19 +105,23 @@ static TAILQ_HEAD(, handler_element) handlers = static int linux_ioctl_disk(struct thread *td, struct linux_ioctl_args *args) { - struct file *fp = td->td_proc->p_fd->fd_ofiles[args->fd]; + struct file *fp; int error; struct disklabel dl; + fp = ffind_hold(td, args->fd); + if (fp == NULL) + return (EBADF); switch (args->cmd & 0xffff) { case LINUX_BLKGETSIZE: error = fo_ioctl(fp, DIOCGDINFO, (caddr_t)&dl, td); + fdrop(fp, td); if (error) return (error); return (copyout(&(dl.d_secperunit), (caddr_t)args->arg, sizeof(dl.d_secperunit))); - break; } + 
fdrop(fp, td); return (ENOIOCTL); } @@ -548,66 +552,77 @@ linux_ioctl_termio(struct thread *td, struct linux_ioctl_args *args) struct termios bios; struct linux_termios lios; struct linux_termio lio; - struct file *fp = td->td_proc->p_fd->fd_ofiles[args->fd]; + struct file *fp; int error; + fp = ffind_hold(td, args->fd); + if (fp == NULL) + return (EBADF); switch (args->cmd & 0xffff) { case LINUX_TCGETS: error = fo_ioctl(fp, TIOCGETA, (caddr_t)&bios, td); if (error) - return (error); + break; bsd_to_linux_termios(&bios, &lios); - return copyout(&lios, (caddr_t)args->arg, sizeof(lios)); + error = copyout(&lios, (caddr_t)args->arg, sizeof(lios)); + break; case LINUX_TCSETS: error = copyin((caddr_t)args->arg, &lios, sizeof(lios)); if (error) - return (error); + break; linux_to_bsd_termios(&lios, &bios); - return (fo_ioctl(fp, TIOCSETA, (caddr_t)&bios, td)); + error = (fo_ioctl(fp, TIOCSETA, (caddr_t)&bios, td)); + break; case LINUX_TCSETSW: error = copyin((caddr_t)args->arg, &lios, sizeof(lios)); if (error) - return (error); + break; linux_to_bsd_termios(&lios, &bios); - return (fo_ioctl(fp, TIOCSETAW, (caddr_t)&bios, td)); + error = (fo_ioctl(fp, TIOCSETAW, (caddr_t)&bios, td)); + break; case LINUX_TCSETSF: error = copyin((caddr_t)args->arg, &lios, sizeof(lios)); if (error) - return (error); + break; linux_to_bsd_termios(&lios, &bios); - return (fo_ioctl(fp, TIOCSETAF, (caddr_t)&bios, td)); + error = (fo_ioctl(fp, TIOCSETAF, (caddr_t)&bios, td)); + break; case LINUX_TCGETA: error = fo_ioctl(fp, TIOCGETA, (caddr_t)&bios, td); if (error) - return (error); + break; bsd_to_linux_termio(&bios, &lio); - return (copyout(&lio, (caddr_t)args->arg, sizeof(lio))); + error = (copyout(&lio, (caddr_t)args->arg, sizeof(lio))); + break; case LINUX_TCSETA: error = copyin((caddr_t)args->arg, &lio, sizeof(lio)); if (error) - return (error); + break; linux_to_bsd_termio(&lio, &bios); - return (fo_ioctl(fp, TIOCSETA, (caddr_t)&bios, td)); + error = (fo_ioctl(fp, TIOCSETA, (caddr_t)&bios, 
td)); + break; case LINUX_TCSETAW: error = copyin((caddr_t)args->arg, &lio, sizeof(lio)); if (error) - return (error); + break; linux_to_bsd_termio(&lio, &bios); - return (fo_ioctl(fp, TIOCSETAW, (caddr_t)&bios, td)); + error = (fo_ioctl(fp, TIOCSETAW, (caddr_t)&bios, td)); + break; case LINUX_TCSETAF: error = copyin((caddr_t)args->arg, &lio, sizeof(lio)); if (error) - return (error); + break; linux_to_bsd_termio(&lio, &bios); - return (fo_ioctl(fp, TIOCSETAF, (caddr_t)&bios, td)); + error = (fo_ioctl(fp, TIOCSETAF, (caddr_t)&bios, td)); + break; /* LINUX_TCSBRK */ @@ -625,7 +640,8 @@ linux_ioctl_termio(struct thread *td, struct linux_ioctl_args *args) struct write_args wr; error = fo_ioctl(fp, TIOCGETA, (caddr_t)&bios, td); if (error) - return (error); + break; + fdrop(fp, td); c = (args->arg == LINUX_TCIOFF) ? VSTOP : VSTART; c = bios.c_cc[c]; if (c != _POSIX_VDISABLE) { @@ -637,10 +653,12 @@ linux_ioctl_termio(struct thread *td, struct linux_ioctl_args *args) return (0); } default: + fdrop(fp, td); return (EINVAL); } args->arg = 0; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; } case LINUX_TCFLSH: { @@ -656,97 +674,115 @@ linux_ioctl_termio(struct thread *td, struct linux_ioctl_args *args) args->arg = FREAD | FWRITE; break; default: + fdrop(fp, td); return (EINVAL); } - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; } case LINUX_TIOCEXCL: args->cmd = TIOCEXCL; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_TIOCNXCL: args->cmd = TIOCNXCL; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; /* LINUX_TIOCSCTTY */ case LINUX_TIOCGPGRP: args->cmd = TIOCGPGRP; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_TIOCSPGRP: args->cmd = TIOCSPGRP; - return 
(ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; /* LINUX_TIOCOUTQ */ /* LINUX_TIOCSTI */ case LINUX_TIOCGWINSZ: args->cmd = TIOCGWINSZ; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_TIOCSWINSZ: args->cmd = TIOCSWINSZ; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_TIOCMGET: args->cmd = TIOCMGET; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_TIOCMBIS: args->cmd = TIOCMBIS; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_TIOCMBIC: args->cmd = TIOCMBIC; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_TIOCMSET: args->cmd = TIOCMSET; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; /* TIOCGSOFTCAR */ /* TIOCSSOFTCAR */ case LINUX_FIONREAD: /* LINUX_TIOCINQ */ args->cmd = FIONREAD; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; /* LINUX_TIOCLINUX */ case LINUX_TIOCCONS: args->cmd = TIOCCONS; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_TIOCGSERIAL: { struct linux_serial_struct lss; lss.type = LINUX_PORT_16550A; lss.flags = 0; lss.close_delay = 0; - return copyout(&lss, (caddr_t)args->arg, sizeof(lss)); + error = copyout(&lss, (caddr_t)args->arg, sizeof(lss)); + break; } case LINUX_TIOCSSERIAL: { struct linux_serial_struct lss; error = copyin((caddr_t)args->arg, &lss, sizeof(lss)); if (error) - return (error); + break; /* XXX - It really helps to have an implementation that * does nothing. NOT! 
*/ - return (0); + error = 0; + break; } /* LINUX_TIOCPKT */ case LINUX_FIONBIO: args->cmd = FIONBIO; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_TIOCNOTTY: args->cmd = TIOCNOTTY; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_TIOCSETD: { int line; @@ -761,9 +797,11 @@ linux_ioctl_termio(struct thread *td, struct linux_ioctl_args *args) line = PPPDISC; break; default: + fdrop(fp, td); return (EINVAL); } - return (fo_ioctl(fp, TIOCSETD, (caddr_t)&line, td)); + error = (fo_ioctl(fp, TIOCSETD, (caddr_t)&line, td)); + break; } case LINUX_TIOCGETD: { @@ -783,9 +821,11 @@ linux_ioctl_termio(struct thread *td, struct linux_ioctl_args *args) linux_line = LINUX_N_PPP; break; default: + fdrop(fp, td); return (EINVAL); } - return (copyout(&linux_line, (caddr_t)args->arg, sizeof(int))); + error = (copyout(&linux_line, (caddr_t)args->arg, sizeof(int))); + break; } /* LINUX_TCSBRKP */ @@ -793,15 +833,18 @@ linux_ioctl_termio(struct thread *td, struct linux_ioctl_args *args) case LINUX_FIONCLEX: args->cmd = FIONCLEX; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_FIOCLEX: args->cmd = FIOCLEX; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_FIOASYNC: args->cmd = FIOASYNC; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; /* LINUX_TIOCSERCONFIG */ /* LINUX_TIOCSERGWILD */ @@ -809,9 +852,13 @@ linux_ioctl_termio(struct thread *td, struct linux_ioctl_args *args) /* LINUX_TIOCGLCKTRMIOS */ /* LINUX_TIOCSLCKTRMIOS */ + default: + error = ENOIOCTL; + break; } - return (ENOIOCTL); + fdrop(fp, td); + return (error); } /* @@ -1199,26 +1246,33 @@ bsd_to_linux_dvd_authinfo(struct dvd_authinfo *bp, l_dvd_authinfo *lp) static int 
linux_ioctl_cdrom(struct thread *td, struct linux_ioctl_args *args) { - struct file *fp = td->td_proc->p_fd->fd_ofiles[args->fd]; + struct file *fp; int error; + fp = ffind_hold(td, args->fd); + if (fp == NULL) + return (EBADF); switch (args->cmd & 0xffff) { case LINUX_CDROMPAUSE: args->cmd = CDIOCPAUSE; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_CDROMRESUME: args->cmd = CDIOCRESUME; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_CDROMPLAYMSF: args->cmd = CDIOCPLAYMSF; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_CDROMPLAYTRKIND: args->cmd = CDIOCPLAYTRACKS; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_CDROMREADTOCHDR: { struct ioc_toc_header th; @@ -1229,7 +1283,7 @@ linux_ioctl_cdrom(struct thread *td, struct linux_ioctl_args *args) lth.cdth_trk1 = th.ending_track; copyout(<h, (caddr_t)args->arg, sizeof(lth)); } - return (error); + break; } case LINUX_CDROMREADTOCENTRY: { @@ -1247,20 +1301,23 @@ linux_ioctl_cdrom(struct thread *td, struct linux_ioctl_args *args) &irtse.entry.addr, <e.cdte_addr); copyout(<e, (caddr_t)args->arg, sizeof(lte)); } - return (error); + break; } case LINUX_CDROMSTOP: args->cmd = CDIOCSTOP; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_CDROMSTART: args->cmd = CDIOCSTART; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_CDROMEJECT: args->cmd = CDIOCEJECT; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; /* LINUX_CDROMVOLCTRL */ @@ -1278,11 +1335,11 @@ linux_ioctl_cdrom(struct thread *td, struct linux_ioctl_args *args) bsdsc.data = bsdinfo; error = 
fo_ioctl(fp, CDIOCREADSUBCHANNEL, (caddr_t)&bsdsc, td); if (error) - return (error); + break; error = copyin((caddr_t)args->arg, &sc, sizeof(struct linux_cdrom_subchnl)); if (error) - return (error); + break; sc.cdsc_audiostatus = bsdinfo->header.audio_status; sc.cdsc_adr = bsdinfo->what.position.addr_type; sc.cdsc_ctrl = bsdinfo->what.position.control; @@ -1294,7 +1351,7 @@ linux_ioctl_cdrom(struct thread *td, struct linux_ioctl_args *args) bsdinfo->what.position.reladdr.lba); error = copyout(&sc, (caddr_t)args->arg, sizeof(struct linux_cdrom_subchnl)); - return (error); + break; } /* LINUX_CDROMREADMODE2 */ @@ -1306,7 +1363,8 @@ linux_ioctl_cdrom(struct thread *td, struct linux_ioctl_args *args) case LINUX_CDROMRESET: args->cmd = CDIOCRESET; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; /* LINUX_CDROMVOLREAD */ /* LINUX_CDROMREADRAW */ @@ -1337,18 +1395,19 @@ linux_ioctl_cdrom(struct thread *td, struct linux_ioctl_args *args) error = copyin((caddr_t)args->arg, &lds, sizeof(l_dvd_struct)); if (error) - return (error); + break; error = linux_to_bsd_dvd_struct(&lds, &bds); if (error) - return (error); + break; error = fo_ioctl(fp, DVDIOCREADSTRUCTURE, (caddr_t)&bds, td); if (error) - return (error); + break; error = bsd_to_linux_dvd_struct(&bds, &lds); if (error) - return (error); - return (copyout(&lds, (caddr_t)args->arg, - sizeof(l_dvd_struct))); + break; + error = copyout(&lds, (caddr_t)args->arg, + sizeof(l_dvd_struct)); + break; } /* LINUX_DVD_WRITE_STRUCT */ @@ -1361,10 +1420,10 @@ linux_ioctl_cdrom(struct thread *td, struct linux_ioctl_args *args) error = copyin((caddr_t)args->arg, &lda, sizeof(l_dvd_authinfo)); if (error) - return (error); + break; error = linux_to_bsd_dvd_authinfo(&lda, &bcode, &bda); if (error) - return (error); + break; error = fo_ioctl(fp, bcode, (caddr_t)&bda, td); if (error) { if (lda.type == LINUX_DVD_HOST_SEND_KEY2) { @@ -1372,22 +1431,27 @@ linux_ioctl_cdrom(struct 
thread *td, struct linux_ioctl_args *args) copyout(&lda, (caddr_t)args->arg, sizeof(l_dvd_authinfo)); } - return (error); + break; } error = bsd_to_linux_dvd_authinfo(&bda, &lda); if (error) - return (error); - return (copyout(&lda, (caddr_t)args->arg, - sizeof(l_dvd_authinfo))); + break; + error = copyout(&lda, (caddr_t)args->arg, + sizeof(l_dvd_authinfo)); + break; } /* LINUX_CDROM_SEND_PACKET */ /* LINUX_CDROM_NEXT_WRITABLE */ /* LINUX_CDROM_LAST_WRITTEN */ + default: + error = ENOIOCTL; + break; } - return (ENOIOCTL); + fdrop(fp, td); + return (error); } /* @@ -1639,37 +1703,48 @@ linux_ioctl_sound(struct thread *td, struct linux_ioctl_args *args) static int linux_ioctl_console(struct thread *td, struct linux_ioctl_args *args) { - struct file *fp = td->td_proc->p_fd->fd_ofiles[args->fd]; + struct file *fp; + int error; + fp = ffind_hold(td, args->fd); + if (fp == NULL) + return (EBADF); switch (args->cmd & 0xffff) { case LINUX_KIOCSOUND: args->cmd = KIOCSOUND; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_KDMKTONE: args->cmd = KDMKTONE; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_KDGETLED: args->cmd = KDGETLED; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_KDSETLED: args->cmd = KDSETLED; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_KDSETMODE: args->cmd = KDSETMODE; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_KDGETMODE: args->cmd = KDGETMODE; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_KDGKBMODE: args->cmd = KDGKBMODE; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; 
case LINUX_KDSKBMODE: { int kbdmode; @@ -1684,18 +1759,22 @@ linux_ioctl_console(struct thread *td, struct linux_ioctl_args *args) kbdmode = K_RAW; break; default: + fdrop(fp, td); return (EINVAL); } - return (fo_ioctl(fp, KDSKBMODE, (caddr_t)&kbdmode, td)); + error = (fo_ioctl(fp, KDSKBMODE, (caddr_t)&kbdmode, td)); + break; } case LINUX_VT_OPENQRY: args->cmd = VT_OPENQRY; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_VT_GETMODE: args->cmd = VT_GETMODE; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_VT_SETMODE: { struct vt_mode *mode; @@ -1703,28 +1782,37 @@ linux_ioctl_console(struct thread *td, struct linux_ioctl_args *args) mode = (struct vt_mode *)args->arg; if (!ISSIGVALID(mode->frsig) && ISSIGVALID(mode->acqsig)) mode->frsig = mode->acqsig; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; } case LINUX_VT_GETSTATE: args->cmd = VT_GETACTIVE; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_VT_RELDISP: args->cmd = VT_RELDISP; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_VT_ACTIVATE: args->cmd = VT_ACTIVATE; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; case LINUX_VT_WAITACTIVE: args->cmd = VT_WAITACTIVE; - return (ioctl(td, (struct ioctl_args *)args)); + error = (ioctl(td, (struct ioctl_args *)args)); + break; + default: + error = ENOIOCTL; + break; } - return (ENOIOCTL); + fdrop(fp, td); + return (error); } /* @@ -2161,7 +2249,6 @@ linux_ioctl_special(struct thread *td, struct linux_ioctl_args *args) int linux_ioctl(struct thread *td, struct linux_ioctl_args *args) { - struct filedesc *fdp; struct file *fp; struct handler_element *he; int error, cmd; @@ 
-2172,12 +2259,13 @@ linux_ioctl(struct thread *td, struct linux_ioctl_args *args) (unsigned long)args->cmd); #endif - fdp = td->td_proc->p_fd; - if ((unsigned)args->fd >= fdp->fd_nfiles) + fp = ffind_hold(td, args->fd); + if (fp == NULL) return (EBADF); - fp = fdp->fd_ofiles[args->fd]; - if (fp == NULL || (fp->f_flag & (FREAD|FWRITE)) == 0) + if ((fp->f_flag & (FREAD|FWRITE)) == 0) { + fdrop(fp, td); return (EBADF); + } /* Iterate over the ioctl handlers */ cmd = args->cmd & 0xffff; @@ -2185,9 +2273,11 @@ linux_ioctl(struct thread *td, struct linux_ioctl_args *args) if (cmd >= he->low && cmd <= he->high) { error = (*he->func)(td, args); if (error != ENOIOCTL) + fdrop(fp, td); return (error); } } + fdrop(fp, td); printf("linux: 'ioctl' fd=%d, cmd=0x%x ('%c',%d) not implemented\n", args->fd, (int)(args->cmd & 0xffff), diff --git a/sys/compat/linux/linux_stats.c b/sys/compat/linux/linux_stats.c index f7d5b391ae31..8f1799cd1d83 100644 --- a/sys/compat/linux/linux_stats.c +++ b/sys/compat/linux/linux_stats.c @@ -151,7 +151,6 @@ linux_newlstat(struct thread *td, struct linux_newlstat_args *args) int linux_newfstat(struct thread *td, struct linux_newfstat_args *args) { - struct filedesc *fdp; struct file *fp; struct stat buf; int error; @@ -161,12 +160,12 @@ linux_newfstat(struct thread *td, struct linux_newfstat_args *args) printf(ARGS(newfstat, "%d, *"), args->fd); #endif - fdp = td->td_proc->p_fd; - if ((unsigned)args->fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[args->fd]) == NULL) + fp = ffind_hold(td, args->fd); + if (fp == NULL) return (EBADF); error = fo_stat(fp, &buf, td); + fdrop(fp, td); if (!error) error = newstat_copyout(&buf, args->buf); @@ -286,8 +285,10 @@ linux_fstatfs(struct thread *td, struct linux_fstatfs_args *args) mp = ((struct vnode *)fp->f_data)->v_mount; bsd_statfs = &mp->mnt_stat; error = VFS_STATFS(mp, bsd_statfs, td); - if (error) + if (error) { + fdrop(fp, td); return error; + } bsd_statfs->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 
linux_statfs.f_type = bsd_to_linux_ftype(bsd_statfs->f_fstypename); linux_statfs.f_bsize = bsd_statfs->f_bsize; @@ -299,8 +300,10 @@ linux_fstatfs(struct thread *td, struct linux_fstatfs_args *args) linux_statfs.f_fsid.val[0] = bsd_statfs->f_fsid.val[0]; linux_statfs.f_fsid.val[1] = bsd_statfs->f_fsid.val[1]; linux_statfs.f_namelen = MAXNAMLEN; - return copyout((caddr_t)&linux_statfs, (caddr_t)args->buf, + error = copyout((caddr_t)&linux_statfs, (caddr_t)args->buf, sizeof(linux_statfs)); + fdrop(fp, td); + return error; } struct l_ustat diff --git a/sys/compat/svr4/svr4_fcntl.c b/sys/compat/svr4/svr4_fcntl.c index 465bae54609c..a13cd232a328 100644 --- a/sys/compat/svr4/svr4_fcntl.c +++ b/sys/compat/svr4/svr4_fcntl.c @@ -246,7 +246,6 @@ fd_revoke(td, fd) struct thread *td; int fd; { - struct filedesc *fdp = td->td_proc->p_fd; struct file *fp; struct vnode *vp; struct mount *mp; @@ -254,11 +253,14 @@ fd_revoke(td, fd) int error, *retval; retval = td->td_retval; - if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) + fp = ffind_hold(td, fd); + if (fp == NULL) return EBADF; - if (fp->f_type != DTYPE_VNODE) + if (fp->f_type != DTYPE_VNODE) { + fdrop(fp, td); return EINVAL; + } vp = (struct vnode *) fp->f_data; @@ -281,6 +283,7 @@ fd_revoke(td, fd) vn_finished_write(mp); out: vrele(vp); + fdrop(fp, td); return error; } @@ -291,7 +294,6 @@ fd_truncate(td, fd, flp) int fd; struct flock *flp; { - struct filedesc *fdp = td->td_proc->p_fd; struct file *fp; off_t start, length; struct vnode *vp; @@ -304,15 +306,20 @@ fd_truncate(td, fd, flp) /* * We only support truncating the file. 
*/ - if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) + fp = ffind_hold(td, fd); + if (fp == NULL) return EBADF; vp = (struct vnode *)fp->f_data; - if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) + if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { + fdrop(fp, td); return ESPIPE; + } if ((error = VOP_GETATTR(vp, &vattr, td->td_proc->p_ucred, td)) != 0) + fdrop(fp, td); return error; + } length = vattr.va_size; @@ -330,6 +337,7 @@ fd_truncate(td, fd, flp) break; default: + fdrop(fp, td); return EINVAL; } @@ -341,7 +349,10 @@ fd_truncate(td, fd, flp) SCARG(&ft, fd) = fd; SCARG(&ft, length) = start; - return ftruncate(td, &ft); + error = ftruncate(td, &ft); + + fdrop(fp, td); + return (error); } int @@ -373,15 +384,23 @@ svr4_sys_open(td, uap) if (!(SCARG(&cup, flags) & O_NOCTTY) && SESS_LEADER(p) && !(td->td_proc->p_flag & P_CONTROLT)) { #if defined(NOTYET) - struct filedesc *fdp = td->td_proc->p_fd; - struct file *fp = fdp->fd_ofiles[retval]; + struct file *fp; + fp = ffind_hold(td, retval); PROC_UNLOCK(p); + /* + * we may have lost a race the above open() and + * another thread issuing a close() + */ + if (fp == NULL) + return (EBADF); /* XXX: correct errno? 
*/ /* ignore any error, just give it a try */ if (fp->f_type == DTYPE_VNODE) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0, td); - } else + fdrop(fp, td); + } else { PROC_UNLOCK(p); + } #else } PROC_UNLOCK(p); diff --git a/sys/compat/svr4/svr4_filio.c b/sys/compat/svr4/svr4_filio.c index 8d24788532ca..b24a039cddd5 100644 --- a/sys/compat/svr4/svr4_filio.c +++ b/sys/compat/svr4/svr4_filio.c @@ -97,7 +97,6 @@ svr4_sys_read(td, uap) struct svr4_sys_read_args *uap; { struct read_args ra; - struct filedesc *fdp = td->td_proc->p_fd; struct file *fp; struct socket *so = NULL; int so_state; @@ -108,7 +107,8 @@ svr4_sys_read(td, uap) SCARG(&ra, buf) = SCARG(uap, buf); SCARG(&ra, nbyte) = SCARG(uap, nbyte); - if ((fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL) { + fp = ffind_hold(td, uap->fd); + if (fp == NULL) { DPRINTF(("Something fishy with the user-supplied file descriptor...\n")); return EBADF; } @@ -142,6 +142,7 @@ svr4_sys_read(td, uap) so->so_state = so_state; } #endif + fdrop(fp, td); return(rv); } @@ -154,7 +155,6 @@ svr4_sys_write(td, uap) struct svr4_sys_write_args *uap; { struct write_args wa; - struct filedesc *fdp; struct file *fp; int rv; @@ -186,13 +186,16 @@ svr4_fil_ioctl(fp, td, retval, fd, cmd, data) *retval = 0; + FILEDESC_LOCK(fdp); switch (cmd) { case SVR4_FIOCLEX: fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + FILEDESC_UNLOCK(fdp); return 0; case SVR4_FIONCLEX: fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; + FILEDESC_UNLOCK(fdp); return 0; case SVR4_FIOGETOWN: @@ -200,6 +203,7 @@ svr4_fil_ioctl(fp, td, retval, fd, cmd, data) case SVR4_FIOASYNC: case SVR4_FIONBIO: case SVR4_FIONREAD: + FILEDESC_UNLOCK(fdp); if ((error = copyin(data, &num, sizeof(num))) != 0) return error; @@ -222,6 +226,7 @@ svr4_fil_ioctl(fp, td, retval, fd, cmd, data) return copyout(&num, data, sizeof(num)); default: + FILEDESC_UNLOCK(fdp); DPRINTF(("Unknown svr4 filio %lx\n", cmd)); return 0; /* ENOSYS really */ } diff --git a/sys/compat/svr4/svr4_ioctl.c b/sys/compat/svr4/svr4_ioctl.c index 
e153713fbba9..024c2e5ed7f4 100644 --- a/sys/compat/svr4/svr4_ioctl.c +++ b/sys/compat/svr4/svr4_ioctl.c @@ -84,10 +84,10 @@ svr4_sys_ioctl(td, uap) { int *retval; struct file *fp; - struct filedesc *fdp; u_long cmd; int (*fun) __P((struct file *, struct thread *, register_t *, int, u_long, caddr_t)); + int error; #ifdef DEBUG_SVR4 char dir[4]; char c; @@ -100,15 +100,16 @@ svr4_sys_ioctl(td, uap) dir, c, num, argsiz, SCARG(uap, data))); #endif retval = td->td_retval; - fdp = td->td_proc->p_fd; cmd = SCARG(uap, com); - if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL) + fp = ffind_hold(td, uap->fd); + if (fp == NULL) return EBADF; - if ((fp->f_flag & (FREAD | FWRITE)) == 0) + if ((fp->f_flag & (FREAD | FWRITE)) == 0) { + fdrop(fp, td); return EBADF; + } #if defined(DEBUG_SVR4) if (fp->f_type == DTYPE_SOCKET) { @@ -145,17 +146,23 @@ svr4_sys_ioctl(td, uap) case SVR4_XIOC: /* We do not support those */ + fdrop(fp, td); return EINVAL; default: + fdrop(fp, td); DPRINTF(("Unimplemented ioctl %lx\n", cmd)); return 0; /* XXX: really ENOSYS */ } #if defined(DEBUG_SVR4) if (fp->f_type == DTYPE_SOCKET) { - struct socket *so = (struct socket *)fp->f_data; + struct socket *so; + + so = (struct socket *)fp->f_data; DPRINTF((">>> OUT: so_state = 0x%x\n", so->so_state)); } #endif - return (*fun)(fp, td, retval, SCARG(uap, fd), cmd, SCARG(uap, data)); + error = (*fun)(fp, td, retval, SCARG(uap, fd), cmd, SCARG(uap, data)); + fdrop(fp, td); + return (error); } diff --git a/sys/compat/svr4/svr4_misc.c b/sys/compat/svr4/svr4_misc.c index 700be90f13ae..e83e6d53a33b 100644 --- a/sys/compat/svr4/svr4_misc.c +++ b/sys/compat/svr4/svr4_misc.c @@ -263,15 +263,20 @@ svr4_sys_getdents64(td, uap) return (error); } - if ((fp->f_flag & FREAD) == 0) + if ((fp->f_flag & FREAD) == 0) { + fdrop(fp, td); return (EBADF); + } vp = (struct vnode *) fp->f_data; - if (vp->v_type != VDIR) + if (vp->v_type != VDIR) { + fdrop(fp, td); return (EINVAL); + } if 
((error = VOP_GETATTR(vp, &va, td->td_proc->p_ucred, td))) { + fdrop(fp, td); return error; } @@ -400,9 +405,10 @@ svr4_sys_getdents64(td, uap) eof: td->td_retval[0] = nbytes - resid; out: + VOP_UNLOCK(vp, 0, td); + fdrop(fp, td); if (cookies) free(cookies, M_TEMP); - VOP_UNLOCK(vp, 0, td); free(buf, M_TEMP); return error; } @@ -431,12 +437,16 @@ svr4_sys_getdents(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - if ((fp->f_flag & FREAD) == 0) + if ((fp->f_flag & FREAD) == 0) { + fdrop(fp, td); return (EBADF); + } vp = (struct vnode *)fp->f_data; - if (vp->v_type != VDIR) + if (vp->v_type != VDIR) { + fdrop(fp, td); return (EINVAL); + } buflen = min(MAXBSIZE, SCARG(uap, nbytes)); buf = malloc(buflen, M_TEMP, M_WAITOK); @@ -458,8 +468,9 @@ svr4_sys_getdents(td, uap) */ error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies, &cookiebuf); - if (error) + if (error) { goto out; + } inp = buf; outp = SCARG(uap, buf); @@ -515,6 +526,7 @@ svr4_sys_getdents(td, uap) *retval = SCARG(uap, nbytes) - resid; out: VOP_UNLOCK(vp, 0, td); + fdrop(fp, td); if (cookiebuf) free(cookiebuf, M_TEMP); free(buf, M_TEMP); @@ -607,11 +619,17 @@ svr4_sys_fchroot(td, uap) error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred, td); VOP_UNLOCK(vp, 0, td); if (error) + fdrop(fp, td); return error; + } VREF(vp); - if (fdp->fd_rdir != NULL) - vrele(fdp->fd_rdir); + FILEDESC_LOCK(fdp); + vpold = fdp->fd_rdir; fdp->fd_rdir = vp; + FILEDESC_UNLOCK(fdp); + if (vpold != NULL) + vrele(vpold); + fdrop(fp, td); return 0; } @@ -1221,15 +1239,16 @@ svr4_sys_waitsys(td, uap) nfound = 0; sx_slock(&proctree_lock); LIST_FOREACH(q, &td->td_proc->p_children, p_sibling) { + PROC_LOCK(q); if (SCARG(uap, id) != WAIT_ANY && q->p_pid != SCARG(uap, id) && q->p_pgid != -SCARG(uap, id)) { + PROC_UNLOCK(q); DPRINTF(("pid %d pgid %d != %d\n", q->p_pid, q->p_pgid, SCARG(uap, id))); continue; } nfound++; - PROC_LOCK(q); mtx_lock_spin(&sched_lock); if (q->p_stat == SZOMB && 
((SCARG(uap, options) & (SVR4_WEXITED|SVR4_WTRAPPED)))) { diff --git a/sys/compat/svr4/svr4_stream.c b/sys/compat/svr4/svr4_stream.c index bf1c15ac8535..6681acca4f53 100644 --- a/sys/compat/svr4/svr4_stream.c +++ b/sys/compat/svr4/svr4_stream.c @@ -77,6 +77,10 @@ /* Utils */ static int clean_pipe __P((struct thread *, const char *)); static void getparm __P((struct file *, struct svr4_si_sockparms *)); +static int svr4_do_putmsg __P((struct proc *, struct svr4_sys_putmsg_args *, + struct file *)); +static int svr4_do_getmsg __P((struct proc *, struct svr4_sys_getmsg_args *, + struct file *)); /* Address Conversions */ static void sockaddr_to_netaddr_in __P((struct svr4_strmcmd *, @@ -235,6 +239,7 @@ svr4_sendit(td, s, mp, flags) if (to) FREE(to, M_SONAME); done1: + fdrop(fp, td); fputsock(so); return (error); } @@ -359,6 +364,7 @@ svr4_recvit(td, s, mp, namelenp) if (control) m_freem(control); done1: + fdrop(fp, td); fputsock(so); return (error); } @@ -619,12 +625,15 @@ getparm(fp, pa) struct file *fp; struct svr4_si_sockparms *pa; { - struct svr4_strm *st = svr4_stream_get(fp); - struct socket *so = (struct socket *) fp->f_data; + struct svr4_strm *st; + struct socket *so; + st = svr4_stream_get(fp); if (st == NULL) return; + so = (struct socket *) fp->f_data; + pa->family = st->s_family; switch (so->so_type) { @@ -1705,8 +1714,27 @@ svr4_sys_putmsg(td, uap) register struct thread *td; struct svr4_sys_putmsg_args *uap; { - struct filedesc *fdp = td->td_proc->p_fd; + struct file *fp; + int error; + + fp = ffind_hold(td, uap->fd); + if (fp == NULL) { +#ifdef DEBUG_SVR4 + uprintf("putmsg: bad fp\n"); +#endif + return EBADF; + } + error = svr4_do_putmsg(td, uap, fp); + fdrop(fp, td); + return (error); +} + +static int +svr4_do_putmsg(td, uap, fp) + struct thread *td; + struct svr4_sys_putmsg_args *uap; struct file *fp; +{ struct svr4_strbuf dat, ctl; struct svr4_strmcmd sc; struct sockaddr_in sain; @@ -1718,26 +1746,13 @@ svr4_sys_putmsg(td, uap) caddr_t sg; retval = 
td->td_retval; - fp = fdp->fd_ofiles[SCARG(uap, fd)]; - - if (((u_int)SCARG(uap, fd) >= fdp->fd_nfiles) || (fp == NULL)) { -#ifdef DEBUG_SVR4 - uprintf("putmsg: bad fp\n"); -#endif - return EBADF; - } #ifdef DEBUG_SVR4 show_msg(">putmsg", SCARG(uap, fd), SCARG(uap, ctl), SCARG(uap, dat), SCARG(uap, flags)); #endif /* DEBUG_SVR4 */ - if (((u_int)SCARG(uap, fd) >= fdp->fd_nfiles) || (fp == NULL)) { -#ifdef DEBUG_SVR4 - uprintf("putmsg: bad fp(2)\n"); -#endif - return EBADF; - } + FILE_LOCK_ASSERT(fp, MA_NOTOWNED); if (SCARG(uap, ctl) != NULL) { if ((error = copyin(SCARG(uap, ctl), &ctl, sizeof(ctl))) != 0) { @@ -1882,13 +1897,32 @@ svr4_sys_putmsg(td, uap) } } +int +svr4_sys_getmsg(p, uap) + struct proc *p; + struct svr4_sys_getmsg_args *uap; +{ + struct file *fp; + int error; + + fp = ffind_hold(td, uap->fd); + if (fp == NULL) { +#ifdef DEBUG_SVR4 + uprintf("getmsg: bad fp\n"); +#endif + return EBADF; + } + error = svr4_do_getmsg(p, uap, fp); + fdrop(fp, td); + return (error); +} + int svr4_sys_getmsg(td, uap) register struct thread *td; struct svr4_sys_getmsg_args *uap; + struct file *fp; { - struct filedesc *fdp = td->td_proc->p_fd; - struct file *fp; struct getpeername_args ga; struct accept_args aa; struct svr4_strbuf dat, ctl; @@ -1906,10 +1940,8 @@ svr4_sys_getmsg(td, uap) caddr_t sg; retval = td->td_retval; - fp = fdp->fd_ofiles[SCARG(uap, fd)]; - if (((u_int)SCARG(uap, fd) >= fdp->fd_nfiles) || (fp == NULL)) - return EBADF; + FILE_LOCK_ASSERT(fp, MA_NOTOWNED); memset(&sc, 0, sizeof(sc)); @@ -1917,9 +1949,6 @@ svr4_sys_getmsg(td, uap) show_msg(">getmsg", SCARG(uap, fd), SCARG(uap, ctl), SCARG(uap, dat), 0); #endif /* DEBUG_SVR4 */ - - if (((u_int)SCARG(uap, fd) >= fdp->fd_nfiles) || (fp == NULL)) - return EBADF; if (SCARG(uap, ctl) != NULL) { if ((error = copyin(SCARG(uap, ctl), &ctl, sizeof(ctl))) != 0) diff --git a/sys/dev/aac/aac.c b/sys/dev/aac/aac.c index 42e3697901e9..f456f3f39ae1 100644 --- a/sys/dev/aac/aac.c +++ b/sys/dev/aac/aac.c @@ -2576,16 
+2576,21 @@ aac_linux_ioctl(struct thread *td, struct linux_ioctl_args *args) { struct file *fp; u_long cmd; + int error; debug_called(2); - fp = td->td_proc->p_fd->fd_ofiles[args->fd]; + fp = ffind_hold(td, args->fd); + if (fp == NULL) + return (EBADF); cmd = args->cmd; /* * Pass the ioctl off to our standard handler. */ - return(fo_ioctl(fp, cmd, (caddr_t)args->arg, td)); + error = (fo_ioctl(fp, cmd, (caddr_t)args->arg, td)); + fdrop(fp, td); + return (error); } #endif diff --git a/sys/dev/streams/streams.c b/sys/dev/streams/streams.c index 38d7fee29734..a5c2b4df2254 100644 --- a/sys/dev/streams/streams.c +++ b/sys/dev/streams/streams.c @@ -266,15 +266,19 @@ streamsopen(dev_t dev, int oflags, int devtype, struct thread *td) if ((error = socreate(family, &so, type, protocol, td->td_proc->p_ucred, td)) != 0) { + FILEDESC_LOCK(p->p_fd); p->p_fd->fd_ofiles[fd] = 0; + FILEDESC_UNLOCK(p->p_fd); ffree(fp); return error; } + FILEDESC_LOCK(p->p_fd); fp->f_data = (caddr_t)so; fp->f_flag = FREAD|FWRITE; fp->f_ops = &svr4_netops; fp->f_type = DTYPE_SOCKET; + FILEDESC_UNLOCK(p->p_fd); (void)svr4_stream_get(fp); PROC_LOCK(p); @@ -355,8 +359,12 @@ svr4_stream_get(fp) so = (struct socket *) fp->f_data; - if (so->so_emuldata) + /* + * mpfixme: lock socketbuffer here + */ + if (so->so_emuldata) { return so->so_emuldata; + } /* Allocate a new one. */ st = malloc(sizeof(struct svr4_strm), M_TEMP, M_WAITOK); @@ -364,8 +372,19 @@ svr4_stream_get(fp) st->s_cmd = ~0; st->s_afd = -1; st->s_eventmask = 0; - so->so_emuldata = st; - fp->f_ops = &svr4_netops; + /* + * avoid a race where we lose due to concurrency issues + * of two threads trying to allocate the so_emuldata.
+ */ + if (so->so_emuldata) { + /* lost the race, use the existing emuldata */ + FREE(st, M_TEMP); + st = so->so_emuldata; + } else { + /* we won, or there was no race, use our copy */ + so->so_emuldata = st; + fp->f_ops = &svr4_netops; + } return st; } @@ -406,5 +425,4 @@ svr4_soo_close(struct file *fp, struct thread *td) svr4_delete_socket(td->td_proc, fp); free(so->so_emuldata, M_TEMP); return soo_close(fp, td); - return (0); } diff --git a/sys/dev/tdfx/tdfx_pci.c b/sys/dev/tdfx/tdfx_pci.c index 5594a13630eb..cfbfe18a0833 100644 --- a/sys/dev/tdfx/tdfx_pci.c +++ b/sys/dev/tdfx/tdfx_pci.c @@ -842,11 +842,15 @@ linux_ioctl_tdfx(struct thread *td, struct linux_ioctl_args* args) and one void*. */ char d_pio[2*sizeof(short) + sizeof(int) + sizeof(void*)]; - struct file *fp = td->td_proc->p_fd->fd_ofiles[args->fd]; + struct file *fp; + fp = ffind_hold(td, args->fd); + if (fp == NULL) + return (EBADF); /* We simply copy the data and send it right to ioctl */ copyin((caddr_t)args->arg, &d_pio, sizeof(d_pio)); error = fo_ioctl(fp, cmd, (caddr_t)&d_pio, td); + fdrop(fp, td); return error; } #endif /* TDFX_LINUX */ diff --git a/sys/fs/fdescfs/fdesc_vfsops.c b/sys/fs/fdescfs/fdesc_vfsops.c index b9b34161014b..541358f3c3fe 100644 --- a/sys/fs/fdescfs/fdesc_vfsops.c +++ b/sys/fs/fdescfs/fdesc_vfsops.c @@ -175,6 +175,7 @@ fdesc_statfs(mp, sbp, td) */ lim = td->td_proc->p_rlimit[RLIMIT_NOFILE].rlim_cur; fdp = td->td_proc->p_fd; + FILEDESC_LOCK(fdp); last = min(fdp->fd_nfiles, lim); freefd = 0; for (i = fdp->fd_freefile; i < last; i++) @@ -187,6 +188,7 @@ fdesc_statfs(mp, sbp, td) */ if (fdp->fd_nfiles < lim) freefd += (lim - fdp->fd_nfiles); + FILEDESC_UNLOCK(fdp); sbp->f_flags = 0; sbp->f_bsize = DEV_BSIZE; diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c index 7cd2feece507..eb6f0dbe1abf 100644 --- a/sys/fs/fdescfs/fdesc_vnops.c +++ b/sys/fs/fdescfs/fdesc_vnops.c @@ -174,8 +174,8 @@ fdesc_lookup(ap) struct componentname *cnp = ap->a_cnp; char *pname = 
cnp->cn_nameptr; struct thread *td = cnp->cn_thread; + struct file *fp; int nlen = cnp->cn_namelen; - int nfiles = td->td_proc->p_fd->fd_nfiles; u_int fd; int error; struct vnode *fvp; @@ -212,12 +212,14 @@ fdesc_lookup(ap) fd = 10 * fd + *pname++ - '0'; } - if (fd >= nfiles || td->td_proc->p_fd->fd_ofiles[fd] == NULL) { + fp = ffind_hold(td, fd); + if (fp == NULL) { error = EBADF; goto bad; } error = fdesc_allocvp(Fdesc, FD_DESC+fd, dvp->v_mount, &fvp, td); + fdrop(fp, td); if (error) goto bad; VTOFDESC(fvp)->fd_fd = fd; @@ -268,7 +270,6 @@ fdesc_getattr(ap) { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; - struct filedesc *fdp = ap->a_td->td_proc->p_fd; struct file *fp; struct stat stb; u_int fd; @@ -299,11 +300,13 @@ fdesc_getattr(ap) case Fdesc: fd = VTOFDESC(vp)->fd_fd; - if (fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) + fp = ffind_hold(ap->a_td, fd); + if (fp == NULL) return (EBADF); bzero(&stb, sizeof(stb)); error = fo_stat(fp, &stb, ap->a_td); + fdrop(fp, ap->a_td); if (error == 0) { VATTR_NULL(vap); vap->va_type = IFTOVT(stb.st_mode); @@ -396,10 +399,13 @@ fdesc_setattr(ap) return (error); } vp = (struct vnode *)fp->f_data; - if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { + fdrop(fp, ap->a_td); return (error); + } error = VOP_SETATTR(vp, ap->a_vap, ap->a_cred, ap->a_td); vn_finished_write(mp); + fdrop(fp, ap->a_td); return (error); } @@ -442,6 +448,7 @@ fdesc_readdir(ap) fcnt = i - 2; /* The first two nodes are `.' and `..' */ + FILEDESC_LOCK(fdp); while (i < fdp->fd_nfiles + 2 && uio->uio_resid >= UIO_MX) { switch (i) { case 0: /* `.' 
*/ @@ -456,8 +463,10 @@ fdesc_readdir(ap) dp->d_type = DT_DIR; break; default: - if (fdp->fd_ofiles[fcnt] == NULL) + if (fdp->fd_ofiles[fcnt] == NULL) { + FILEDESC_UNLOCK(fdp); goto done; + } bzero((caddr_t) dp, UIO_MX); dp->d_namlen = sprintf(dp->d_name, "%d", fcnt); @@ -469,12 +478,15 @@ fdesc_readdir(ap) /* * And ship to userland */ + FILEDESC_UNLOCK(fdp); error = uiomove((caddr_t) dp, UIO_MX, uio); if (error) - break; + goto done; + FILEDESC_LOCK(fdp); i++; fcnt++; } + FILEDESC_UNLOCK(fdp); done: uio->uio_offset = i * UIO_MX; diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c index 461f7c461203..3597cbb15fab 100644 --- a/sys/fs/fifofs/fifo_vnops.c +++ b/sys/fs/fifofs/fifo_vnops.c @@ -344,23 +344,29 @@ fifo_ioctl(ap) } */ *ap; { struct file filetmp; - int error; + int error = 0; if (ap->a_command == FIONBIO) return (0); + mtx_init(&filetmp.f_mtx, "struct file", MTX_DEF); + filetmp.f_count = 1; if (ap->a_fflag & FREAD) { + /* filetmp is local, hence not need be locked. */ filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock; error = soo_ioctl(&filetmp, ap->a_command, ap->a_data, ap->a_td); if (error) - return (error); + goto err; } if (ap->a_fflag & FWRITE) { + /* filetmp is local, hence not need be locked. 
*/ filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock; error = soo_ioctl(&filetmp, ap->a_command, ap->a_data, ap->a_td); if (error) - return (error); + goto err; } - return (0); +err: + mtx_destroy(&filetmp.f_mtx); + return (error); } /* ARGSUSED */ @@ -459,6 +465,8 @@ fifo_poll(ap) struct file filetmp; int revents = 0; + mtx_init(&filetmp.f_mtx, "struct file", MTX_DEF); + filetmp.f_count = 1; if (ap->a_events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock; if (filetmp.f_data) @@ -471,6 +479,7 @@ fifo_poll(ap) revents |= soo_poll(&filetmp, ap->a_events, ap->a_cred, ap->a_td); } + mtx_destroy(&filetmp.f_mtx); return (revents); } diff --git a/sys/fs/portalfs/portal_vfsops.c b/sys/fs/portalfs/portal_vfsops.c index 4e7313130e5a..bc71fa5b16d1 100644 --- a/sys/fs/portalfs/portal_vfsops.c +++ b/sys/fs/portalfs/portal_vfsops.c @@ -132,7 +132,8 @@ portal_mount(mp, path, data, ndp, td) VTOPORTAL(rvp)->pt_size = 0; VTOPORTAL(rvp)->pt_fileid = PORTAL_ROOTFILEID; fmp->pm_root = rvp; - fmp->pm_server = fp; fp->f_count++; + fhold(fp); + fmp->pm_server = fp; mp->mnt_flag |= MNT_LOCAL; mp->mnt_data = (qaddr_t) fmp; @@ -159,6 +160,7 @@ portal_unmount(mp, mntflags, td) struct thread *td; { int error, flags = 0; + struct socket *so; if (mntflags & MNT_FORCE) diff --git a/sys/fs/portalfs/portal_vnops.c b/sys/fs/portalfs/portal_vnops.c index dd90516fbcfc..e8feeb1a7fb0 100644 --- a/sys/fs/portalfs/portal_vnops.c +++ b/sys/fs/portalfs/portal_vnops.c @@ -402,12 +402,18 @@ portal_open(ap) * Check that the mode the file is being opened for is a subset * of the mode of the existing descriptor. 
*/ - fp = td->td_proc->p_fd->fd_ofiles[fd]; + fp = ffind_hold(td, fd); + if (fp == NULL) { + error = EBADF; + goto bad; + } if (((ap->a_mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) { + fdrop(fp, td); portal_closefd(td, fd); error = EACCES; goto bad; } + fdrop(fp, td); /* * Save the dup fd in the proc structure then return the diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c index 8320f4a99fa4..70a9f52ed9b4 100644 --- a/sys/fs/unionfs/union_subr.c +++ b/sys/fs/unionfs/union_subr.c @@ -1048,10 +1048,13 @@ union_vn_create(vpp, un, td) struct vattr *vap = &vat; int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL); int error; - int cmode = UN_FILEMODE & ~td->td_proc->p_fd->fd_cmask; + int cmode; struct componentname cn; *vpp = NULLVP; + FILEDESC_LOCK(td->td_proc->p_fd); + cmode = UN_FILEMODE & ~td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); /* * Build a new componentname structure (for the same @@ -1323,8 +1326,10 @@ union_dircheck(struct thread *td, struct vnode **vp, struct file *fp) return (error); } VOP_UNLOCK(lvp, 0, td); + FILE_LOCK(fp); fp->f_data = (caddr_t) lvp; fp->f_offset = 0; + FILE_UNLOCK(fp); error = vn_close(*vp, FREAD, fp->f_cred, td); if (error) return (error); diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c index 7f3d5bd36b44..92f46d5dac28 100644 --- a/sys/fs/unionfs/union_vfsops.c +++ b/sys/fs/unionfs/union_vfsops.c @@ -224,7 +224,9 @@ union_mount(mp, path, data, ndp, td) } um->um_cred = crhold(td->td_proc->p_ucred); + FILEDESC_LOCK(td->td_proc->p_fd); um->um_cmode = UN_DIRMODE &~ td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); /* * Depending on what you think the MNT_LOCAL flag might mean, diff --git a/sys/i386/ibcs2/ibcs2_fcntl.c b/sys/i386/ibcs2/ibcs2_fcntl.c index 6151c5da1695..a54d40ae095b 100644 --- a/sys/i386/ibcs2/ibcs2_fcntl.c +++ b/sys/i386/ibcs2/ibcs2_fcntl.c @@ -193,13 +193,17 @@ ibcs2_open(td, uap) #endif /* SPX_HACK */ PROC_LOCK(p); if (!ret && 
!noctty && SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) { - struct filedesc *fdp = p->p_fd; - struct file *fp = fdp->fd_ofiles[td->td_retval[0]]; + struct file *fp; + fp = ffind_hold(td, td->td_retval[0]); PROC_UNLOCK(p); + if (fp == NULL) + return (EBADF); + /* ignore any error, just give it a try */ if (fp->f_type == DTYPE_VNODE) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0, td); + fdrop(fp, td); } else PROC_UNLOCK(p); return ret; diff --git a/sys/i386/ibcs2/ibcs2_ioctl.c b/sys/i386/ibcs2/ibcs2_ioctl.c index de63ad837cdd..a37e11633f7c 100644 --- a/sys/i386/ibcs2/ibcs2_ioctl.c +++ b/sys/i386/ibcs2/ibcs2_ioctl.c @@ -54,6 +54,8 @@ static void stios2btios __P((struct ibcs2_termios *, struct termios *)); static void btios2stios __P((struct termios *, struct ibcs2_termios *)); static void stios2stio __P((struct ibcs2_termios *, struct ibcs2_termio *)); static void stio2stios __P((struct ibcs2_termio *, struct ibcs2_termios *)); +static int ibcs2_do_ioctl __P((struct proc *, struct ibcs2_ioctl_args *, + struct file *)); int @@ -341,18 +343,18 @@ ibcs2_ioctl(td, uap) struct ibcs2_ioctl_args *uap; { struct proc *p = td->td_proc; - struct filedesc *fdp = p->p_fd; struct file *fp; int error; - if (SCARG(uap, fd) < 0 || SCARG(uap, fd) >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL) { + fp = ffind_hold(td, uap->fd); + if (fp == NULL) { DPRINTF(("ibcs2_ioctl(%d): bad fd %d ", p->p_pid, SCARG(uap, fd))); return EBADF; } if ((fp->f_flag & (FREAD|FWRITE)) == 0) { + fdrop(fp, td); DPRINTF(("ibcs2_ioctl(%d): bad fp flag ", p->p_pid)); return EBADF; } @@ -367,7 +369,7 @@ ibcs2_ioctl(td, uap) struct ibcs2_termio st; if ((error = fo_ioctl(fp, TIOCGETA, (caddr_t)&bts, td)) != 0) - return error; + break; btios2stios (&bts, &sts); if (SCARG(uap, cmd) == IBCS2_TCGETA) { @@ -379,10 +381,12 @@ ibcs2_ioctl(td, uap) DPRINTF(("ibcs2_ioctl(%d): copyout failed ", p->p_pid)); #endif - return error; - } else - return copyout((caddr_t)&sts, SCARG(uap, data), + break; + } else { + error 
= copyout((caddr_t)&sts, SCARG(uap, data), sizeof (sts)); + break; + } /*NOTREACHED*/ } @@ -398,14 +402,14 @@ ibcs2_ioctl(td, uap) sizeof(st))) != 0) { DPRINTF(("ibcs2_ioctl(%d): TCSET copyin failed ", p->p_pid)); - return error; + break; } /* get full BSD termios so we don't lose information */ if ((error = fo_ioctl(fp, TIOCGETA, (caddr_t)&bts, td)) != 0) { DPRINTF(("ibcs2_ioctl(%d): TCSET ctl failed fd %d ", p->p_pid, SCARG(uap, fd))); - return error; + break; } /* @@ -416,8 +420,9 @@ ibcs2_ioctl(td, uap) stio2stios(&st, &sts); stios2btios(&sts, &bts); - return fo_ioctl(fp, SCARG(uap, cmd) - IBCS2_TCSETA + TIOCSETA, + error = fo_ioctl(fp, SCARG(uap, cmd) - IBCS2_TCSETA + TIOCSETA, (caddr_t)&bts, td); + break; } case IBCS2_XCSETA: @@ -428,12 +433,12 @@ ibcs2_ioctl(td, uap) struct ibcs2_termios sts; if ((error = copyin(SCARG(uap, data), (caddr_t)&sts, - sizeof (sts))) != 0) { - return error; - } + sizeof (sts))) != 0) + break; stios2btios (&sts, &bts); - return fo_ioctl(fp, SCARG(uap, cmd) - IBCS2_XCSETA + TIOCSETA, + error = fo_ioctl(fp, SCARG(uap, cmd) - IBCS2_XCSETA + TIOCSETA, (caddr_t)&bts, td); + break; } case IBCS2_OXCSETA: @@ -444,17 +449,18 @@ ibcs2_ioctl(td, uap) struct ibcs2_termios sts; if ((error = copyin(SCARG(uap, data), (caddr_t)&sts, - sizeof (sts))) != 0) { - return error; - } + sizeof (sts))) != 0) + break; stios2btios (&sts, &bts); - return fo_ioctl(fp, SCARG(uap, cmd) - IBCS2_OXCSETA + TIOCSETA, + error = fo_ioctl(fp, SCARG(uap, cmd) - IBCS2_OXCSETA + TIOCSETA, (caddr_t)&bts, td); + break; } case IBCS2_TCSBRK: DPRINTF(("ibcs2_ioctl(%d): TCSBRK ", p->p_pid)); - return ENOSYS; + error = ENOSYS; + break; case IBCS2_TCXONC: { @@ -462,14 +468,19 @@ ibcs2_ioctl(td, uap) case 0: case 1: DPRINTF(("ibcs2_ioctl(%d): TCXONC ", p->p_pid)); - return ENOSYS; + error = ENOSYS; + break; case 2: - return fo_ioctl(fp, TIOCSTOP, (caddr_t)0, td); + error = fo_ioctl(fp, TIOCSTOP, (caddr_t)0, td); + break; case 3: - return fo_ioctl(fp, TIOCSTART, (caddr_t)1, td); + 
error = fo_ioctl(fp, TIOCSTART, (caddr_t)1, td); + break; default: - return EINVAL; + error = EINVAL; + break; } + break; } case IBCS2_TCFLSH: @@ -487,25 +498,34 @@ ibcs2_ioctl(td, uap) arg = FREAD | FWRITE; break; default: + fdrop(fp, td); return EINVAL; } - return fo_ioctl(fp, TIOCFLUSH, (caddr_t)&arg, td); + error = fo_ioctl(fp, TIOCFLUSH, (caddr_t)&arg, td); + break; } case IBCS2_TIOCGWINSZ: SCARG(uap, cmd) = TIOCGWINSZ; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; case IBCS2_TIOCSWINSZ: SCARG(uap, cmd) = TIOCSWINSZ; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; case IBCS2_TIOCGPGRP: + { + pid_t pg_id; + PROC_LOCK(p); - error = copyout((caddr_t)&p->p_pgrp->pg_id, SCARG(uap, data), - sizeof(p->p_pgrp->pg_id)); + pg_id = p->p_pgrp->pg_id; PROC_UNLOCK(p); - return error; + error = copyout((caddr_t)&pg_id, SCARG(uap, data), + sizeof(pg_id)); + break; + } case IBCS2_TIOCSPGRP: /* XXX - is uap->data a pointer to pgid? 
*/ { @@ -513,16 +533,17 @@ ibcs2_ioctl(td, uap) SCARG(&sa, pid) = 0; SCARG(&sa, pgid) = (int)SCARG(uap, data); - if ((error = setpgid(td, &sa)) != 0) - return error; - return 0; + error = setpgid(td, &sa); + break; } case IBCS2_TCGETSC: /* SCO console - get scancode flags */ - return EINTR; /* ENOSYS; */ + error = EINTR; /* ENOSYS; */ + break; case IBCS2_TCSETSC: /* SCO console - set scancode flags */ - return 0; /* ENOSYS; */ + error = 0; /* ENOSYS; */ + break; case IBCS2_JWINSIZE: /* Unix to Jerq I/O control */ { @@ -541,106 +562,132 @@ ibcs2_ioctl(td, uap) ibcs2_jwinsize.bity = p->p_session->s_ttyp->t_winsize.ws_ypixel; PROC_UNLOCK(p); - return copyout((caddr_t)&ibcs2_jwinsize, SCARG(uap, data), + error = copyout((caddr_t)&ibcs2_jwinsize, SCARG(uap, data), sizeof(ibcs2_jwinsize)); + break; } /* keyboard and display ioctl's -- type 'K' */ case IBCS2_KDGKBMODE: /* get keyboard translation mode */ SCARG(uap, cmd) = KDGKBMODE; /* printf("ioctl KDGKBMODE = %x\n", SCARG(uap, cmd));*/ - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; case IBCS2_KDSKBMODE: /* set keyboard translation mode */ SCARG(uap, cmd) = KDSKBMODE; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; case IBCS2_KDMKTONE: /* sound tone */ SCARG(uap, cmd) = KDMKTONE; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; case IBCS2_KDGETMODE: /* get text/graphics mode */ SCARG(uap, cmd) = KDGETMODE; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; case IBCS2_KDSETMODE: /* set text/graphics mode */ SCARG(uap, cmd) = KDSETMODE; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; case IBCS2_KDSBORDER: /* set ega color border */ SCARG(uap, cmd) = KDSBORDER; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; 
case IBCS2_KDGKBSTATE: SCARG(uap, cmd) = KDGKBSTATE; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; case IBCS2_KDSETRAD: SCARG(uap, cmd) = KDSETRAD; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; case IBCS2_KDENABIO: /* enable direct I/O to ports */ SCARG(uap, cmd) = KDENABIO; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; case IBCS2_KDDISABIO: /* disable direct I/O to ports */ SCARG(uap, cmd) = KDDISABIO; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; case IBCS2_KIOCSOUND: /* start sound generation */ SCARG(uap, cmd) = KIOCSOUND; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; case IBCS2_KDGKBTYPE: /* get keyboard type */ SCARG(uap, cmd) = KDGKBTYPE; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; case IBCS2_KDGETLED: /* get keyboard LED status */ SCARG(uap, cmd) = KDGETLED; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; case IBCS2_KDSETLED: /* set keyboard LED status */ SCARG(uap, cmd) = KDSETLED; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; /* Xenix keyboard and display ioctl's from sys/kd.h -- type 'k' */ case IBCS2_GETFKEY: /* Get function key */ SCARG(uap, cmd) = GETFKEY; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; case IBCS2_SETFKEY: /* Set function key */ SCARG(uap, cmd) = SETFKEY; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; case IBCS2_GIO_SCRNMAP: /* Get screen output map table */ SCARG(uap, cmd) = GIO_SCRNMAP; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; 
case IBCS2_PIO_SCRNMAP: /* Set screen output map table */ SCARG(uap, cmd) = PIO_SCRNMAP; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; case IBCS2_GIO_KEYMAP: /* Get keyboard map table */ SCARG(uap, cmd) = GIO_KEYMAP; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; case IBCS2_PIO_KEYMAP: /* Set keyboard map table */ SCARG(uap, cmd) = PIO_KEYMAP; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; /* socksys */ case IBCS2_SIOCSOCKSYS: - return ibcs2_socksys(td, (struct ibcs2_socksys_args *)uap); + error = ibcs2_socksys(td, (struct ibcs2_socksys_args *)uap); + break; case IBCS2_FIONREAD: case IBCS2_I_NREAD: /* STREAMS */ SCARG(uap, cmd) = FIONREAD; - return ioctl(td, (struct ioctl_args *)uap); + error = ioctl(td, (struct ioctl_args *)uap); + break; default: DPRINTF(("ibcs2_ioctl(%d): unknown cmd 0x%lx ", td->proc->p_pid, SCARG(uap, cmd))); - return ENOSYS; + error = ENOSYS; + break; } - return ENOSYS; + + fdrop(fp, td); + return error; } diff --git a/sys/i386/ibcs2/ibcs2_misc.c b/sys/i386/ibcs2/ibcs2_misc.c index bbe32eb391d7..3be77c8d9a34 100644 --- a/sys/i386/ibcs2/ibcs2_misc.c +++ b/sys/i386/ibcs2/ibcs2_misc.c @@ -316,11 +316,15 @@ ibcs2_getdents(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - if ((fp->f_flag & FREAD) == 0) + if ((fp->f_flag & FREAD) == 0) { + fdrop(fp, td); return (EBADF); + } vp = (struct vnode *)fp->f_data; - if (vp->v_type != VDIR) /* XXX vnode readdir op should do this */ + if (vp->v_type != VDIR) { /* XXX vnode readdir op should do this */ + fdrop(fp, td); return (EINVAL); + } off = fp->f_offset; #define DIRBLKSIZ 512 /* XXX we used to use ufs's DIRBLKSIZ */ @@ -427,9 +431,10 @@ ibcs2_getdents(td, uap) eof: td->td_retval[0] = SCARG(uap, nbytes) - resid; out: + VOP_UNLOCK(vp, 0, td); + fdrop(fp, td); if (cookies) free(cookies, M_TEMP); - 
VOP_UNLOCK(vp, 0, td); free(buf, M_TEMP); return (error); } @@ -462,15 +467,22 @@ ibcs2_read(td, uap) else return error; } - if ((fp->f_flag & FREAD) == 0) + if ((fp->f_flag & FREAD) == 0) { + fdrop(fp, td); return (EBADF); + } vp = (struct vnode *)fp->f_data; + if (vp->v_type != VDIR) { + fdrop(fp, td); + return read(td, (struct read_args *)uap); + } + + off = fp->f_offset; if (vp->v_type != VDIR) return read(td, (struct read_args *)uap); DPRINTF(("ibcs2_read: read directory\n")); - off = fp->f_offset; buflen = max(DIRBLKSIZ, SCARG(uap, nbytes)); buflen = min(buflen, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); @@ -578,9 +590,10 @@ ibcs2_read(td, uap) eof: td->td_retval[0] = SCARG(uap, nbytes) - resid; out: + VOP_UNLOCK(vp, 0, td); + fdrop(fp, td); if (cookies) free(cookies, M_TEMP); - VOP_UNLOCK(vp, 0, td); free(buf, M_TEMP); return (error); } diff --git a/sys/i386/ibcs2/ibcs2_stat.c b/sys/i386/ibcs2/ibcs2_stat.c index fd39853f7839..148da177f093 100644 --- a/sys/i386/ibcs2/ibcs2_stat.c +++ b/sys/i386/ibcs2/ibcs2_stat.c @@ -133,7 +133,9 @@ ibcs2_fstatfs(td, uap) return (error); mp = ((struct vnode *)fp->f_data)->v_mount; sp = &mp->mnt_stat; - if ((error = VFS_STATFS(mp, sp, td)) != 0) + error = VFS_STATFS(mp, sp, td); + fdrop(fp, td); + if (error != 0) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; return cvt_statfs(sp, (caddr_t)SCARG(uap, buf), SCARG(uap, len)); diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 4acc213f760a..e984aec1be93 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -358,6 +358,7 @@ proc0_init(void *dummy __unused) /* Create the file descriptor table. */ fdp = &filedesc0; p->p_fd = &fdp->fd_fd; + mtx_init(&fdp->fd_fd.fd_mtx, "struct filedesc", MTX_DEF); fdp->fd_fd.fd_refcnt = 1; fdp->fd_fd.fd_cmask = cmask; fdp->fd_fd.fd_ofiles = fdp->fd_dfiles; @@ -487,10 +488,12 @@ start_init(void *dummy) /* Get the vnode for '/'. Set p->p_fd->fd_cdir to reference it. 
*/ if (VFS_ROOT(TAILQ_FIRST(&mountlist), &rootvnode)) panic("cannot find root vnode"); + FILEDESC_LOCK(p->p_fd); p->p_fd->fd_cdir = rootvnode; VREF(p->p_fd->fd_cdir); p->p_fd->fd_rdir = rootvnode; VREF(p->p_fd->fd_rdir); + FILEDESC_UNLOCK(p->p_fd); VOP_UNLOCK(rootvnode, 0, td); if (devfs_present) { diff --git a/sys/kern/kern_acl.c b/sys/kern/kern_acl.c index b50c89645d73..63be63d733a3 100644 --- a/sys/kern/kern_acl.c +++ b/sys/kern/kern_acl.c @@ -703,6 +703,7 @@ __acl_get_fd(struct thread *td, struct __acl_get_fd_args *uap) if (error == 0) { error = vacl_get_acl(td, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); @@ -724,6 +725,7 @@ __acl_set_fd(struct thread *td, struct __acl_set_fd_args *uap) if (error == 0) { error = vacl_set_acl(td, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); @@ -767,6 +769,7 @@ __acl_delete_fd(struct thread *td, struct __acl_delete_fd_args *uap) if (error == 0) { error = vacl_delete(td, (struct vnode *)fp->f_data, SCARG(uap, type)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); @@ -811,6 +814,7 @@ __acl_aclcheck_fd(struct thread *td, struct __acl_aclcheck_fd_args *uap) if (error == 0) { error = vacl_aclcheck(td, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 55bab6301e89..1538698b11aa 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -109,6 +109,7 @@ static int badfo_close __P((struct file *fp, struct thread *td)); struct filelist filehead; /* head of list of open files */ int nfiles; /* actual number of open files */ extern int cmask; +struct sx filelist_lock; /* sx to protect filelist */ /* * System calls on descriptors. 
@@ -163,22 +164,27 @@ dup2(td, uap) int i, error; mtx_lock(&Giant); + FILEDESC_LOCK(fdp); retry: if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL || new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || new >= maxfilesperproc) { + FILEDESC_UNLOCK(fdp); error = EBADF; goto done2; } if (old == new) { td->td_retval[0] = new; + FILEDESC_UNLOCK(fdp); error = 0; goto done2; } if (new >= fdp->fd_nfiles) { - if ((error = fdalloc(td, new, &i))) + if ((error = fdalloc(td, new, &i))) { + FILEDESC_UNLOCK(fdp); goto done2; + } if (new != i) panic("dup2: fdalloc"); /* @@ -216,12 +222,16 @@ dup(td, uap) mtx_lock(&Giant); old = uap->fd; fdp = td->td_proc->p_fd; + FILEDESC_LOCK(fdp); if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) { + FILEDESC_UNLOCK(fdp); error = EBADF; goto done2; } - if ((error = fdalloc(td, 0, &new))) + if ((error = fdalloc(td, 0, &new))) { + FILEDESC_UNLOCK(fdp); goto done2; + } error = do_dup(fdp, (int)old, new, td->td_retval, td); done2: mtx_unlock(&Giant); @@ -255,12 +265,15 @@ fcntl(td, uap) int i, tmp, error = 0, flg = F_POSIX; struct flock fl; u_int newmin; + struct proc *leaderp; mtx_lock(&Giant); fdp = p->p_fd; + FILEDESC_LOCK(fdp); if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) { + FILEDESC_UNLOCK(fdp); error = EBADF; goto done2; } @@ -271,28 +284,37 @@ fcntl(td, uap) newmin = uap->arg; if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || newmin >= maxfilesperproc) { + FILEDESC_UNLOCK(fdp); error = EINVAL; break; } - if ((error = fdalloc(td, newmin, &i))) + if ((error = fdalloc(td, newmin, &i))) { + FILEDESC_UNLOCK(fdp); break; + } error = do_dup(fdp, uap->fd, i, td->td_retval, td); break; case F_GETFD: td->td_retval[0] = *pop & 1; + FILEDESC_UNLOCK(fdp); break; case F_SETFD: *pop = (*pop &~ 1) | (uap->arg & 1); + FILEDESC_UNLOCK(fdp); break; case F_GETFL: + FILE_LOCK(fp); + FILEDESC_UNLOCK(fdp); td->td_retval[0] = OFLAGS(fp->f_flag); + FILE_UNLOCK(fp); break; case F_SETFL: fhold(fp); + 
FILEDESC_UNLOCK(fdp); fp->f_flag &= ~FCNTLFLAGS; fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS; tmp = fp->f_flag & FNONBLOCK; @@ -315,12 +337,14 @@ fcntl(td, uap) case F_GETOWN: fhold(fp); + FILEDESC_UNLOCK(fdp); error = fo_ioctl(fp, FIOGETOWN, (caddr_t)td->td_retval, td); fdrop(fp, td); break; case F_SETOWN: fhold(fp); + FILEDESC_UNLOCK(fdp); error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, td); fdrop(fp, td); break; @@ -331,15 +355,18 @@ fcntl(td, uap) case F_SETLK: if (fp->f_type != DTYPE_VNODE) { + FILEDESC_UNLOCK(fdp); error = EBADF; break; } vp = (struct vnode *)fp->f_data; - /* * copyin/lockop may block */ fhold(fp); + FILEDESC_UNLOCK(fdp); + vp = (struct vnode *)fp->f_data; + /* Copy in the lock structure */ error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl, sizeof(fl)); @@ -364,8 +391,11 @@ fcntl(td, uap) error = EBADF; break; } + PROC_LOCK(p); p->p_flag |= P_ADVLOCK; - error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, + leaderp = p->p_leader; + PROC_UNLOCK(p); + error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK, &fl, flg); break; case F_WRLCK: @@ -373,12 +403,18 @@ fcntl(td, uap) error = EBADF; break; } + PROC_LOCK(p); p->p_flag |= P_ADVLOCK; - error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, + leaderp = p->p_leader; + PROC_UNLOCK(p); + error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK, &fl, flg); break; case F_UNLCK: - error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, + PROC_LOCK(p); + leaderp = p->p_leader; + PROC_UNLOCK(p); + error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_UNLCK, &fl, F_POSIX); break; default: @@ -390,6 +426,7 @@ fcntl(td, uap) case F_GETLK: if (fp->f_type != DTYPE_VNODE) { + FILEDESC_UNLOCK(fdp); error = EBADF; break; } @@ -398,6 +435,9 @@ fcntl(td, uap) * copyin/lockop may block */ fhold(fp); + FILEDESC_UNLOCK(fdp); + vp = (struct vnode *)fp->f_data; + /* Copy in the lock structure */ error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl, sizeof(fl)); @@ -431,6 +471,7 @@ fcntl(td, uap) } break; 
default: + FILEDESC_UNLOCK(fdp); error = EINVAL; break; } @@ -441,6 +482,7 @@ fcntl(td, uap) /* * Common code for dup, dup2, and fcntl(F_DUPFD). + * filedesc must be locked, but will be unlocked as a side effect. */ static int do_dup(fdp, old, new, retval, td) @@ -452,6 +494,8 @@ do_dup(fdp, old, new, retval, td) struct file *fp; struct file *delfp; + FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); + /* * Save info on the descriptor being overwritten. We have * to do the unmap now, but we cannot close it without @@ -474,6 +518,8 @@ do_dup(fdp, old, new, retval, td) fdp->fd_lastfile = new; *retval = new; + FILEDESC_UNLOCK(fdp); + /* * If we dup'd over a valid file, we now own the reference to it * and must dispose of it using closef() semantics (as if a @@ -632,8 +678,10 @@ close(td, uap) mtx_lock(&Giant); fdp = td->td_proc->p_fd; + FILEDESC_LOCK(fdp); if ((unsigned)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) { + FILEDESC_UNLOCK(fdp); error = EBADF; goto done2; } @@ -652,8 +700,12 @@ close(td, uap) fdp->fd_lastfile--; if (fd < fdp->fd_freefile) fdp->fd_freefile = fd; - if (fd < fdp->fd_knlistsize) + if (fd < fdp->fd_knlistsize) { + FILEDESC_UNLOCK(fdp); knote_fdclose(td, fd); + } else + FILEDESC_UNLOCK(fdp); + error = closef(fp, td); done2: mtx_unlock(&Giant); @@ -756,7 +808,6 @@ nfstat(td, uap) struct nstat nub; int error; - mtx_lock(&Giant); if ((error = fget(td, uap->fd, &fp)) != 0) goto done2; error = fo_stat(fp, &ub, td); @@ -792,6 +843,9 @@ fpathconf(td, uap) struct vnode *vp; int error; + fp = ffind_hold(td, uap->fd); + if (fp == NULL) + return (EBADF); mtx_lock(&Giant); if ((error = fget(td, uap->fd, &fp)) != 0) goto done2; @@ -800,6 +854,7 @@ fpathconf(td, uap) case DTYPE_PIPE: case DTYPE_SOCKET: if (uap->name != _PC_PIPE_BUF) { + fdrop(fp, td); error = EINVAL; goto done2; } @@ -837,9 +892,11 @@ fdalloc(td, want, result) register struct filedesc *fdp = td->td_proc->p_fd; register int i; int lim, last, nfiles; - struct file **newofile; + struct file 
**newofile, **oldofile; char *newofileflags; + FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); + /* * Search for a free descriptor starting at the higher * of want or fd_freefile. If that fails, consider @@ -871,15 +928,19 @@ fdalloc(td, want, result) nfiles = NDEXTENT; else nfiles = 2 * fdp->fd_nfiles; + FILEDESC_UNLOCK(fdp); MALLOC(newofile, struct file **, nfiles * OFILESIZE, M_FILEDESC, M_WAITOK); + FILEDESC_LOCK(fdp); /* * deal with file-table extend race that might have occured * when malloc was blocked. */ if (fdp->fd_nfiles >= nfiles) { + FILEDESC_UNLOCK(fdp); FREE(newofile, M_FILEDESC); + FILEDESC_LOCK(fdp); continue; } newofileflags = (char *) &newofile[nfiles]; @@ -894,11 +955,15 @@ fdalloc(td, want, result) (i = sizeof(char) * fdp->fd_nfiles)); bzero(newofileflags + i, nfiles * sizeof(char) - i); if (fdp->fd_nfiles > NDFILE) - FREE(fdp->fd_ofiles, M_FILEDESC); + oldofile = fdp->fd_ofiles; + else + oldofile = NULL; fdp->fd_ofiles = newofile; fdp->fd_ofileflags = newofileflags; fdp->fd_nfiles = nfiles; fdexpand++; + if (oldofile != NULL) + FREE(oldofile, M_FILEDESC); } return (0); } @@ -917,6 +982,8 @@ fdavail(td, n) register struct file **fpp; register int i, lim, last; + FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); + lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc); if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) return (1); @@ -944,17 +1011,20 @@ falloc(td, resultfp, resultfd) register struct file *fp, *fq; int error, i; + sx_xlock(&filelist_lock); if (nfiles >= maxfiles) { + sx_xunlock(&filelist_lock); tablefull("file"); return (ENFILE); } + nfiles++; + sx_xunlock(&filelist_lock); /* * Allocate a new file descriptor. * If the process has file descriptor zero open, add to the list * of open files at that point, otherwise put it at the front of * the list of open files. 
*/ - nfiles++; MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK | M_ZERO); /* @@ -962,21 +1032,32 @@ falloc(td, resultfp, resultfd) * allocating the slot, else a race might have shrunk it if we had * allocated it before the malloc. */ + FILEDESC_LOCK(p->p_fd); if ((error = fdalloc(td, 0, &i))) { + FILEDESC_UNLOCK(p->p_fd); + sx_xlock(&filelist_lock); nfiles--; + sx_xunlock(&filelist_lock); FREE(fp, M_FILE); return (error); } + mtx_init(&fp->f_mtx, "file structure", MTX_DEF); + fp->f_gcflag = 0; fp->f_count = 1; fp->f_cred = crhold(p->p_ucred); fp->f_ops = &badfileops; fp->f_seqcount = 1; + FILEDESC_UNLOCK(p->p_fd); + sx_xlock(&filelist_lock); + FILEDESC_LOCK(p->p_fd); if ((fq = p->p_fd->fd_ofiles[0])) { LIST_INSERT_AFTER(fq, fp, f_list); } else { LIST_INSERT_HEAD(&filehead, fp, f_list); } p->p_fd->fd_ofiles[i] = fp; + FILEDESC_UNLOCK(p->p_fd); + sx_xunlock(&filelist_lock); if (resultfp) *resultfp = fp; if (resultfd) @@ -991,10 +1072,14 @@ void ffree(fp) register struct file *fp; { + KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!")); + sx_xlock(&filelist_lock); LIST_REMOVE(fp, f_list); - crfree(fp->f_cred); nfiles--; + sx_xunlock(&filelist_lock); + crfree(fp->f_cred); + mtx_destroy(&fp->f_mtx); FREE(fp, M_FILE); } @@ -1010,6 +1095,8 @@ fdinit(td) MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0), M_FILEDESC, M_WAITOK | M_ZERO); + mtx_init(&newfdp->fd_fd.fd_mtx, "filedesc structure", MTX_DEF); + FILEDESC_LOCK(&newfdp->fd_fd); newfdp->fd_fd.fd_cdir = fdp->fd_cdir; if (newfdp->fd_fd.fd_cdir) VREF(newfdp->fd_fd.fd_cdir); @@ -1027,6 +1114,7 @@ fdinit(td) newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; newfdp->fd_fd.fd_nfiles = NDFILE; newfdp->fd_fd.fd_knlistsize = -1; + FILEDESC_UNLOCK(&newfdp->fd_fd); return (&newfdp->fd_fd); } @@ -1038,7 +1126,9 @@ struct filedesc * fdshare(p) struct proc *p; { + FILEDESC_LOCK(p->p_fd); p->p_fd->fd_refcnt++; + FILEDESC_UNLOCK(p->p_fd); return (p->p_fd); } @@ -1051,15 +1141,22 @@ fdcopy(td) { 
register struct filedesc *newfdp, *fdp = td->td_proc->p_fd; register struct file **fpp; - register int i; + register int i, j; /* Certain daemons might not have file descriptors. */ if (fdp == NULL) return (NULL); + FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); + + FILEDESC_UNLOCK(fdp); MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0), M_FILEDESC, M_WAITOK); + FILEDESC_LOCK(fdp); bcopy(fdp, newfdp, sizeof(struct filedesc)); + FILEDESC_UNLOCK(fdp); + bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx)); + mtx_init(&newfdp->fd_mtx, "filedesc structure", MTX_DEF); if (newfdp->fd_cdir) VREF(newfdp->fd_cdir); if (newfdp->fd_rdir) @@ -1074,6 +1171,9 @@ fdcopy(td) * additional memory for the number of descriptors currently * in use. */ + FILEDESC_LOCK(fdp); + newfdp->fd_lastfile = fdp->fd_lastfile; + newfdp->fd_nfiles = fdp->fd_nfiles; if (newfdp->fd_lastfile < NDFILE) { newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; newfdp->fd_ofileflags = @@ -1085,11 +1185,31 @@ fdcopy(td) * for the file descriptors currently in use, * allowing the table to shrink. */ +retry: i = newfdp->fd_nfiles; while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2) i /= 2; + FILEDESC_UNLOCK(fdp); MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE, M_FILEDESC, M_WAITOK); + FILEDESC_LOCK(fdp); + newfdp->fd_lastfile = fdp->fd_lastfile; + newfdp->fd_nfiles = fdp->fd_nfiles; + j = newfdp->fd_nfiles; + while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2) + j /= 2; + if (i != j) { + /* + * The size of the original table has changed. + * Go over once again. 
+ */ + FILEDESC_UNLOCK(fdp); + FREE(newfdp->fd_ofiles, M_FILEDESC); + FILEDESC_LOCK(fdp); + newfdp->fd_lastfile = fdp->fd_lastfile; + newfdp->fd_nfiles = fdp->fd_nfiles; + goto retry; + } newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; } newfdp->fd_nfiles = i; @@ -1118,8 +1238,9 @@ fdcopy(td) fpp = newfdp->fd_ofiles; for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) { - if (*fpp != NULL) + if (*fpp != NULL) { fhold(*fpp); + } } return (newfdp); } @@ -1139,12 +1260,16 @@ fdfree(td) if (fdp == NULL) return; - if (--fdp->fd_refcnt > 0) + FILEDESC_LOCK(fdp); + if (--fdp->fd_refcnt > 0) { + FILEDESC_UNLOCK(fdp); return; + } /* * we are the last reference to the structure, we can * safely assume it will not change out from under us. */ + FILEDESC_UNLOCK(fdp); fpp = fdp->fd_ofiles; for (i = fdp->fd_lastfile; i-- >= 0; fpp++) { if (*fpp) @@ -1162,6 +1287,7 @@ fdfree(td) FREE(fdp->fd_knlist, M_KQUEUE); if (fdp->fd_knhash) FREE(fdp->fd_knhash, M_KQUEUE); + mtx_destroy(&fdp->fd_mtx); FREE(fdp, M_FILEDESC); } @@ -1204,6 +1330,7 @@ setugidsafety(td) * note: fdp->fd_ofiles may be reallocated out from under us while * we are blocked in a close. Be careful! */ + FILEDESC_LOCK(fdp); for (i = 0; i <= fdp->fd_lastfile; i++) { if (i > 2) break; @@ -1214,8 +1341,11 @@ setugidsafety(td) if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0) (void) munmapfd(td, i); #endif - if (i < fdp->fd_knlistsize) + if (i < fdp->fd_knlistsize) { + FILEDESC_UNLOCK(fdp); knote_fdclose(td, i); + FILEDESC_LOCK(fdp); + } /* * NULL-out descriptor prior to close to avoid * a race while close blocks. 
@@ -1225,11 +1355,14 @@ setugidsafety(td) fdp->fd_ofileflags[i] = 0; if (i < fdp->fd_freefile) fdp->fd_freefile = i; + FILEDESC_UNLOCK(fdp); (void) closef(fp, td); + FILEDESC_LOCK(fdp); } } while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; + FILEDESC_UNLOCK(fdp); } /* @@ -1246,6 +1379,8 @@ fdcloseexec(td) if (fdp == NULL) return; + FILEDESC_LOCK(fdp); + /* * We cannot cache fd_ofiles or fd_ofileflags since operations * may block and rip them out from under us. @@ -1259,8 +1394,11 @@ fdcloseexec(td) if (fdp->fd_ofileflags[i] & UF_MAPPED) (void) munmapfd(td, i); #endif - if (i < fdp->fd_knlistsize) + if (i < fdp->fd_knlistsize) { + FILEDESC_UNLOCK(fdp); knote_fdclose(td, i); + FILEDESC_LOCK(fdp); + } /* * NULL-out descriptor prior to close to avoid * a race while close blocks. @@ -1270,11 +1408,14 @@ fdcloseexec(td) fdp->fd_ofileflags[i] = 0; if (i < fdp->fd_freefile) fdp->fd_freefile = i; + FILEDESC_UNLOCK(fdp); (void) closef(fp, td); + FILEDESC_LOCK(fdp); } } while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; + FILEDESC_UNLOCK(fdp); } /* @@ -1314,6 +1455,68 @@ closef(fp, td) return (fdrop(fp, td)); } +/* + * Find the struct file 'fd' in process 'p' and bump it's refcount + * struct file is not locked on return. + */ +struct file * +ffind_hold(td, fd) + struct thread *td; + int fd; +{ + struct filedesc *fdp; + struct file *fp; + + if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) + return (NULL); + FILEDESC_LOCK(fdp); + if (fd < 0 || fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL || + fp->f_ops == &badfileops) + fp = NULL; + else + fhold(fp); + FILEDESC_UNLOCK(fdp); + return (fp); +} + +/* + * Find the struct file 'fd' in process 'p' and bump it's refcount, + * struct file is locked on return. 
+ */ +struct file * +ffind_lock(td, fd) + struct thread *td; + int fd; +{ + struct filedesc *fdp; + struct file *fp; + + if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) + return (NULL); + FILEDESC_LOCK(fdp); + if (fd < 0 || fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL || + fp->f_ops == &badfileops) { + fp = NULL; + } else { + FILE_LOCK(fp); + fhold_locked(fp); + } + FILEDESC_UNLOCK(fdp); + return (fp); +} + +int +fdrop(fp, td) + struct file *fp; + struct thread *td; +{ + + FILE_LOCK(fp); + return (fdrop_locked(fp, td)); +} + /* * Extract the file pointer associated with the specified descriptor for * the current user process. If no error occured 0 is returned, *fpp @@ -1478,7 +1681,7 @@ fputsock(struct socket *so) } int -fdrop(fp, td) +fdrop_locked(fp, td) struct file *fp; struct thread *td; { @@ -1486,8 +1689,12 @@ fdrop(fp, td) struct vnode *vp; int error; - if (--fp->f_count > 0) + FILE_LOCK_ASSERT(fp, MA_OWNED); + + if (--fp->f_count > 0) { + FILE_UNLOCK(fp); return (0); + } if (fp->f_count < 0) panic("fdrop: count < 0"); if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { @@ -1496,8 +1703,10 @@ fdrop(fp, td) lf.l_len = 0; lf.l_type = F_UNLCK; vp = (struct vnode *)fp->f_data; + FILE_UNLOCK(fp); (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); - } + } else + FILE_UNLOCK(fp); if (fp->f_ops != &badfileops) error = fo_close(fp, td); else @@ -1527,30 +1736,29 @@ flock(td, uap) struct thread *td; register struct flock_args *uap; { - register struct filedesc *fdp = td->td_proc->p_fd; register struct file *fp; struct vnode *vp; struct flock lf; int error; - mtx_lock(&Giant); - - if ((unsigned)uap->fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[uap->fd]) == NULL) { - error = EBADF; - goto done2; - } + fp = ffind_hold(td, uap->fd); + if (fp == NULL) + return (EBADF); if (fp->f_type != DTYPE_VNODE) { - error = EOPNOTSUPP; - goto done2; + fdrop(fp, td); + return (EOPNOTSUPP); } + + mtx_lock(&Giant); vp = (struct vnode *)fp->f_data; 
lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (uap->how & LOCK_UN) { lf.l_type = F_UNLCK; + FILE_LOCK(fp); fp->f_flag &= ~FHASLOCK; + FILE_UNLOCK(fp); error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); goto done2; } @@ -1562,12 +1770,13 @@ flock(td, uap) error = EBADF; goto done2; } + FILE_LOCK(fp); fp->f_flag |= FHASLOCK; - if (uap->how & LOCK_NB) - error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK); - else - error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT); + FILE_UNLOCK(fp); + error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, + (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT); done2: + fdrop(fp, td); mtx_unlock(&Giant); return (error); } @@ -1619,8 +1828,10 @@ dupfdopen(td, fdp, indx, dfd, mode, error) * of file descriptors, or the fd to be dup'd has already been * closed, then reject. */ + FILEDESC_LOCK(fdp); if ((u_int)dfd >= fdp->fd_nfiles || (wfp = fdp->fd_ofiles[dfd]) == NULL) { + FILEDESC_UNLOCK(fdp); return (EBADF); } @@ -1642,8 +1853,12 @@ dupfdopen(td, fdp, indx, dfd, mode, error) * Check that the mode the file is being opened for is a * subset of the mode of the existing descriptor. */ - if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) + FILE_LOCK(wfp); + if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { + FILE_UNLOCK(wfp); + FILEDESC_UNLOCK(fdp); return (EACCES); + } fp = fdp->fd_ofiles[indx]; #if 0 if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED) @@ -1651,15 +1866,19 @@ dupfdopen(td, fdp, indx, dfd, mode, error) #endif fdp->fd_ofiles[indx] = wfp; fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; - fhold(wfp); + fhold_locked(wfp); + FILE_UNLOCK(wfp); if (indx > fdp->fd_lastfile) fdp->fd_lastfile = indx; + if (fp != NULL) + FILE_LOCK(fp); + FILEDESC_UNLOCK(fdp); /* * we now own the reference to fp that the ofiles[] array * used to own. Release it. 
*/ - if (fp) - fdrop(fp, td); + if (fp != NULL) + fdrop_locked(fp, td); return (0); case ENXIO: @@ -1676,12 +1895,6 @@ dupfdopen(td, fdp, indx, dfd, mode, error) fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; fdp->fd_ofileflags[dfd] = 0; - /* - * we now own the reference to fp that the ofiles[] array - * used to own. Release it. - */ - if (fp) - fdrop(fp, td); /* * Complete the clean up of the filedesc structure by * recomputing the various hints. @@ -1696,9 +1909,20 @@ dupfdopen(td, fdp, indx, dfd, mode, error) if (dfd < fdp->fd_freefile) fdp->fd_freefile = dfd; } + if (fp != NULL) + FILE_LOCK(fp); + FILEDESC_UNLOCK(fdp); + + /* + * we now own the reference to fp that the ofiles[] array + * used to own. Release it. + */ + if (fp != NULL) + fdrop_locked(fp, td); return (0); default: + FILEDESC_UNLOCK(fdp); return (error); } /* NOTREACHED */ @@ -1713,26 +1937,34 @@ sysctl_kern_file(SYSCTL_HANDLER_ARGS) int error; struct file *fp; + sx_slock(&filelist_lock); if (!req->oldptr) { /* * overestimate by 10 files */ - return (SYSCTL_OUT(req, 0, sizeof(filehead) + - (nfiles + 10) * sizeof(struct file))); + error = SYSCTL_OUT(req, 0, sizeof(filehead) + + (nfiles + 10) * sizeof(struct file)); + sx_sunlock(&filelist_lock); + return (error); } error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead)); - if (error) + if (error) { + sx_sunlock(&filelist_lock); return (error); + } /* * followed by an array of file structures */ LIST_FOREACH(fp, &filehead, f_list) { error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file)); - if (error) + if (error) { + sx_sunlock(&filelist_lock); return (error); + } } + sx_sunlock(&filelist_lock); return (0); } @@ -1842,3 +2074,14 @@ badfo_close(fp, td) SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR, fildesc_drvinit,NULL) + +static void filelistinit __P((void *)); +SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL) + +/* ARGSUSED*/ +static void +filelistinit(dummy) + void *dummy; +{ + sx_init(&filelist_lock, 
"filelist lock"); +} diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index 6bec0568273e..038b23335c19 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -372,15 +372,20 @@ kqueue(struct thread *td, struct kqueue_args *uap) error = falloc(td, &fp, &fd); if (error) goto done2; + kq = malloc(sizeof(struct kqueue), M_KQUEUE, M_WAITOK | M_ZERO); + TAILQ_INIT(&kq->kq_head); + FILE_LOCK(fp); fp->f_flag = FREAD | FWRITE; fp->f_type = DTYPE_KQUEUE; fp->f_ops = &kqueueops; - kq = malloc(sizeof(struct kqueue), M_KQUEUE, M_WAITOK | M_ZERO); TAILQ_INIT(&kq->kq_head); fp->f_data = (caddr_t)kq; + FILE_UNLOCK(fp); + FILEDESC_LOCK(fdp); td->td_retval[0] = fd; if (fdp->fd_knlistsize < 0) fdp->fd_knlistsize = 0; /* this process has a kq */ + FILEDESC_UNLOCK(fdp); kq->kq_fdp = fdp; done2: mtx_unlock(&Giant); @@ -409,19 +414,19 @@ kevent(struct thread *td, struct kevent_args *uap) struct timespec ts; int i, n, nerrors, error; - mtx_lock(&Giant); - if ((error = fget(td, uap->fd, &fp)) != 0) - goto done; - if (fp->f_type != DTYPE_KQUEUE) { - error = EBADF; - goto done; + fp = ffind_hold(td, uap->fd); + if (fp == NULL || fp->f_type != DTYPE_KQUEUE) { + if (fp != NULL) + fdrop(fp, td); + return (EBADF); } if (uap->timeout != NULL) { error = copyin(uap->timeout, &ts, sizeof(ts)); if (error) - goto done; + goto done_nogiant; uap->timeout = &ts; } + mtx_lock(&Giant); kq = (struct kqueue *)fp->f_data; nerrors = 0; @@ -462,9 +467,10 @@ kevent(struct thread *td, struct kevent_args *uap) error = kqueue_scan(fp, uap->nevents, uap->eventlist, uap->timeout, td); done: + mtx_unlock(&Giant); +done_nogiant: if (fp != NULL) fdrop(fp, td); - mtx_unlock(&Giant); return (error); } @@ -521,11 +527,14 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td) return (EINVAL); } + FILEDESC_LOCK(fdp); if (fops->f_isfd) { /* validate descriptor */ if ((u_int)kev->ident >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[kev->ident]) == NULL) + (fp = 
fdp->fd_ofiles[kev->ident]) == NULL) { + FILEDESC_UNLOCK(fdp); return (EBADF); + } fhold(fp); if (kev->ident < fdp->fd_knlistsize) { @@ -547,6 +556,7 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td) break; } } + FILEDESC_UNLOCK(fdp); if (kn == NULL && ((kev->flags & EV_ADD) == 0)) { error = ENOENT; @@ -633,12 +643,15 @@ static int kqueue_scan(struct file *fp, int maxevents, struct kevent *ulistp, const struct timespec *tsp, struct thread *td) { - struct kqueue *kq = (struct kqueue *)fp->f_data; + struct kqueue *kq; struct kevent *kevp; struct timeval atv, rtv, ttv; struct knote *kn, marker; int s, count, timeout, nkev = 0, error = 0; + FILE_LOCK_ASSERT(fp, MA_NOTOWNED); + + kq = (struct kqueue *)fp->f_data; count = maxevents; if (count == 0) goto done; @@ -788,10 +801,11 @@ kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct thread *td) static int kqueue_poll(struct file *fp, int events, struct ucred *cred, struct thread *td) { - struct kqueue *kq = (struct kqueue *)fp->f_data; + struct kqueue *kq; int revents = 0; int s = splnet(); + kq = (struct kqueue *)fp->f_data; if (events & (POLLIN | POLLRDNORM)) { if (kq->kq_count) { revents |= events & (POLLIN | POLLRDNORM); @@ -808,8 +822,9 @@ kqueue_poll(struct file *fp, int events, struct ucred *cred, struct thread *td) static int kqueue_stat(struct file *fp, struct stat *st, struct thread *td) { - struct kqueue *kq = (struct kqueue *)fp->f_data; + struct kqueue *kq; + kq = (struct kqueue *)fp->f_data; bzero((void *)st, sizeof(*st)); st->st_size = kq->kq_count; st->st_blksize = sizeof(struct kevent); @@ -826,6 +841,7 @@ kqueue_close(struct file *fp, struct thread *td) struct knote **knp, *kn, *kn0; int i; + FILEDESC_LOCK(fdp); for (i = 0; i < fdp->fd_knlistsize; i++) { knp = &SLIST_FIRST(&fdp->fd_knlist[i]); kn = *knp; @@ -833,9 +849,12 @@ kqueue_close(struct file *fp, struct thread *td) kn0 = SLIST_NEXT(kn, kn_link); if (kq == kn->kn_kq) { kn->kn_fop->f_detach(kn); - 
fdrop(kn->kn_fp, td); - knote_free(kn); *knp = kn0; + FILE_LOCK(kn->kn_fp); + FILEDESC_UNLOCK(fdp); + fdrop_locked(kn->kn_fp, td); + knote_free(kn); + FILEDESC_LOCK(fdp); } else { knp = &SLIST_NEXT(kn, kn_link); } @@ -850,9 +869,11 @@ kqueue_close(struct file *fp, struct thread *td) kn0 = SLIST_NEXT(kn, kn_link); if (kq == kn->kn_kq) { kn->kn_fop->f_detach(kn); - /* XXX non-fd release of kn->kn_ptr */ - knote_free(kn); *knp = kn0; + /* XXX non-fd release of kn->kn_ptr */ + FILEDESC_UNLOCK(fdp); + knote_free(kn); + FILEDESC_LOCK(fdp); } else { knp = &SLIST_NEXT(kn, kn_link); } @@ -860,6 +881,7 @@ kqueue_close(struct file *fp, struct thread *td) } } } + FILEDESC_UNLOCK(fdp); free(kq, M_KQUEUE); fp->f_data = NULL; @@ -915,16 +937,21 @@ void knote_fdclose(struct thread *td, int fd) { struct filedesc *fdp = td->td_proc->p_fd; - struct klist *list = &fdp->fd_knlist[fd]; + struct klist *list; + FILEDESC_LOCK(fdp); + list = &fdp->fd_knlist[fd]; + FILEDESC_UNLOCK(fdp); knote_remove(td, list); } static void knote_attach(struct knote *kn, struct filedesc *fdp) { - struct klist *list; - int size; + struct klist *list, *oldlist; + int size, newsize; + + FILEDESC_LOCK(fdp); if (! 
kn->kn_fop->f_isfd) { if (fdp->fd_knhashmask == 0) @@ -935,23 +962,42 @@ knote_attach(struct knote *kn, struct filedesc *fdp) } if (fdp->fd_knlistsize <= kn->kn_id) { +retry: size = fdp->fd_knlistsize; while (size <= kn->kn_id) size += KQEXTENT; + FILEDESC_UNLOCK(fdp); MALLOC(list, struct klist *, size * sizeof(struct klist *), M_KQUEUE, M_WAITOK); + FILEDESC_LOCK(fdp); + newsize = fdp->fd_knlistsize; + while (newsize <= kn->kn_id) + newsize += KQEXTENT; + if (newsize != size) { + FILEDESC_UNLOCK(fdp); + free(list, M_TEMP); + FILEDESC_LOCK(fdp); + goto retry; + } bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list, fdp->fd_knlistsize * sizeof(struct klist *)); bzero((caddr_t)list + fdp->fd_knlistsize * sizeof(struct klist *), (size - fdp->fd_knlistsize) * sizeof(struct klist *)); if (fdp->fd_knlist != NULL) - FREE(fdp->fd_knlist, M_KQUEUE); + oldlist = fdp->fd_knlist; + else + oldlist = NULL; fdp->fd_knlistsize = size; fdp->fd_knlist = list; + FILEDESC_UNLOCK(fdp); + if (oldlist != NULL) + FREE(oldlist, M_KQUEUE); + FILEDESC_LOCK(fdp); } list = &fdp->fd_knlist[kn->kn_id]; done: + FILEDESC_UNLOCK(fdp); SLIST_INSERT_HEAD(list, kn, kn_link); kn->kn_status = 0; } @@ -966,16 +1012,20 @@ knote_drop(struct knote *kn, struct thread *td) struct filedesc *fdp = td->td_proc->p_fd; struct klist *list; + FILEDESC_LOCK(fdp); if (kn->kn_fop->f_isfd) list = &fdp->fd_knlist[kn->kn_id]; else list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)]; + if (kn->kn_fop->f_isfd) + FILE_LOCK(kn->kn_fp); + FILEDESC_UNLOCK(fdp); SLIST_REMOVE(list, kn, knote, kn_link); if (kn->kn_status & KN_QUEUED) knote_dequeue(kn); if (kn->kn_fop->f_isfd) - fdrop(kn->kn_fp, td); + fdrop_locked(kn->kn_fp, td); knote_free(kn); } diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 670d5dddd117..3fe2ab367b7f 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -281,13 +281,16 @@ execve(td, uap) * For security and other reasons, the file descriptor table cannot * be shared after an exec. 
*/ + FILEDESC_LOCK(p->p_fd); if (p->p_fd->fd_refcnt > 1) { struct filedesc *tmp; tmp = fdcopy(td); + FILEDESC_UNLOCK(p->p_fd); fdfree(td); p->p_fd = tmp; - } + } else + FILEDESC_UNLOCK(p->p_fd); /* * For security and other reasons, signal handlers cannot diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index a0b86889141c..da4652207b29 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -269,14 +269,18 @@ fork1(td, flags, procp) * Unshare file descriptors (from parent.) */ if (flags & RFFDG) { + FILEDESC_LOCK(p1->p_fd); if (p1->p_fd->fd_refcnt > 1) { struct filedesc *newfd; + newfd = fdcopy(td); + FILEDESC_UNLOCK(p1->p_fd); PROC_LOCK(p1); fdfree(td); p1->p_fd = newfd; PROC_UNLOCK(p1); - } + } else + FILEDESC_UNLOCK(p1->p_fd); } *procp = NULL; return (0); @@ -519,9 +523,11 @@ fork1(td, flags, procp) if (flags & RFCFDG) fd = fdinit(td); - else if (flags & RFFDG) + else if (flags & RFFDG) { + FILEDESC_LOCK(p1->p_fd); fd = fdcopy(td); - else + FILEDESC_UNLOCK(p1->p_fd); + } else fd = fdshare(p1); PROC_LOCK(p2); p2->p_fd = fd; diff --git a/sys/kern/subr_acl_posix1e.c b/sys/kern/subr_acl_posix1e.c index b50c89645d73..63be63d733a3 100644 --- a/sys/kern/subr_acl_posix1e.c +++ b/sys/kern/subr_acl_posix1e.c @@ -703,6 +703,7 @@ __acl_get_fd(struct thread *td, struct __acl_get_fd_args *uap) if (error == 0) { error = vacl_get_acl(td, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); @@ -724,6 +725,7 @@ __acl_set_fd(struct thread *td, struct __acl_set_fd_args *uap) if (error == 0) { error = vacl_set_acl(td, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); @@ -767,6 +769,7 @@ __acl_delete_fd(struct thread *td, struct __acl_delete_fd_args *uap) if (error == 0) { error = vacl_delete(td, (struct vnode *)fp->f_data, SCARG(uap, type)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); @@ -811,6 +814,7 @@ 
__acl_aclcheck_fd(struct thread *td, struct __acl_aclcheck_fd_args *uap) if (error == 0) { error = vacl_aclcheck(td, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index 352e3254857f..df85cf257f4e 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -83,6 +83,30 @@ static int dofileread __P((struct thread *, struct file *, int, void *, static int dofilewrite __P((struct thread *, struct file *, int, const void *, size_t, off_t, int)); +struct file* +holdfp(fdp, fd, flag) + struct filedesc* fdp; + int fd, flag; +{ + struct file* fp; + + FILEDESC_LOCK(fdp); + if (((u_int)fd) >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL) { + FILEDESC_UNLOCK(fdp); + return (NULL); + } + FILE_LOCK(fp); + FILEDESC_UNLOCK(fdp); + if ((fp->f_flag & flag) == 0) { + FILE_UNLOCK(fp); + return (NULL); + } + fp->f_count++; + FILE_UNLOCK(fp); + return (fp); +} + /* * Read system call. 
*/ @@ -137,17 +161,18 @@ pread(td, uap) struct file *fp; int error; - mtx_lock(&Giant); - if ((error = fget_read(td, uap->fd, &fp)) == 0) { - if (fp->f_type == DTYPE_VNODE) { - error = dofileread(td, fp, uap->fd, uap->buf, - uap->nbyte, uap->offset, FOF_OFFSET); - } else { - error = ESPIPE; - } - fdrop(fp, td); + fp = holdfp(td->td_proc->p_fd, uap->fd, FREAD); + if (fp == NULL) + return (EBADF); + if (fp->f_type != DTYPE_VNODE) { + error = ESPIPE; + } else { + mtx_lock(&Giant); + error = dofileread(td, fp, uap->fd, uap->buf, uap->nbyte, + uap->offset, FOF_OFFSET); + mtx_unlock(&Giant); } - mtx_unlock(&Giant); + fdrop(fp, td); return(error); } @@ -381,7 +406,6 @@ pwrite(td, uap) } else { error = EBADF; /* this can't be right */ } - mtx_unlock(&Giant); return(error); } @@ -592,26 +616,27 @@ ioctl(td, uap) long align; } ubuf; - mtx_lock(&Giant); - fdp = td->td_proc->p_fd; - if ((u_int)uap->fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[uap->fd]) == NULL) { - error = EBADF; - goto done2; - } - + fp = ffind_hold(td, uap->fd); + if (fp == NULL) + return (EBADF); if ((fp->f_flag & (FREAD | FWRITE)) == 0) { - error = EBADF; - goto done2; + fdrop(fp, td); + return (EBADF); } - + fdp = td->td_proc->p_fd; switch (com = uap->com) { case FIONCLEX: + FILEDESC_LOCK(fdp); fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; - goto done2; + FILEDESC_UNLOCK(fdp); + fdrop(fp, td); + return (0); case FIOCLEX: + FILEDESC_LOCK(fdp); fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; - goto done2; + FILEDESC_UNLOCK(fdp); + fdrop(fp, td); + return (0); } /* @@ -620,12 +645,11 @@ ioctl(td, uap) */ size = IOCPARM_LEN(com); if (size > IOCPARM_MAX) { - error = ENOTTY; - goto done2; + fdrop(fp, td); + return (ENOTTY); } - fhold(fp); - + mtx_lock(&Giant); memp = NULL; if (size > sizeof (ubuf.stkbuf)) { memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); @@ -640,7 +664,7 @@ ioctl(td, uap) if (memp) free(memp, M_IOCTLOPS); fdrop(fp, td); - goto done2; + goto done; } } else { *(caddr_t *)data = uap->data; 
@@ -658,18 +682,22 @@ ioctl(td, uap) switch (com) { case FIONBIO: + FILE_LOCK(fp); if ((tmp = *(int *)data)) fp->f_flag |= FNONBLOCK; else fp->f_flag &= ~FNONBLOCK; + FILE_UNLOCK(fp); error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td); break; case FIOASYNC: + FILE_LOCK(fp); if ((tmp = *(int *)data)) fp->f_flag |= FASYNC; else fp->f_flag &= ~FASYNC; + FILE_UNLOCK(fp); error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td); break; @@ -686,7 +714,7 @@ ioctl(td, uap) if (memp) free(memp, M_IOCTLOPS); fdrop(fp, td); -done2: +done: mtx_unlock(&Giant); return (error); } @@ -713,6 +741,7 @@ select(td, uap) register struct thread *td; register struct select_args *uap; { + struct filedesc *fdp; /* * The magic 2048 here is chosen to be just enough for FD_SETSIZE * infds with the new FD_SETSIZE of 1024, and more than enough for @@ -728,11 +757,13 @@ select(td, uap) if (uap->nd < 0) return (EINVAL); - + fdp = td->td_proc->p_fd; mtx_lock(&Giant); + FILEDESC_LOCK(fdp); if (uap->nd > td->td_proc->p_fd->fd_nfiles) uap->nd = td->td_proc->p_fd->fd_nfiles; /* forgiving; slightly wrong */ + FILEDESC_UNLOCK(fdp); /* * Allocate just enough bits for the non-null fd_sets. Use the @@ -887,6 +918,11 @@ select(td, uap) return (error); } +/* + * Used to hold then release a group of fds for select(2). + * Hold (hold == 1) or release (hold == 0) a group of filedescriptors. + * if holding then use ibits setting the bits in obits, otherwise use obits. 
+ */ static int selholddrop(td, ibits, obits, nfd, hold) struct thread *td; @@ -898,6 +934,7 @@ selholddrop(td, ibits, obits, nfd, hold) fd_mask bits; struct file *fp; + FILEDESC_LOCK(fdp); for (i = 0; i < nfd; i += NFDBITS) { if (hold) bits = ibits[i/NFDBITS]; @@ -908,16 +945,28 @@ selholddrop(td, ibits, obits, nfd, hold) if (!(bits & 1)) continue; fp = fdp->fd_ofiles[fd]; - if (fp == NULL) + if (fp == NULL) { + FILEDESC_UNLOCK(fdp); return (EBADF); + } if (hold) { fhold(fp); obits[(fd)/NFDBITS] |= ((fd_mask)1 << ((fd) % NFDBITS)); - } else + } else { + /* XXX: optimize by making a special + * version of fdrop that only unlocks + * the filedesc if needed? This would + * reduce the number of lock/unlock + * pairs by quite a bit. + */ + FILEDESC_UNLOCK(fdp); fdrop(fp, td); + FILEDESC_LOCK(fdp); + } } } + FILEDESC_UNLOCK(fdp); return (0); } @@ -927,7 +976,6 @@ selscan(td, ibits, obits, nfd) fd_mask **ibits, **obits; int nfd; { - struct filedesc *fdp = td->td_proc->p_fd; int msk, i, fd; fd_mask bits; struct file *fp; @@ -944,7 +992,7 @@ selscan(td, ibits, obits, nfd) for (fd = i; bits && fd < nfd; fd++, bits >>= 1) { if (!(bits & 1)) continue; - fp = fdp->fd_ofiles[fd]; + fp = ffind_hold(td, fd); if (fp == NULL) return (EBADF); if (fo_poll(fp, flag[msk], fp->f_cred, td)) { @@ -952,6 +1000,7 @@ selscan(td, ibits, obits, nfd) ((fd_mask)1 << ((fd) % NFDBITS)); n++; } + fdrop(fp, td); } } } @@ -1116,6 +1165,7 @@ pollholddrop(td, fds, nfd, hold) int i; struct file *fp; + FILEDESC_LOCK(fdp); for (i = 0; i < nfd; i++, fds++) { if (0 <= fds->fd && fds->fd < fdp->fd_nfiles) { fp = fdp->fd_ofiles[fds->fd]; @@ -1125,10 +1175,15 @@ pollholddrop(td, fds, nfd, hold) fds->revents = 1; } else fds->revents = 0; - } else if(fp != NULL && fds->revents) - fdrop(fp, td); + } else if(fp != NULL && fds->revents) { + FILE_LOCK(fp); + FILEDESC_UNLOCK(fdp); + fdrop_locked(fp, td); + FILEDESC_LOCK(fdp); + } } } + FILEDESC_UNLOCK(fdp); return (0); } @@ -1144,13 +1199,17 @@ pollscan(td, fds, 
nfd) int n = 0; for (i = 0; i < nfd; i++, fds++) { + FILEDESC_LOCK(fdp); if (fds->fd >= fdp->fd_nfiles) { fds->revents = POLLNVAL; n++; + FILEDESC_UNLOCK(fdp); } else if (fds->fd < 0) { fds->revents = 0; + FILEDESC_UNLOCK(fdp); } else { fp = fdp->fd_ofiles[fds->fd]; + FILEDESC_UNLOCK(fdp); if (fp == NULL) { fds->revents = POLLNVAL; n++; diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index fd16065a6c62..49f1959d10b7 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -205,27 +205,33 @@ pipe(td, uap) * to avoid races against processes which manage to dup() the read * side while we are blocked trying to allocate the write side. */ + FILE_LOCK(rf); rf->f_flag = FREAD | FWRITE; rf->f_type = DTYPE_PIPE; rf->f_data = (caddr_t)rpipe; rf->f_ops = &pipeops; + FILE_UNLOCK(rf); error = falloc(td, &wf, &fd); if (error) { + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[td->td_retval[0]] == rf) { fdp->fd_ofiles[td->td_retval[0]] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(rf, td); - } + } else + FILEDESC_UNLOCK(fdp); fdrop(rf, td); /* rpipe has been closed by fdrop(). */ pipeclose(wpipe); return (error); } + FILE_LOCK(wf); wf->f_flag = FREAD | FWRITE; wf->f_type = DTYPE_PIPE; wf->f_data = (caddr_t)wpipe; wf->f_ops = &pipeops; + FILE_UNLOCK(wf); td->td_retval[1] = fd; - rpipe->pipe_peer = wpipe; wpipe->pipe_peer = rpipe; fdrop(rf, td); @@ -495,9 +501,12 @@ pipe_read(fp, uio, cred, flags, td) * Handle non-blocking mode operation or * wait for more data. */ + FILE_LOCK(fp); if (fp->f_flag & FNONBLOCK) { + FILE_UNLOCK(fp); error = EAGAIN; } else { + FILE_UNLOCK(fp); rpipe->pipe_state |= PIPE_WANTR; if ((error = tsleep(rpipe, PRIBIO | PCATCH, "piperd", 0)) == 0) @@ -825,15 +834,18 @@ pipe_write(fp, uio, cred, flags, td) * The direct write mechanism will detect the reader going * away on us. 
*/ + FILE_LOCK(fp); if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && (fp->f_flag & FNONBLOCK) == 0 && (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) && (uio->uio_iov->iov_len >= PIPE_MINDIRECT)) { + FILE_UNLOCK(fp); error = pipe_direct_write( wpipe, uio); if (error) break; continue; - } + } else + FILE_UNLOCK(fp); #endif /* @@ -961,10 +973,13 @@ pipe_write(fp, uio, cred, flags, td) /* * don't block on non-blocking I/O */ + FILE_LOCK(fp); if (fp->f_flag & FNONBLOCK) { + FILE_UNLOCK(fp); error = EAGAIN; break; } + FILE_UNLOCK(fp); /* * We have no more space and have something to offer, @@ -1236,8 +1251,9 @@ pipeclose(cpipe) static int pipe_kqfilter(struct file *fp, struct knote *kn) { - struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data; + struct pipe *cpipe; + cpipe = (struct pipe *)kn->kn_fp->f_data; switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &pipe_rfiltops; diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c index 04a419a8f484..13f4000acc88 100644 --- a/sys/kern/sys_socket.c +++ b/sys/kern/sys_socket.c @@ -197,10 +197,11 @@ soo_close(fp, td) int error = 0; struct socket *so; + so = (struct socket *)fp->f_data; fp->f_ops = &badfileops; - if ((so = (struct socket *)fp->f_data) != NULL) { - fp->f_data = NULL; + fp->f_data = 0; + + if (so) error = soclose(so); - } return (error); } diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index 6860d76834a6..19d62fdb6e06 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -134,16 +134,20 @@ socket(td, uap) fhold(fp); error = socreate(uap->domain, &so, uap->type, uap->protocol, td->td_proc->p_ucred, td); + FILEDESC_LOCK(fdp); if (error) { if (fdp->fd_ofiles[fd] == fp) { fdp->fd_ofiles[fd] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(fp, td); - } + } else + FILEDESC_UNLOCK(fdp); } else { fp->f_data = (caddr_t)so; /* already has ref count */ fp->f_flag = FREAD|FWRITE; fp->f_ops = &socketops; fp->f_type = DTYPE_SOCKET; + FILEDESC_UNLOCK(fdp); td->td_retval[0] = fd; } 
fdrop(fp, td); @@ -306,11 +310,13 @@ accept1(td, uap, compat) if (head->so_sigio != NULL) fsetown(fgetown(head->so_sigio), &so->so_sigio); + FILE_LOCK(nfp); soref(so); /* file descriptor reference */ nfp->f_data = (caddr_t)so; /* nfp has ref count from falloc */ nfp->f_flag = fflag; nfp->f_ops = &socketops; nfp->f_type = DTYPE_SOCKET; + FILE_UNLOCK(nfp); sa = 0; error = soaccept(so, &sa); if (error) { @@ -357,9 +363,13 @@ accept1(td, uap, compat) * out from under us. */ if (error) { + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[fd] == nfp) { fdp->fd_ofiles[fd] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(nfp, td); + } else { + FILEDESC_UNLOCK(fdp); } } splx(s); @@ -510,24 +520,37 @@ socketpair(td, uap) if (error) goto free4; } - fp1->f_flag = fp2->f_flag = FREAD|FWRITE; - fp1->f_ops = fp2->f_ops = &socketops; - fp1->f_type = fp2->f_type = DTYPE_SOCKET; + FILE_LOCK(fp1); + fp1->f_flag = FREAD|FWRITE; + fp1->f_ops = &socketops; + fp1->f_type = DTYPE_SOCKET; + FILE_UNLOCK(fp1); + FILE_LOCK(fp2); + fp2->f_flag = FREAD|FWRITE; + fp2->f_ops = &socketops; + fp2->f_type = DTYPE_SOCKET; + FILE_UNLOCK(fp2); error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int)); fdrop(fp1, td); fdrop(fp2, td); goto done2; free4: + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[sv[1]] == fp2) { fdp->fd_ofiles[sv[1]] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(fp2, td); - } + } else + FILEDESC_UNLOCK(fdp); fdrop(fp2, td); free3: + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[sv[0]] == fp1) { fdp->fd_ofiles[sv[0]] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(fp1, td); - } + } else + FILEDESC_UNLOCK(fdp); fdrop(fp1, td); free2: (void)soclose(so2); @@ -1932,4 +1955,3 @@ sendfile(struct thread *td, struct sendfile_args *uap) mtx_unlock(&Giant); return (error); } - diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index a7ffcff77820..546124dcf1ee 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -57,6 +57,7 @@ #include #include #include +#include #include @@ -535,7 +536,9 @@ unp_attach(so) 
unp_count++; LIST_INIT(&unp->unp_refs); unp->unp_socket = so; + FILEDESC_LOCK(curproc->p_fd); unp->unp_rvnode = curthread->td_proc->p_fd->fd_rdir; + FILEDESC_UNLOCK(curproc->p_fd); LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead : &unp_shead, unp, unp_link); so->so_pcb = (caddr_t)unp; @@ -628,7 +631,9 @@ unp_bind(unp, nam, td) } VATTR_NULL(&vattr); vattr.va_type = VSOCK; + FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask); + FILEDESC_UNLOCK(td->td_proc->p_fd); VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE); error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); NDFREE(&nd, NDF_ONLY_PNBUF); @@ -1006,8 +1011,10 @@ unp_externalize(control, controlp) unp_freerights(rp, newfds); goto next; } + FILEDESC_LOCK(td->td_proc->p_fd); /* if the new FD's will not fit free them. */ if (!fdavail(td, newfds)) { + FILEDESC_UNLOCK(td->td_proc->p_fd); error = EMSGSIZE; unp_freerights(rp, newfds); goto next; @@ -1022,6 +1029,7 @@ unp_externalize(control, controlp) *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { + FILEDESC_UNLOCK(td->td_proc->p_fd); error = E2BIG; unp_freerights(rp, newfds); goto next; @@ -1034,10 +1042,13 @@ unp_externalize(control, controlp) panic("unp_externalize fdalloc failed"); fp = *rp++; td->td_proc->p_fd->fd_ofiles[f] = fp; + FILE_LOCK(fp); fp->f_msgcount--; + FILE_UNLOCK(fp); unp_rights--; *fdp++ = f; } + FILEDESC_UNLOCK(td->td_proc->p_fd); } else { /* We can just copy anything else across */ if (error || controlp == NULL) goto next; @@ -1064,6 +1075,7 @@ unp_externalize(control, controlp) cm = NULL; } } + FILEDESC_UNLOCK(td->td_proc->p_fd); m_freem(control); @@ -1148,10 +1160,12 @@ unp_internalize(controlp, td) * If not, reject the entire operation. 
*/ fdp = data; + FILEDESC_LOCK(fdescp); for (i = 0; i < oldfds; i++) { fd = *fdp++; if ((unsigned)fd >= fdescp->fd_nfiles || fdescp->fd_ofiles[fd] == NULL) { + FILEDESC_UNLOCK(fdescp); error = EBADF; goto out; } @@ -1164,6 +1178,7 @@ unp_internalize(controlp, td) *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { + FILEDESC_UNLOCK(fdescp); error = E2BIG; goto out; } @@ -1174,10 +1189,13 @@ unp_internalize(controlp, td) for (i = 0; i < oldfds; i++) { fp = fdescp->fd_ofiles[*fdp++]; *rp++ = fp; + FILE_LOCK(fp); fp->f_count++; fp->f_msgcount++; + FILE_UNLOCK(fp); unp_rights++; } + FILEDESC_UNLOCK(fdescp); break; case SCM_TIMESTAMP: @@ -1233,42 +1251,50 @@ unp_gc() * before going through all this, set all FDs to * be NOT defered and NOT externally accessible */ + sx_slock(&filelist_lock); LIST_FOREACH(fp, &filehead, f_list) - fp->f_flag &= ~(FMARK|FDEFER); + fp->f_gcflag &= ~(FMARK|FDEFER); do { LIST_FOREACH(fp, &filehead, f_list) { + FILE_LOCK(fp); /* * If the file is not open, skip it */ - if (fp->f_count == 0) + if (fp->f_count == 0) { + FILE_UNLOCK(fp); continue; + } /* * If we already marked it as 'defer' in a * previous pass, then try process it this time * and un-mark it */ - if (fp->f_flag & FDEFER) { - fp->f_flag &= ~FDEFER; + if (fp->f_gcflag & FDEFER) { + fp->f_gcflag &= ~FDEFER; unp_defer--; } else { /* * if it's not defered, then check if it's * already marked.. if so skip it */ - if (fp->f_flag & FMARK) + if (fp->f_gcflag & FMARK) { + FILE_UNLOCK(fp); continue; + } /* * If all references are from messages * in transit, then skip it. it's not * externally accessible. */ - if (fp->f_count == fp->f_msgcount) + if (fp->f_count == fp->f_msgcount) { + FILE_UNLOCK(fp); continue; + } /* * If it got this far then it must be * externally accessible. 
*/ - fp->f_flag |= FMARK; + fp->f_gcflag |= FMARK; } /* * either it was defered, or it is externally @@ -1276,8 +1302,11 @@ unp_gc() * Now check if it is possibly one of OUR sockets. */ if (fp->f_type != DTYPE_SOCKET || - (so = (struct socket *)fp->f_data) == 0) + (so = (struct socket *)fp->f_data) == 0) { + FILE_UNLOCK(fp); continue; + } + FILE_UNLOCK(fp); if (so->so_proto->pr_domain != &localdomain || (so->so_proto->pr_flags&PR_RIGHTS) == 0) continue; @@ -1307,6 +1336,7 @@ unp_gc() unp_scan(so->so_rcv.sb_mb, unp_mark); } } while (unp_defer); + sx_sunlock(&filelist_lock); /* * We grab an extra reference to each of the file table entries * that are not otherwise accessible and then free the rights @@ -1347,33 +1377,43 @@ unp_gc() * 91/09/19, bsy@cs.cmu.edu */ extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); + sx_slock(&filelist_lock); for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0; fp = nextfp) { nextfp = LIST_NEXT(fp, f_list); + FILE_LOCK(fp); /* * If it's not open, skip it */ - if (fp->f_count == 0) + if (fp->f_count == 0) { + FILE_UNLOCK(fp); continue; + } /* * If all refs are from msgs, and it's not marked accessible * then it must be referenced from some unreachable cycle * of (shut-down) FDs, so include it in our * list of FDs to remove */ - if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { + if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) { *fpp++ = fp; nunref++; fp->f_count++; } + FILE_UNLOCK(fp); } + sx_sunlock(&filelist_lock); /* * for each FD on our hit list, do the following two things */ for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { struct file *tfp = *fpp; - if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL) + FILE_LOCK(tfp); + if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL) { + FILE_UNLOCK(tfp); sorflush((struct socket *)(tfp->f_data)); + } else + FILE_UNLOCK(tfp); } for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) closef(*fpp, (struct thread *) NULL); @@ 
-1460,19 +1500,19 @@ static void unp_mark(fp) struct file *fp; { - - if (fp->f_flag & FMARK) + if (fp->f_gcflag & FMARK) return; unp_defer++; - fp->f_flag |= (FMARK|FDEFER); + fp->f_gcflag |= (FMARK|FDEFER); } static void unp_discard(fp) struct file *fp; { - + FILE_LOCK(fp); fp->f_msgcount--; unp_rights--; + FILE_UNLOCK(fp); (void) closef(fp, (struct thread *)NULL); } diff --git a/sys/kern/vfs_acl.c b/sys/kern/vfs_acl.c index b50c89645d73..63be63d733a3 100644 --- a/sys/kern/vfs_acl.c +++ b/sys/kern/vfs_acl.c @@ -703,6 +703,7 @@ __acl_get_fd(struct thread *td, struct __acl_get_fd_args *uap) if (error == 0) { error = vacl_get_acl(td, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); @@ -724,6 +725,7 @@ __acl_set_fd(struct thread *td, struct __acl_set_fd_args *uap) if (error == 0) { error = vacl_set_acl(td, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); @@ -767,6 +769,7 @@ __acl_delete_fd(struct thread *td, struct __acl_delete_fd_args *uap) if (error == 0) { error = vacl_delete(td, (struct vnode *)fp->f_data, SCARG(uap, type)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); @@ -811,6 +814,7 @@ __acl_aclcheck_fd(struct thread *td, struct __acl_aclcheck_fd_args *uap) if (error == 0) { error = vacl_aclcheck(td, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 85bb63227f34..d0b412d5e171 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -716,9 +716,11 @@ __getcwd(td, uap) *bp = '\0'; fdp = td->td_proc->p_fd; slash_prefixed = 0; + FILEDESC_LOCK(fdp); for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) { if (vp->v_flag & VROOT) { if (vp->v_mount == NULL) { /* forced unmount */ + FILEDESC_UNLOCK(fdp); free(buf, M_TEMP); return (EBADF); } @@ -726,23 +728,27 @@ 
__getcwd(td, uap) continue; } if (vp->v_dd->v_id != vp->v_ddid) { + FILEDESC_UNLOCK(fdp); numcwdfail1++; free(buf, M_TEMP); return (ENOTDIR); } ncp = TAILQ_FIRST(&vp->v_cache_dst); if (!ncp) { + FILEDESC_UNLOCK(fdp); numcwdfail2++; free(buf, M_TEMP); return (ENOENT); } if (ncp->nc_dvp != vp->v_dd) { + FILEDESC_UNLOCK(fdp); numcwdfail3++; free(buf, M_TEMP); return (EBADF); } for (i = ncp->nc_nlen - 1; i >= 0; i--) { if (bp == buf) { + FILEDESC_UNLOCK(fdp); numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); @@ -750,6 +756,7 @@ __getcwd(td, uap) *--bp = ncp->nc_name[i]; } if (bp == buf) { + FILEDESC_UNLOCK(fdp); numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); @@ -758,6 +765,7 @@ __getcwd(td, uap) slash_prefixed = 1; vp = vp->v_dd; } + FILEDESC_UNLOCK(fdp); if (!slash_prefixed) { if (bp == buf) { numcwdfail4++; @@ -811,9 +819,11 @@ vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) *bp = '\0'; fdp = td->td_proc->p_fd; slash_prefixed = 0; + FILEDESC_LOCK(fdp); for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) { if (vp->v_flag & VROOT) { if (vp->v_mount == NULL) { /* forced unmount */ + FILEDESC_UNLOCK(fdp); free(buf, M_TEMP); return (EBADF); } @@ -821,23 +831,27 @@ vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) continue; } if (vp != vn && vp->v_dd->v_id != vp->v_ddid) { + FILEDESC_UNLOCK(fdp); numfullpathfail1++; free(buf, M_TEMP); return (ENOTDIR); } ncp = TAILQ_FIRST(&vp->v_cache_dst); if (!ncp) { + FILEDESC_UNLOCK(fdp); numfullpathfail2++; free(buf, M_TEMP); return (ENOENT); } if (vp != vn && ncp->nc_dvp != vp->v_dd) { + FILEDESC_UNLOCK(fdp); numfullpathfail3++; free(buf, M_TEMP); return (EBADF); } for (i = ncp->nc_nlen - 1; i >= 0; i--) { if (bp == buf) { + FILEDESC_UNLOCK(fdp); numfullpathfail4++; free(buf, M_TEMP); return (ENOMEM); @@ -845,6 +859,7 @@ vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) *--bp = ncp->nc_name[i]; } if (bp == buf) { + FILEDESC_UNLOCK(fdp); 
numfullpathfail4++; free(buf, M_TEMP); return (ENOMEM); @@ -855,12 +870,14 @@ vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) } if (!slash_prefixed) { if (bp == buf) { + FILEDESC_UNLOCK(fdp); numfullpathfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = '/'; } + FILEDESC_UNLOCK(fdp); numfullpathfound++; *retbuf = bp; *freebuf = buf; diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c index fced0d76b5a8..75ac3d074c69 100644 --- a/sys/kern/vfs_extattr.c +++ b/sys/kern/vfs_extattr.c @@ -454,16 +454,21 @@ checkdirs(olddp, newdp) fdp = p->p_fd; if (fdp == NULL) continue; + FILEDESC_LOCK(fdp); if (fdp->fd_cdir == olddp) { - vrele(fdp->fd_cdir); VREF(newdp); fdp->fd_cdir = newdp; + FILEDESC_UNLOCK(fdp); + vrele(olddp); + FILEDESC_LOCK(fdp); } if (fdp->fd_rdir == olddp) { - vrele(fdp->fd_rdir); VREF(newdp); fdp->fd_rdir = newdp; - } + FILEDESC_UNLOCK(fdp); + vrele(olddp); + } else + FILEDESC_UNLOCK(fdp); } sx_sunlock(&allproc_lock); if (rootvnode == olddp) { @@ -802,6 +807,7 @@ fstatfs(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); mp = ((struct vnode *)fp->f_data)->v_mount; + fdrop(fp, td); if (mp == NULL) return (EBADF); sp = &mp->mnt_stat; @@ -903,7 +909,7 @@ fchdir(td, uap) } */ *uap; { register struct filedesc *fdp = td->td_proc->p_fd; - struct vnode *vp, *tdp; + struct vnode *vp, *tdp, *vpold; struct mount *mp; struct file *fp; int error; @@ -912,6 +918,7 @@ fchdir(td, uap) return (error); vp = (struct vnode *)fp->f_data; VREF(vp); + fdrop(fp, td); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type != VDIR) error = ENOTDIR; @@ -932,8 +939,11 @@ fchdir(td, uap) return (error); } VOP_UNLOCK(vp, 0, td); - vrele(fdp->fd_cdir); + FILEDESC_LOCK(fdp); + vpold = fdp->fd_cdir; fdp->fd_cdir = vp; + FILEDESC_UNLOCK(fdp); + vrele(vpold); return (0); } @@ -956,14 +966,18 @@ chdir(td, uap) register struct filedesc *fdp = td->td_proc->p_fd; int error; struct nameidata nd; + struct 
vnode *vp; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), td); if ((error = change_dir(&nd, td)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); - vrele(fdp->fd_cdir); + FILEDESC_LOCK(fdp); + vp = fdp->fd_cdir; fdp->fd_cdir = nd.ni_vp; + FILEDESC_UNLOCK(fdp); + vrele(vp); return (0); } @@ -977,18 +991,23 @@ chroot_refuse_vdir_fds(fdp) { struct vnode *vp; struct file *fp; + struct thread *td = curthread; int error; int fd; + FILEDESC_LOCK(fdp); for (fd = 0; fd < fdp->fd_nfiles ; fd++) { error = getvnode(fdp, fd, &fp); if (error) continue; vp = (struct vnode *)fp->f_data; + fdrop(fp, td); if (vp->v_type != VDIR) continue; + FILEDESC_UNLOCK(fdp); return(EPERM); } + FILEDESC_UNLOCK(fdp); return (0); } @@ -1024,13 +1043,18 @@ chroot(td, uap) register struct filedesc *fdp = td->td_proc->p_fd; int error; struct nameidata nd; + struct vnode *vp; error = suser_xxx(0, td->td_proc, PRISON_ROOT); if (error) return (error); + FILEDESC_LOCK(fdp); if (chroot_allow_open_directories == 0 || - (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) + (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { + FILEDESC_UNLOCK(fdp); error = chroot_refuse_vdir_fds(fdp); + } else + FILEDESC_UNLOCK(fdp); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, @@ -1038,12 +1062,15 @@ chroot(td, uap) if ((error = change_dir(&nd, td)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); - vrele(fdp->fd_rdir); + FILEDESC_LOCK(fdp); + vp = fdp->fd_rdir; fdp->fd_rdir = nd.ni_vp; if (!fdp->fd_jdir) { fdp->fd_jdir = nd.ni_vp; VREF(fdp->fd_jdir); } + FILEDESC_UNLOCK(fdp); + vrele(vp); return (0); } @@ -1113,7 +1140,9 @@ open(td, uap) if (error) return (error); fp = nfp; + FILEDESC_LOCK(fdp); cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; + FILEDESC_UNLOCK(fdp); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); td->td_dupfd = -indx - 1; /* XXX check for fdopen */ /* @@ -1144,10 +1173,13 
@@ open(td, uap) * Clean up the descriptor, but only if another thread hadn't * replaced or closed it. */ + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(fp, td); - } + } else + FILEDESC_UNLOCK(fdp); if (error == ERESTART) error = EINTR; @@ -1165,9 +1197,13 @@ open(td, uap) * descriptor) while we were blocked. The end result should look * like opening the file succeeded but it was immediately closed. */ + FILEDESC_LOCK(fdp); + FILE_LOCK(fp); if (fp->f_count == 1) { KASSERT(fdp->fd_ofiles[indx] != fp, ("Open file descriptor lost all refs")); + FILEDESC_UNLOCK(fdp); + FILE_UNLOCK(fp); VOP_UNLOCK(vp, 0, td); vn_close(vp, flags & FMASK, fp->f_cred, td); fdrop(fp, td); @@ -1179,6 +1215,8 @@ open(td, uap) fp->f_flag = flags & FMASK; fp->f_ops = &vnops; fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE); + FILEDESC_UNLOCK(fdp); + FILE_UNLOCK(fp); VOP_UNLOCK(vp, 0, td); if (flags & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; @@ -1219,11 +1257,13 @@ open(td, uap) td->td_retval[0] = indx; return (0); bad: + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(fp, td); - } - fdrop(fp, td); + } else + FILEDESC_UNLOCK(fdp); return (error); } @@ -1307,7 +1347,9 @@ mknod(td, uap) error = EEXIST; } else { VATTR_NULL(&vattr); + FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); vattr.va_rdev = SCARG(uap, dev); whiteout = 0; @@ -1398,7 +1440,9 @@ mkfifo(td, uap) } VATTR_NULL(&vattr); vattr.va_type = VFIFO; + FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE); error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (error == 0) @@ -1513,7 +1557,9 @@ symlink(td, uap) goto restart; } 
VATTR_NULL(&vattr); + FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); NDFREE(&nd, NDF_ONLY_PNBUF); @@ -1658,18 +1704,19 @@ lseek(td, uap) } */ *uap; { struct ucred *cred = td->td_proc->p_ucred; - register struct filedesc *fdp = td->td_proc->p_fd; register struct file *fp; - struct vattr vattr; struct vnode *vp; + struct vattr vattr; off_t offset; int error, noneg; - if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL) + fp = ffind_hold(td, uap->fd); + if (fp == NULL) return (EBADF); - if (fp->f_type != DTYPE_VNODE) + if (fp->f_type != DTYPE_VNODE) { + fdrop(fp, td); return (ESPIPE); + } vp = (struct vnode *)fp->f_data; noneg = (vp->v_type != VCHR); offset = SCARG(uap, offset); @@ -1694,12 +1741,14 @@ lseek(td, uap) case L_SET: break; default: + fdrop(fp, td); return (EINVAL); } if (noneg && offset < 0) return (EINVAL); fp->f_offset = offset; *(off_t *)(td->td_retval) = fp->f_offset; + fdrop(fp, td); return (0); } @@ -2307,7 +2356,9 @@ fchflags(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - return setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags)); + error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags)); + fdrop(fp, td); + return (error); } /* @@ -2414,11 +2465,15 @@ fchmod(td, uap) } */ *uap; { struct file *fp; + struct vnode *vp; int error; if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - return setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode)); + vp = (struct vnode *)fp->f_data; + error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode)); + fdrop(fp, td); + return (error); } /* @@ -2533,12 +2588,16 @@ fchown(td, uap) } */ *uap; { struct file *fp; + struct vnode *vp; int error; if 
((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - return setfown(td, (struct vnode *)fp->f_data, + vp = (struct vnode *)fp->f_data; + error = setfown(td, (struct vnode *)fp->f_data, SCARG(uap, uid), SCARG(uap, gid)); + fdrop(fp, td); + return (error); } /* @@ -2692,7 +2751,9 @@ futimes(td, uap) return (error); if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - return setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL); + error = setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL); + fdrop(fp, td); + return (error); } /* @@ -2777,11 +2838,15 @@ ftruncate(td, uap) return(EINVAL); if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - if ((fp->f_flag & FWRITE) == 0) + if ((fp->f_flag & FWRITE) == 0) { + fdrop(fp, td); return (EINVAL); + } vp = (struct vnode *)fp->f_data; - if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { + fdrop(fp, td); return (error); + } VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type == VDIR) @@ -2793,6 +2858,7 @@ ftruncate(td, uap) } VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); + fdrop(fp, td); return (error); } @@ -2883,8 +2949,10 @@ fsync(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); vp = (struct vnode *)fp->f_data; - if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { + fdrop(fp, td); return (error); + } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (VOP_GETVOBJECT(vp, &obj) == 0) { vm_object_page_clean(obj, 0, 0, 0); @@ -2897,6 +2965,7 @@ fsync(td, uap) VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); + fdrop(fp, td); return (error); } @@ -3068,7 +3137,9 @@ vn_mkdir(path, mode, segflg, td) } VATTR_NULL(&vattr); vattr.va_type = VDIR; + FILEDESC_LOCK(td->td_proc->p_fd); 
vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); NDFREE(&nd, NDF_ONLY_PNBUF); @@ -3190,12 +3261,16 @@ ogetdirentries(td, uap) return (EINVAL); if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - if ((fp->f_flag & FREAD) == 0) + if ((fp->f_flag & FREAD) == 0) { + fdrop(fp, td); return (EBADF); + } vp = (struct vnode *)fp->f_data; unionread: - if (vp->v_type != VDIR) + if (vp->v_type != VDIR) { + fdrop(fp, td); return (EINVAL); + } aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; @@ -3258,15 +3333,19 @@ ogetdirentries(td, uap) FREE(dirbuf, M_TEMP); } VOP_UNLOCK(vp, 0, td); - if (error) + if (error) { + fdrop(fp, td); return (error); + } if (SCARG(uap, count) == auio.uio_resid) { if (union_dircheckp) { error = union_dircheckp(td, &vp, fp); if (error == -1) goto unionread; - if (error) + if (error) { + fdrop(fp, td); return (error); + } } if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { @@ -3281,6 +3360,7 @@ ogetdirentries(td, uap) } error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), sizeof(long)); + fdrop(fp, td); td->td_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } @@ -3316,12 +3396,16 @@ getdirentries(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - if ((fp->f_flag & FREAD) == 0) + if ((fp->f_flag & FREAD) == 0) { + fdrop(fp, td); return (EBADF); + } vp = (struct vnode *)fp->f_data; unionread: - if (vp->v_type != VDIR) + if (vp->v_type != VDIR) { + fdrop(fp, td); return (EINVAL); + } aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; @@ -3336,15 +3420,19 @@ getdirentries(td, uap) error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; 
VOP_UNLOCK(vp, 0, td); - if (error) + if (error) { + fdrop(fp, td); return (error); + } if (SCARG(uap, count) == auio.uio_resid) { if (union_dircheckp) { error = union_dircheckp(td, &vp, fp); if (error == -1) goto unionread; - if (error) + if (error) { + fdrop(fp, td); return (error); + } } if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { @@ -3362,6 +3450,7 @@ getdirentries(td, uap) sizeof(long)); } td->td_retval[0] = SCARG(uap, count) - auio.uio_resid; + fdrop(fp, td); return (error); } #ifndef _SYS_SYSPROTO_H_ @@ -3407,9 +3496,11 @@ umask(td, uap) { register struct filedesc *fdp; + FILEDESC_LOCK(td->td_proc->p_fd); fdp = td->td_proc->p_fd; td->td_retval[0] = fdp->fd_cmask; fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS; + FILEDESC_UNLOCK(td->td_proc->p_fd); return (0); } @@ -3465,6 +3556,7 @@ revoke(td, uap) /* * Convert a user file descriptor to a kernel file entry. + * On success the file entry is returned held (fhold), not locked; the caller must fdrop() it. */ int getvnode(fdp, fd, fpp) @@ -3472,15 +3564,28 @@ getvnode(fdp, fd, fpp) int fd; struct file **fpp; { + int error; struct file *fp; - if ((u_int)fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL) - return (EBADF); - if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) - return (EINVAL); + fp = NULL; + if (fdp == NULL) + error = EBADF; + else { + FILEDESC_LOCK(fdp); + if ((u_int)fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL) + error = EBADF; + else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) { + fp = NULL; + error = EINVAL; + } else { + fhold(fp); + error = 0; + } + FILEDESC_UNLOCK(fdp); + } *fpp = fp; - return (0); + return (error); } /* * Get (NFS) file handle @@ -3681,10 +3786,13 @@ fhopen(td, uap) * descriptor but handle the case where someone might * have dup()d or close()d it when we weren't looking. 
*/ + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(fp, td); - } + } else + FILEDESC_UNLOCK(fdp); /* * release our private reference */ @@ -3995,6 +4103,7 @@ extattr_set_fd(td, uap) error = extattr_set_vp((struct vnode *)fp->f_data, SCARG(uap, attrnamespace), attrname, SCARG(uap, iovp), SCARG(uap, iovcnt), td); + fdrop(fp, td); return (error); } @@ -4108,6 +4217,7 @@ extattr_get_fd(td, uap) SCARG(uap, attrnamespace), attrname, SCARG(uap, iovp), SCARG(uap, iovcnt), td); + fdrop(fp, td); return (error); } @@ -4173,6 +4283,7 @@ extattr_delete_fd(td, uap) struct extattr_delete_fd_args *uap; { struct file *fp; + struct vnode *vp; char attrname[EXTATTR_MAXNAMELEN]; int error; @@ -4183,9 +4294,11 @@ extattr_delete_fd(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); + vp = (struct vnode *)fp->f_data; error = extattr_delete_vp((struct vnode *)fp->f_data, SCARG(uap, attrnamespace), attrname, td); + fdrop(fp, td); return (error); } diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index 470abba9c378..66d27afd5ae5 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -144,11 +144,13 @@ namei(ndp) /* * Get starting point for the translation. */ + FILEDESC_LOCK(fdp); ndp->ni_rootdir = fdp->fd_rdir; ndp->ni_topdir = fdp->fd_jdir; dp = fdp->fd_cdir; VREF(dp); + FILEDESC_UNLOCK(fdp); for (;;) { /* * Check if root directory should replace current directory. 
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index fced0d76b5a8..75ac3d074c69 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -454,16 +454,21 @@ checkdirs(olddp, newdp) fdp = p->p_fd; if (fdp == NULL) continue; + FILEDESC_LOCK(fdp); if (fdp->fd_cdir == olddp) { - vrele(fdp->fd_cdir); VREF(newdp); fdp->fd_cdir = newdp; + FILEDESC_UNLOCK(fdp); + vrele(olddp); + FILEDESC_LOCK(fdp); } if (fdp->fd_rdir == olddp) { - vrele(fdp->fd_rdir); VREF(newdp); fdp->fd_rdir = newdp; - } + FILEDESC_UNLOCK(fdp); + vrele(olddp); + } else + FILEDESC_UNLOCK(fdp); } sx_sunlock(&allproc_lock); if (rootvnode == olddp) { @@ -802,6 +807,7 @@ fstatfs(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); mp = ((struct vnode *)fp->f_data)->v_mount; + fdrop(fp, td); if (mp == NULL) return (EBADF); sp = &mp->mnt_stat; @@ -903,7 +909,7 @@ fchdir(td, uap) } */ *uap; { register struct filedesc *fdp = td->td_proc->p_fd; - struct vnode *vp, *tdp; + struct vnode *vp, *tdp, *vpold; struct mount *mp; struct file *fp; int error; @@ -912,6 +918,7 @@ fchdir(td, uap) return (error); vp = (struct vnode *)fp->f_data; VREF(vp); + fdrop(fp, td); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type != VDIR) error = ENOTDIR; @@ -932,8 +939,11 @@ fchdir(td, uap) return (error); } VOP_UNLOCK(vp, 0, td); - vrele(fdp->fd_cdir); + FILEDESC_LOCK(fdp); + vpold = fdp->fd_cdir; fdp->fd_cdir = vp; + FILEDESC_UNLOCK(fdp); + vrele(vpold); return (0); } @@ -956,14 +966,18 @@ chdir(td, uap) register struct filedesc *fdp = td->td_proc->p_fd; int error; struct nameidata nd; + struct vnode *vp; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), td); if ((error = change_dir(&nd, td)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); - vrele(fdp->fd_cdir); + FILEDESC_LOCK(fdp); + vp = fdp->fd_cdir; fdp->fd_cdir = nd.ni_vp; + FILEDESC_UNLOCK(fdp); + vrele(vp); return (0); } @@ -977,18 +991,23 @@ chroot_refuse_vdir_fds(fdp) { 
struct vnode *vp; struct file *fp; + struct thread *td = curthread; int error; int fd; + FILEDESC_LOCK(fdp); for (fd = 0; fd < fdp->fd_nfiles ; fd++) { error = getvnode(fdp, fd, &fp); if (error) continue; vp = (struct vnode *)fp->f_data; + fdrop(fp, td); if (vp->v_type != VDIR) continue; + FILEDESC_UNLOCK(fdp); return(EPERM); } + FILEDESC_UNLOCK(fdp); return (0); } @@ -1024,13 +1043,18 @@ chroot(td, uap) register struct filedesc *fdp = td->td_proc->p_fd; int error; struct nameidata nd; + struct vnode *vp; error = suser_xxx(0, td->td_proc, PRISON_ROOT); if (error) return (error); + FILEDESC_LOCK(fdp); if (chroot_allow_open_directories == 0 || - (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) + (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { + FILEDESC_UNLOCK(fdp); error = chroot_refuse_vdir_fds(fdp); + } else + FILEDESC_UNLOCK(fdp); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, @@ -1038,12 +1062,15 @@ chroot(td, uap) if ((error = change_dir(&nd, td)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); - vrele(fdp->fd_rdir); + FILEDESC_LOCK(fdp); + vp = fdp->fd_rdir; fdp->fd_rdir = nd.ni_vp; if (!fdp->fd_jdir) { fdp->fd_jdir = nd.ni_vp; VREF(fdp->fd_jdir); } + FILEDESC_UNLOCK(fdp); + vrele(vp); return (0); } @@ -1113,7 +1140,9 @@ open(td, uap) if (error) return (error); fp = nfp; + FILEDESC_LOCK(fdp); cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; + FILEDESC_UNLOCK(fdp); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); td->td_dupfd = -indx - 1; /* XXX check for fdopen */ /* @@ -1144,10 +1173,13 @@ open(td, uap) * Clean up the descriptor, but only if another thread hadn't * replaced or closed it. 
*/ + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(fp, td); - } + } else + FILEDESC_UNLOCK(fdp); if (error == ERESTART) error = EINTR; @@ -1165,9 +1197,13 @@ open(td, uap) * descriptor) while we were blocked. The end result should look * like opening the file succeeded but it was immediately closed. */ + FILEDESC_LOCK(fdp); + FILE_LOCK(fp); if (fp->f_count == 1) { KASSERT(fdp->fd_ofiles[indx] != fp, ("Open file descriptor lost all refs")); + FILEDESC_UNLOCK(fdp); + FILE_UNLOCK(fp); VOP_UNLOCK(vp, 0, td); vn_close(vp, flags & FMASK, fp->f_cred, td); fdrop(fp, td); @@ -1179,6 +1215,8 @@ open(td, uap) fp->f_flag = flags & FMASK; fp->f_ops = &vnops; fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE); + FILEDESC_UNLOCK(fdp); + FILE_UNLOCK(fp); VOP_UNLOCK(vp, 0, td); if (flags & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; @@ -1219,11 +1257,13 @@ open(td, uap) td->td_retval[0] = indx; return (0); bad: + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(fp, td); - } - fdrop(fp, td); + } else + FILEDESC_UNLOCK(fdp); return (error); } @@ -1307,7 +1347,9 @@ mknod(td, uap) error = EEXIST; } else { VATTR_NULL(&vattr); + FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); vattr.va_rdev = SCARG(uap, dev); whiteout = 0; @@ -1398,7 +1440,9 @@ mkfifo(td, uap) } VATTR_NULL(&vattr); vattr.va_type = VFIFO; + FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE); error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (error == 0) @@ -1513,7 +1557,9 @@ symlink(td, uap) goto restart; } VATTR_NULL(&vattr); + FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = ACCESSPERMS &~ 
td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); NDFREE(&nd, NDF_ONLY_PNBUF); @@ -1658,18 +1704,19 @@ lseek(td, uap) } */ *uap; { struct ucred *cred = td->td_proc->p_ucred; - register struct filedesc *fdp = td->td_proc->p_fd; register struct file *fp; - struct vattr vattr; struct vnode *vp; + struct vattr vattr; off_t offset; int error, noneg; - if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL) + fp = ffind_hold(td, uap->fd); + if (fp == NULL) return (EBADF); - if (fp->f_type != DTYPE_VNODE) + if (fp->f_type != DTYPE_VNODE) { + fdrop(fp, td); return (ESPIPE); + } vp = (struct vnode *)fp->f_data; noneg = (vp->v_type != VCHR); offset = SCARG(uap, offset); @@ -1694,12 +1741,14 @@ lseek(td, uap) case L_SET: break; default: + fdrop(fp, td); return (EINVAL); } if (noneg && offset < 0) return (EINVAL); fp->f_offset = offset; *(off_t *)(td->td_retval) = fp->f_offset; + fdrop(fp, td); return (0); } @@ -2307,7 +2356,9 @@ fchflags(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - return setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags)); + error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags)); + fdrop(fp, td); + return (error); } /* @@ -2414,11 +2465,15 @@ fchmod(td, uap) } */ *uap; { struct file *fp; + struct vnode *vp; int error; if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - return setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode)); + vp = (struct vnode *)fp->f_data; + error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode)); + fdrop(fp, td); + return (error); } /* @@ -2533,12 +2588,16 @@ fchown(td, uap) } */ *uap; { struct file *fp; + struct vnode *vp; int error; if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - 
return setfown(td, (struct vnode *)fp->f_data, + vp = (struct vnode *)fp->f_data; + error = setfown(td, (struct vnode *)fp->f_data, SCARG(uap, uid), SCARG(uap, gid)); + fdrop(fp, td); + return (error); } /* @@ -2692,7 +2751,9 @@ futimes(td, uap) return (error); if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - return setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL); + error = setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL); + fdrop(fp, td); + return (error); } /* @@ -2777,11 +2838,15 @@ ftruncate(td, uap) return(EINVAL); if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - if ((fp->f_flag & FWRITE) == 0) + if ((fp->f_flag & FWRITE) == 0) { + fdrop(fp, td); return (EINVAL); + } vp = (struct vnode *)fp->f_data; - if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { + fdrop(fp, td); return (error); + } VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type == VDIR) @@ -2793,6 +2858,7 @@ ftruncate(td, uap) } VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); + fdrop(fp, td); return (error); } @@ -2883,8 +2949,10 @@ fsync(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); vp = (struct vnode *)fp->f_data; - if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { + fdrop(fp, td); return (error); + } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (VOP_GETVOBJECT(vp, &obj) == 0) { vm_object_page_clean(obj, 0, 0, 0); @@ -2897,6 +2965,7 @@ fsync(td, uap) VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); + fdrop(fp, td); return (error); } @@ -3068,7 +3137,9 @@ vn_mkdir(path, mode, segflg, td) } VATTR_NULL(&vattr); vattr.va_type = VDIR; + FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; + 
FILEDESC_UNLOCK(td->td_proc->p_fd); VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); NDFREE(&nd, NDF_ONLY_PNBUF); @@ -3190,12 +3261,16 @@ ogetdirentries(td, uap) return (EINVAL); if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - if ((fp->f_flag & FREAD) == 0) + if ((fp->f_flag & FREAD) == 0) { + fdrop(fp, td); return (EBADF); + } vp = (struct vnode *)fp->f_data; unionread: - if (vp->v_type != VDIR) + if (vp->v_type != VDIR) { + fdrop(fp, td); return (EINVAL); + } aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; @@ -3258,15 +3333,19 @@ ogetdirentries(td, uap) FREE(dirbuf, M_TEMP); } VOP_UNLOCK(vp, 0, td); - if (error) + if (error) { + fdrop(fp, td); return (error); + } if (SCARG(uap, count) == auio.uio_resid) { if (union_dircheckp) { error = union_dircheckp(td, &vp, fp); if (error == -1) goto unionread; - if (error) + if (error) { + fdrop(fp, td); return (error); + } } if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { @@ -3281,6 +3360,7 @@ ogetdirentries(td, uap) } error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), sizeof(long)); + fdrop(fp, td); td->td_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } @@ -3316,12 +3396,16 @@ getdirentries(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - if ((fp->f_flag & FREAD) == 0) + if ((fp->f_flag & FREAD) == 0) { + fdrop(fp, td); return (EBADF); + } vp = (struct vnode *)fp->f_data; unionread: - if (vp->v_type != VDIR) + if (vp->v_type != VDIR) { + fdrop(fp, td); return (EINVAL); + } aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; @@ -3336,15 +3420,19 @@ getdirentries(td, uap) error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; VOP_UNLOCK(vp, 0, td); - if (error) + if (error) { + fdrop(fp, td); return 
(error); + } if (SCARG(uap, count) == auio.uio_resid) { if (union_dircheckp) { error = union_dircheckp(td, &vp, fp); if (error == -1) goto unionread; - if (error) + if (error) { + fdrop(fp, td); return (error); + } } if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { @@ -3362,6 +3450,7 @@ getdirentries(td, uap) sizeof(long)); } td->td_retval[0] = SCARG(uap, count) - auio.uio_resid; + fdrop(fp, td); return (error); } #ifndef _SYS_SYSPROTO_H_ @@ -3407,9 +3496,11 @@ umask(td, uap) { register struct filedesc *fdp; + FILEDESC_LOCK(td->td_proc->p_fd); fdp = td->td_proc->p_fd; td->td_retval[0] = fdp->fd_cmask; fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS; + FILEDESC_UNLOCK(td->td_proc->p_fd); return (0); } @@ -3465,6 +3556,7 @@ revoke(td, uap) /* * Convert a user file descriptor to a kernel file entry. + * On success the file entry is returned with a reference held, not locked. */ int getvnode(fdp, fd, fpp) @@ -3472,15 +3564,28 @@ getvnode(fdp, fd, fpp) int fd; struct file **fpp; { + int error; struct file *fp; - if ((u_int)fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL) - return (EBADF); - if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) - return (EINVAL); + fp = NULL; + if (fdp == NULL) + error = EBADF; + else { + FILEDESC_LOCK(fdp); + if ((u_int)fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL) + error = EBADF; + else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) { + fp = NULL; + error = EINVAL; + } else { + fhold(fp); + error = 0; + } + FILEDESC_UNLOCK(fdp); + } *fpp = fp; - return (0); + return (error); } /* * Get (NFS) file handle @@ -3681,10 +3786,13 @@ fhopen(td, uap) * descriptor but handle the case where someone might * have dup()d or close()d it when we weren't looking. 
*/ + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(fp, td); - } + } else + FILEDESC_UNLOCK(fdp); /* * release our private reference */ @@ -3995,6 +4103,7 @@ extattr_set_fd(td, uap) error = extattr_set_vp((struct vnode *)fp->f_data, SCARG(uap, attrnamespace), attrname, SCARG(uap, iovp), SCARG(uap, iovcnt), td); + fdrop(fp, td); return (error); } @@ -4108,6 +4217,7 @@ extattr_get_fd(td, uap) SCARG(uap, attrnamespace), attrname, SCARG(uap, iovp), SCARG(uap, iovcnt), td); + fdrop(fp, td); return (error); } @@ -4173,6 +4283,7 @@ extattr_delete_fd(td, uap) struct extattr_delete_fd_args *uap; { struct file *fp; + struct vnode *vp; char attrname[EXTATTR_MAXNAMELEN]; int error; @@ -4183,9 +4294,11 @@ extattr_delete_fd(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); + vp = (struct vnode *)fp->f_data; error = extattr_delete_vp((struct vnode *)fp->f_data, SCARG(uap, attrnamespace), attrname, td); + fdrop(fp, td); return (error); } diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 70448d60e52d..1bbed38f1a83 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -265,6 +265,7 @@ static __inline int sequential_heuristic(struct uio *uio, struct file *fp) { + /* * Sequential heuristic - detect sequential operation */ @@ -446,7 +447,6 @@ vn_write(fp, uio, cred, flags, td) vp = (struct vnode *)fp->f_data; if (vp->v_type == VREG) bwillwrite(); - vp = (struct vnode *)fp->f_data; /* XXX needed? 
*/ ioflag = IO_UNIT; if (vp->v_type == VREG && (fp->f_flag & O_APPEND)) ioflag |= IO_APPEND; diff --git a/sys/netgraph/ng_socket.c b/sys/netgraph/ng_socket.c index f34fa7f07270..09ce2cf3d752 100644 --- a/sys/netgraph/ng_socket.c +++ b/sys/netgraph/ng_socket.c @@ -569,7 +569,6 @@ ng_detach_common(struct ngpcb *pcbp, int which) static int ng_internalize(struct mbuf *control, struct thread *td) { - struct filedesc *fdp = td->td_proc->p_fd; struct cmsghdr *cm = mtod(control, struct cmsghdr *); struct file *fp; struct vnode *vn; @@ -592,10 +591,9 @@ ng_internalize(struct mbuf *control, struct thread *td) /* Check that the FD given is legit. and change it to a pointer to a * struct file. */ fd = CMSG_DATA(cm); - if ((unsigned) fd >= fdp->fd_nfiles - || (fp = fdp->fd_ofiles[fd]) == NULL) { + fp = ffind_hold(td, fd); + if (fp == NULL) return (EBADF); - } /* Depending on what kind of resource it is, act differently. For * devices, we treat it as a file. For a AF_NETGRAPH socket, @@ -609,14 +607,17 @@ ng_internalize(struct mbuf *control, struct thread *td) /* XXX then what :) */ /* how to pass on to other modules? 
*/ } else { + fdrop(fp, td); TRAP_ERROR; return (EINVAL); } break; default: + fdrop(fp, td); TRAP_ERROR; return (EINVAL); } + fdrop(fp, td); return (0); } #endif /* NOTYET */ diff --git a/sys/netsmb/smb_dev.c b/sys/netsmb/smb_dev.c index 2a0f671762e3..c832fa8166cd 100644 --- a/sys/netsmb/smb_dev.c +++ b/sys/netsmb/smb_dev.c @@ -394,10 +394,15 @@ nsmb_getfp(struct filedesc* fdp, int fd, int flag) { struct file* fp; + FILEDESC_LOCK(fdp); if (((u_int)fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL || - (fp->f_flag & flag) == 0) + (fp->f_flag & flag) == 0) { + FILEDESC_UNLOCK(fdp); return (NULL); + } + fhold(fp); + FILEDESC_UNLOCK(fdp); return (fp); } @@ -416,19 +421,25 @@ smb_dev2share(int fd, int mode, struct smb_cred *scred, if (fp == NULL) return EBADF; vp = (struct vnode*)fp->f_data; - if (vp == NULL) + if (vp == NULL) { + fdrop(fp, curthread); return EBADF; + } dev = vn_todev(vp); - if (dev == NODEV) + if (dev == NODEV) { + fdrop(fp, curthread); return EBADF; + } SMB_CHECKMINOR(dev); ssp = sdp->sd_share; - if (ssp == NULL) + if (ssp == NULL) { + fdrop(fp, curthread); return ENOTCONN; + } error = smb_share_get(ssp, LK_EXCLUSIVE, scred); - if (error) - return error; - *sspp = ssp; - return 0; + if (error == 0) + *sspp = ssp; + fdrop(fp, curthread); + return error; } diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index 85a168c98f98..bec6e5dc90f8 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -90,8 +90,9 @@ #define O_TRUNC 0x0400 /* truncate to zero length */ #define O_EXCL 0x0800 /* error if already exists */ #ifdef _KERNEL -#define FMARK 0x1000 /* mark during gc() */ -#define FDEFER 0x2000 /* defer for next gc pass */ +/* FMARK/FDEFER kept in f_gcflag */ +#define FMARK 0x1 /* mark during gc() */ +#define FDEFER 0x2 /* defer for next gc pass */ #define FHASLOCK 0x4000 /* descriptor holds advisory lock */ #endif diff --git a/sys/sys/file.h b/sys/sys/file.h index f5521679c23a..575b60867993 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -43,6 
+43,10 @@ #endif #ifdef _KERNEL +#include +#include +#include +#include #include struct stat; @@ -55,18 +59,24 @@ struct socket; /* * Kernel descriptor table. * One entry for each open kernel vnode and socket. + * + * Below is the list of locks that protects members in struct file. + * + * (fl) filelist_lock + * (f) f_mtx in struct file + * none not locked */ struct file { - LIST_ENTRY(file) f_list;/* list of active files */ - short f_FILLER3; /* (old f_flag) */ + LIST_ENTRY(file) f_list;/* (fl) list of active files */ + short f_gcflag; /* used by thread doing fd garbage collection */ #define DTYPE_VNODE 1 /* file */ #define DTYPE_SOCKET 2 /* communications endpoint */ #define DTYPE_PIPE 3 /* pipe */ #define DTYPE_FIFO 4 /* fifo (named pipe) */ #define DTYPE_KQUEUE 5 /* event queue */ short f_type; /* descriptor type */ - int f_count; /* reference count */ - int f_msgcount; /* references from message queue */ + int f_count; /* (f) reference count */ + int f_msgcount; /* (f) references from message queue */ struct ucred *f_cred; /* credentials associated with descriptor */ struct fileops { int (*fo_read) __P((struct file *fp, struct uio *uio, @@ -96,6 +106,7 @@ struct file { off_t f_offset; caddr_t f_data; /* vnode or socket */ u_int f_flag; /* see fcntl.h */ + struct mtx f_mtx; /* mutex to protect data */ }; #ifdef MALLOC_DECLARE @@ -103,18 +114,41 @@ MALLOC_DECLARE(M_FILE); #endif LIST_HEAD(filelist, file); -extern struct filelist filehead; /* head of list of open files */ +extern struct filelist filehead; /* (fl) head of list of open files */ extern struct fileops vnops; extern struct fileops badfileops; extern int maxfiles; /* kernel limit on number of open files */ extern int maxfilesperproc; /* per process limit on number of open files */ -extern int nfiles; /* actual number of open files */ +extern int nfiles; /* (fl) actual number of open files */ +extern struct sx filelist_lock; /* sx to protect filelist and nfiles */ -static __inline void fhold __P((struct 
file *fp)); +static __inline struct file * fhold __P((struct file *fp)); +static __inline struct file * fhold_locked __P((struct file *fp)); int fget __P((struct thread *td, int fd, struct file **fpp)); int fget_read __P((struct thread *td, int fd, struct file **fpp)); int fget_write __P((struct thread *td, int fd, struct file **fpp)); int fdrop __P((struct file *fp, struct thread *td)); +int fdrop_locked __P((struct file *fp, struct thread *td)); + +/* Lock a file. */ +/*#define FILE_LOCK_DEBUG*/ +#ifdef FILE_LOCK_DEBUG +#define FILE_LOCK(f) \ + do { \ + printf("FLCK: %p %s %d\n", &(f)->f_mtx, __FILE__, __LINE__); \ + mtx_lock(&(f)->f_mtx); \ + } while (0) +#define FILE_UNLOCK(f) \ + do { \ + printf("FREL: %p %s %d\n", &(f)->f_mtx, __FILE__, __LINE__); \ + mtx_unlock(&(f)->f_mtx); \ + } while (0) +#else +#define FILE_LOCK(f) mtx_lock(&(f)->f_mtx) +#define FILE_UNLOCK(f) mtx_unlock(&(f)->f_mtx) +#endif +#define FILE_LOCKED(f) mtx_owned(&(f)->f_mtx) +#define FILE_LOCK_ASSERT(f, type) mtx_assert(&(f)->f_mtx, (type)) int fgetvp __P((struct thread *td, int fd, struct vnode **vpp)); int fgetvp_read __P((struct thread *td, int fd, struct vnode **vpp)); @@ -123,12 +157,27 @@ int fgetvp_write __P((struct thread *td, int fd, struct vnode **vpp)); int fgetsock __P((struct thread *td, int fd, struct socket **spp, u_int *fflagp)); void fputsock __P((struct socket *sp)); -static __inline void +static __inline struct file * +fhold_locked(fp) + struct file *fp; +{ + +#ifdef INVARIANTS + FILE_LOCK_ASSERT(fp, MA_OWNED); +#endif + fp->f_count++; + return (fp); +} + +static __inline struct file * fhold(fp) struct file *fp; { - fp->f_count++; + FILE_LOCK(fp); + fhold_locked(fp); + FILE_UNLOCK(fp); + return (fp); } static __inline int fo_read __P((struct file *fp, struct uio *uio, @@ -143,6 +192,9 @@ static __inline int fo_stat __P((struct file *fp, struct stat *sb, struct thread *td)); static __inline int fo_close __P((struct file *fp, struct thread *td)); static __inline int 
fo_kqfilter __P((struct file *fp, struct knote *kn)); +struct proc; +struct file *ffind_hold(struct thread *, int fd); +struct file *ffind_lock(struct thread *, int fd); static __inline int fo_read(fp, uio, cred, flags, td) @@ -152,12 +204,8 @@ fo_read(fp, uio, cred, flags, td) struct thread *td; int flags; { - int error; - fhold(fp); - error = (*fp->f_ops->fo_read)(fp, uio, cred, flags, td); - fdrop(fp, td); - return (error); + return ((*fp->f_ops->fo_read)(fp, uio, cred, flags, td)); } static __inline int @@ -168,12 +216,7 @@ fo_write(fp, uio, cred, flags, td) struct thread *td; int flags; { - int error; - - fhold(fp); - error = (*fp->f_ops->fo_write)(fp, uio, cred, flags, td); - fdrop(fp, td); - return (error); + return ((*fp->f_ops->fo_write)(fp, uio, cred, flags, td)); } static __inline int @@ -183,12 +226,7 @@ fo_ioctl(fp, com, data, td) caddr_t data; struct thread *td; { - int error; - - fhold(fp); - error = (*fp->f_ops->fo_ioctl)(fp, com, data, td); - fdrop(fp, td); - return (error); + return ((*fp->f_ops->fo_ioctl)(fp, com, data, td)); } static __inline int @@ -198,12 +236,8 @@ fo_poll(fp, events, cred, td) struct ucred *cred; struct thread *td; { - int error; - - fhold(fp); - error = (*fp->f_ops->fo_poll)(fp, events, cred, td); - fdrop(fp, td); - return (error); + /* select(2) and poll(2) hold file descriptors. */ + return ((*fp->f_ops->fo_poll)(fp, events, cred, td)); } static __inline int @@ -212,12 +246,7 @@ fo_stat(fp, sb, td) struct stat *sb; struct thread *td; { - int error; - - fhold(fp); - error = (*fp->f_ops->fo_stat)(fp, sb, td); - fdrop(fp, td); - return (error); + return ((*fp->f_ops->fo_stat)(fp, sb, td)); } static __inline int diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index 3c1cb8d2cbad..7699a26b8f26 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -37,7 +37,10 @@ #ifndef _SYS_FILEDESC_H_ #define _SYS_FILEDESC_H_ +#include +#include #include +#include /* * This structure is used for the management of descriptors. 
It may be @@ -63,8 +66,8 @@ struct filedesc { struct vnode *fd_rdir; /* root directory */ struct vnode *fd_jdir; /* jail root directory */ int fd_nfiles; /* number of open files allocated */ - u_short fd_lastfile; /* high-water mark of fd_ofiles */ - u_short fd_freefile; /* approx. next free file */ + int fd_lastfile; /* high-water mark of fd_ofiles */ + int fd_freefile; /* approx. next free file */ u_short fd_cmask; /* mask for file creation */ u_short fd_refcnt; /* reference count */ @@ -72,6 +75,7 @@ struct filedesc { struct klist *fd_knlist; /* list of attached knotes */ u_long fd_knhashmask; /* size of knhash */ struct klist *fd_knhash; /* hash table for attached knotes */ + struct mtx fd_mtx; /* mtx to protect the members of struct filedesc */ }; /* @@ -125,6 +129,27 @@ struct sigio { SLIST_HEAD(sigiolst, sigio); #ifdef _KERNEL + +/* Lock a file descriptor table. */ +/*#define FILEDESC_LOCK_DEBUG*/ +#ifdef FILEDESC_LOCK_DEBUG +#define FILEDESC_LOCK(fd) \ + do { \ + printf("FD_LCK: %p %s %d\n", &(fd)->fd_mtx, __FILE__, __LINE__); \ + mtx_lock(&(fd)->fd_mtx); \ + } while (0) +#define FILEDESC_UNLOCK(fd) \ + do { \ + printf("FD_REL: %p %s %d\n", &(fd)->fd_mtx, __FILE__, __LINE__); \ + mtx_unlock(&(fd)->fd_mtx); \ + } while (0) +#else +#define FILEDESC_LOCK(fd) mtx_lock(&(fd)->fd_mtx) +#define FILEDESC_UNLOCK(fd) mtx_unlock(&(fd)->fd_mtx) +#endif +#define FILEDESC_LOCKED(fd) mtx_owned(&(fd)->fd_mtx) +#define FILEDESC_LOCK_ASSERT(fd, type) mtx_assert(&(fd)->fd_mtx, (type)) + int closef __P((struct file *fp, struct thread *p)); int dupfdopen __P((struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error)); diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index e2fb3abe837d..321bf5ac59d7 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -1906,10 +1906,12 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) vn_start_write((struct vnode *)fp->f_data, &mp, V_WAIT); if (mp == 0 || strncmp(mp->mnt_stat.f_fstypename, "ufs", 
MFSNAMELEN)) { vn_finished_write(mp); + fdrop(fp, curthread); return (EINVAL); } if (mp->mnt_flag & MNT_RDONLY) { vn_finished_write(mp); + fdrop(fp, curthread); return (EROFS); } ump = VFSTOUFS(mp); @@ -2041,6 +2043,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) break; } + fdrop(fp, curthread); vn_finished_write(mp); return (error); } diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 015ef9bc9325..9bf68c118722 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -199,7 +199,6 @@ mmap(td, uap) struct thread *td; struct mmap_args *uap; { - struct filedesc *fdp = td->td_proc->p_fd; struct file *fp = NULL; struct vnode *vp; vm_offset_t addr; @@ -218,6 +217,7 @@ mmap(td, uap) flags = uap->flags; pos = uap->pos; + fp = NULL; /* make sure mapping fits into numeric range etc */ if ((ssize_t) uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1)) @@ -290,22 +290,18 @@ mmap(td, uap) /* * Mapping file, get fp for validation. Obtain vnode and make * sure it is of appropriate type. + * don't let the descriptor disappear on us if we block */ - if (((unsigned) uap->fd) >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[uap->fd]) == NULL) { + fp = ffind_hold(td, uap->fd); + if (fp == NULL) { error = EBADF; - goto done2; + goto done; } if (fp->f_type != DTYPE_VNODE) { error = EINVAL; - goto done2; + goto done; } - /* - * don't let the descriptor disappear on us if we block - */ - fhold(fp); - /* * POSIX shared-memory objects are defined to have * kernel persistence, and are not defined to support @@ -437,7 +433,6 @@ mmap(td, uap) done: if (fp) fdrop(fp, td); -done2: mtx_unlock(&Giant); return (error); } @@ -642,7 +637,9 @@ munmapfd(td, fd) /* * XXX should unmap any regions mapped to this file */ + FILEDESC_LOCK(p->p_fd); td->td_proc->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED; + FILEDESC_UNLOCK(p->p_fd); } #endif