From 571dcd15e24b65783f9a5063cf81b9cc95449ffe Mon Sep 17 00:00:00 2001 From: Suleiman Souhlal Date: Fri, 1 Jul 2005 16:28:32 +0000 Subject: [PATCH] Fix the recent panics/LORs/hangs created by my kqueue commit by: - Introducing the possibility of using locks different than mutexes for the knlist locking. In order to do this, we add three arguments to knlist_init() to specify the functions to use to lock, unlock and check if the lock is owned. If these arguments are NULL, we assume mtx_lock, mtx_unlock and mtx_owned, respectively. - Using the vnode lock for the knlist locking, when doing kqueue operations on a vnode. This way, we don't have to lock the vnode while holding a mutex, in filt_vfsread. Reviewed by: jmg Approved by: re (scottl), scottl (mentor override) Pointyhat to: ssouhlal Will be happy: everyone --- sys/cam/scsi/scsi_target.c | 2 +- sys/kern/init_main.c | 2 +- sys/kern/kern_event.c | 111 +++++++++++++++++++++++++++---------- sys/kern/kern_fork.c | 2 +- sys/kern/sys_pipe.c | 6 +- sys/kern/tty.c | 4 +- sys/kern/uipc_sockbuf.c | 6 +- sys/kern/uipc_socket.c | 6 +- sys/kern/uipc_socket2.c | 6 +- sys/kern/vfs_aio.c | 2 +- sys/kern/vfs_subr.c | 72 ++++++++++++++++-------- sys/net/bpf.c | 2 +- sys/net/if.c | 4 +- sys/sys/event.h | 9 ++- sys/sys/mount.h | 11 +++- sys/sys/vnode.h | 6 +- 16 files changed, 176 insertions(+), 75 deletions(-) diff --git a/sys/cam/scsi/scsi_target.c b/sys/cam/scsi/scsi_target.c index b60c805f6892..0b7caac831c3 100644 --- a/sys/cam/scsi/scsi_target.c +++ b/sys/cam/scsi/scsi_target.c @@ -196,7 +196,7 @@ targopen(struct cdev *dev, int flags, int fmt, struct thread *td) TAILQ_INIT(&softc->work_queue); TAILQ_INIT(&softc->abort_queue); TAILQ_INIT(&softc->user_ccb_queue); - knlist_init(&softc->read_select.si_note, &softc->mtx); + knlist_init(&softc->read_select.si_note, &softc->mtx, NULL, NULL, NULL); return (0); } diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 8c9f7ff13a39..1b356650c927 100644 --- a/sys/kern/init_main.c +++ 
b/sys/kern/init_main.c @@ -368,7 +368,7 @@ proc0_init(void *dummy __unused) p->p_flag = P_SYSTEM; p->p_sflag = PS_INMEM; p->p_state = PRS_NORMAL; - knlist_init(&p->p_klist, &p->p_mtx); + knlist_init(&p->p_klist, &p->p_mtx, NULL, NULL, NULL); p->p_nice = NZERO; td->td_state = TDS_RUNNING; kg->kg_pri_class = PRI_TIMESHARE; diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index d9ec1fd01aae..2e71f23d29db 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -185,12 +185,31 @@ SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, } while (0) #define KN_LIST_LOCK(kn) do { \ if (kn->kn_knlist != NULL) \ - mtx_lock(kn->kn_knlist->kl_lock); \ + kn->kn_knlist->kl_lock(kn->kn_knlist->kl_lockarg); \ } while (0) #define KN_LIST_UNLOCK(kn) do { \ - if (kn->kn_knlist != NULL) \ - mtx_unlock(kn->kn_knlist->kl_lock); \ + if (kn->kn_knlist != NULL) \ + kn->kn_knlist->kl_unlock(kn->kn_knlist->kl_lockarg); \ } while (0) +#define KNL_ASSERT_LOCK(knl, islocked) do { \ + if (islocked) \ + KNL_ASSERT_LOCKED(knl); \ + else \ + KNL_ASSERT_UNLOCKED(knl); \ +} while (0) +#ifdef INVARIANTS +#define KNL_ASSERT_LOCKED(knl) do { \ + if (!knl->kl_locked((knl)->kl_lockarg)) \ + panic("knlist not locked, but should be"); \ +} while (0) +#define KNL_ASSERT_UNLOCKED(knl) do { \ + if (knl->kl_locked((knl)->kl_lockarg)) \ + panic("knlist locked, but should not be"); \ +} while (0) +#else /* !INVARIANTS */ +#define KNL_ASSERT_LOCKED(knl) do {} while(0) +#define KNL_ASSERT_UNLOCKED(knl) do {} while (0) +#endif /* INVARIANTS */ #define KN_HASHSIZE 64 /* XXX should be tunable */ #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) @@ -499,7 +518,7 @@ kqueue(struct thread *td, struct kqueue_args *uap) mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK); TAILQ_INIT(&kq->kq_head); kq->kq_fdp = fdp; - knlist_init(&kq->kq_sel.si_note, &kq->kq_lock); + knlist_init(&kq->kq_sel.si_note, &kq->kq_lock, NULL, NULL, NULL); TASK_INIT(&kq->kq_task, 0, kqueue_task, kq); 
FILEDESC_LOCK_FAST(fdp); @@ -1507,9 +1526,11 @@ knote(struct knlist *list, long hint, int islocked) if (list == NULL) return; - mtx_assert(list->kl_lock, islocked ? MA_OWNED : MA_NOTOWNED); - if (!islocked) - mtx_lock(list->kl_lock); + KNL_ASSERT_LOCK(list, islocked); + + if (!islocked) + list->kl_lock(list->kl_lockarg); + /* * If we unlock the list lock (and set KN_INFLUX), we can eliminate * the kqueue scheduling, but this will introduce four @@ -1533,7 +1554,7 @@ knote(struct knlist *list, long hint, int islocked) kq = NULL; } if (!islocked) - mtx_unlock(list->kl_lock); + list->kl_unlock(list->kl_lockarg); } /* @@ -1542,15 +1563,15 @@ knote(struct knlist *list, long hint, int islocked) void knlist_add(struct knlist *knl, struct knote *kn, int islocked) { - mtx_assert(knl->kl_lock, islocked ? MA_OWNED : MA_NOTOWNED); + KNL_ASSERT_LOCK(knl, islocked); KQ_NOTOWNED(kn->kn_kq); KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == (KN_INFLUX|KN_DETACHED), ("knote not KN_INFLUX and KN_DETACHED")); if (!islocked) - mtx_lock(knl->kl_lock); + knl->kl_lock(knl->kl_lockarg); SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext); if (!islocked) - mtx_unlock(knl->kl_lock); + knl->kl_unlock(knl->kl_lockarg); KQ_LOCK(kn->kn_kq); kn->kn_knlist = knl; kn->kn_status &= ~KN_DETACHED; @@ -1561,17 +1582,17 @@ static void knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqislocked) { KASSERT(!(!!kqislocked && !knlislocked), ("kq locked w/o knl locked")); - mtx_assert(knl->kl_lock, knlislocked ? MA_OWNED : MA_NOTOWNED); + KNL_ASSERT_LOCK(knl, knlislocked); mtx_assert(&kn->kn_kq->kq_lock, kqislocked ? 
MA_OWNED : MA_NOTOWNED); if (!kqislocked) KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == KN_INFLUX, ("knlist_remove called w/o knote being KN_INFLUX or already removed")); if (!knlislocked) - mtx_lock(knl->kl_lock); + knl->kl_lock(knl->kl_lockarg); SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext); kn->kn_knlist = NULL; if (!knlislocked) - mtx_unlock(knl->kl_lock); + knl->kl_unlock(knl->kl_lockarg); if (!kqislocked) KQ_LOCK(kn->kn_kq); kn->kn_status |= KN_DETACHED; @@ -1603,23 +1624,57 @@ knlist_remove_inevent(struct knlist *knl, struct knote *kn) int knlist_empty(struct knlist *knl) { - - mtx_assert(knl->kl_lock, MA_OWNED); + KNL_ASSERT_LOCKED(knl); return SLIST_EMPTY(&knl->kl_list); } static struct mtx knlist_lock; MTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects", MTX_DEF); +static void knlist_mtx_lock(void *arg); +static void knlist_mtx_unlock(void *arg); +static int knlist_mtx_locked(void *arg); + +static void +knlist_mtx_lock(void *arg) +{ + mtx_lock((struct mtx *)arg); +} + +static void +knlist_mtx_unlock(void *arg) +{ + mtx_unlock((struct mtx *)arg); +} + +static int +knlist_mtx_locked(void *arg) +{ + return (mtx_owned((struct mtx *)arg)); +} void -knlist_init(struct knlist *knl, struct mtx *mtx) +knlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *), + void (*kl_unlock)(void *), int (*kl_locked)(void *)) { - if (mtx == NULL) - knl->kl_lock = &knlist_lock; + if (lock == NULL) + knl->kl_lockarg = &knlist_lock; else - knl->kl_lock = mtx; + knl->kl_lockarg = lock; + + if (kl_lock == NULL) + knl->kl_lock = knlist_mtx_lock; + else + knl->kl_lock = kl_lock; + if (kl_unlock == NULL) + knl->kl_unlock = knlist_mtx_unlock; + else + knl->kl_unlock = kl_unlock; + if (kl_locked == NULL) + knl->kl_locked = knlist_mtx_locked; + else + knl->kl_locked = kl_locked; SLIST_INIT(&knl->kl_list); } @@ -1637,7 +1692,7 @@ knlist_destroy(struct knlist *knl) printf("WARNING: destroying knlist w/ knotes on it!\n"); #endif - knl->kl_lock = 
NULL; + knl->kl_lockarg = knl->kl_lock = knl->kl_unlock = NULL; SLIST_INIT(&knl->kl_list); } @@ -1652,11 +1707,11 @@ knlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn) struct kqueue *kq; if (islocked) - mtx_assert(knl->kl_lock, MA_OWNED); + KNL_ASSERT_LOCKED(knl); else { - mtx_assert(knl->kl_lock, MA_NOTOWNED); + KNL_ASSERT_UNLOCKED(knl); again: /* need to reaquire lock since we have dropped it */ - mtx_lock(knl->kl_lock); + knl->kl_lock(knl->kl_lockarg); } SLIST_FOREACH(kn, &knl->kl_list, kn_selnext) { @@ -1686,7 +1741,7 @@ knlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn) KQ_LOCK(kq); KASSERT(kn->kn_status & KN_INFLUX, ("knote removed w/o list lock")); - mtx_unlock(knl->kl_lock); + knl->kl_unlock(knl->kl_lockarg); kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqkclr", 0); kq = NULL; @@ -1694,10 +1749,10 @@ knlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn) } if (islocked) - mtx_assert(knl->kl_lock, MA_OWNED); + KNL_ASSERT_LOCKED(knl); else { - mtx_unlock(knl->kl_lock); - mtx_assert(knl->kl_lock, MA_NOTOWNED); + knl->kl_unlock(knl->kl_lockarg); + KNL_ASSERT_UNLOCKED(knl); } } diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 75214492a9ad..56ad939f480c 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -271,7 +271,7 @@ fork1(td, flags, pages, procp) #ifdef MAC mac_init_proc(newproc); #endif - knlist_init(&newproc->p_klist, &newproc->p_mtx); + knlist_init(&newproc->p_klist, &newproc->p_mtx, NULL, NULL, NULL); /* We have to lock the process tree while we look for a pid. 
*/ sx_slock(&proctree_lock); diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 267670cd0b16..d79a3198bd23 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -351,8 +351,10 @@ pipe(td, uap) rpipe = &pp->pp_rpipe; wpipe = &pp->pp_wpipe; - knlist_init(&rpipe->pipe_sel.si_note, PIPE_MTX(rpipe)); - knlist_init(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe)); + knlist_init(&rpipe->pipe_sel.si_note, PIPE_MTX(rpipe), NULL, NULL, + NULL); + knlist_init(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe), NULL, NULL, + NULL); /* Only the forward direction pipe is backed by default */ if (pipe_create(rpipe, 1) || pipe_create(wpipe, 0)) { diff --git a/sys/kern/tty.c b/sys/kern/tty.c index 8a6585b20bdb..62634321a723 100644 --- a/sys/kern/tty.c +++ b/sys/kern/tty.c @@ -2865,8 +2865,8 @@ ttymalloc(struct tty *tp) mtx_lock(&tty_list_mutex); TAILQ_INSERT_TAIL(&tty_list, tp, t_list); mtx_unlock(&tty_list_mutex); - knlist_init(&tp->t_rsel.si_note, &tp->t_mtx); - knlist_init(&tp->t_wsel.si_note, &tp->t_mtx); + knlist_init(&tp->t_rsel.si_note, &tp->t_mtx, NULL, NULL, NULL); + knlist_init(&tp->t_wsel.si_note, &tp->t_mtx, NULL, NULL, NULL); return (tp); } diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c index cb1a24c481c9..e979251dd940 100644 --- a/sys/kern/uipc_sockbuf.c +++ b/sys/kern/uipc_sockbuf.c @@ -235,8 +235,10 @@ sonewconn(head, connstatus) mac_create_socket_from_socket(head, so); SOCK_UNLOCK(head); #endif - knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv)); - knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd)); + knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv), + NULL, NULL, NULL); + knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd), + NULL, NULL, NULL); if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) || (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { sodealloc(so); diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 7ffc1c4fe53b..c776c0c3bc12 100644 --- 
a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -211,8 +211,10 @@ socreate(dom, aso, type, proto, cred, td) #ifdef MAC mac_create_socket(cred, so); #endif - knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv)); - knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd)); + knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv), + NULL, NULL, NULL); + knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd), + NULL, NULL, NULL); so->so_count = 1; error = (*prp->pr_usrreqs->pru_attach)(so, proto, td); if (error) { diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c index cb1a24c481c9..e979251dd940 100644 --- a/sys/kern/uipc_socket2.c +++ b/sys/kern/uipc_socket2.c @@ -235,8 +235,10 @@ sonewconn(head, connstatus) mac_create_socket_from_socket(head, so); SOCK_UNLOCK(head); #endif - knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv)); - knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd)); + knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv), + NULL, NULL, NULL); + knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd), + NULL, NULL, NULL); if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) || (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { sodealloc(so); diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 7cae398f963a..87ab064df54a 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -1287,7 +1287,7 @@ _aio_aqueue(struct thread *td, struct aiocb *job, struct aio_liojob *lj, int typ aiocbe->inputcharge = 0; aiocbe->outputcharge = 0; /* XXX - need a lock */ - knlist_init(&aiocbe->klist, NULL); + knlist_init(&aiocbe->klist, NULL, NULL, NULL, NULL); suword(&job->_aiocb_private.status, -1); suword(&job->_aiocb_private.error, 0); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 1697696acf40..92cb02578f27 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -98,6 +98,10 @@ static void vfree(struct vnode *); static 
void vnlru_free(int); static void vdestroy(struct vnode *); static void vgonel(struct vnode *); +static void vfs_knllock(void *arg); +static void vfs_knlunlock(void *arg); +static int vfs_knllocked(void *arg); + /* * Enable Giant pushdown based on whether or not the vm is mpsafe in this @@ -2834,8 +2838,8 @@ v_addpollinfo(struct vnode *vp) } vp->v_pollinfo = vi; mtx_init(&vp->v_pollinfo->vpi_lock, "vnode pollinfo", NULL, MTX_DEF); - knlist_init(&vp->v_pollinfo->vpi_selinfo.si_note, - &vp->v_pollinfo->vpi_lock); + knlist_init(&vp->v_pollinfo->vpi_selinfo.si_note, vp, vfs_knllock, + vfs_knlunlock, vfs_knllocked); } /* @@ -3473,7 +3477,7 @@ vop_create_post(void *ap, int rc) struct vop_create_args *a = ap; if (!rc) - VFS_SEND_KNOTE(a->a_dvp, NOTE_WRITE); + VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void @@ -3482,8 +3486,8 @@ vop_link_post(void *ap, int rc) struct vop_link_args *a = ap; if (!rc) { - VFS_SEND_KNOTE(a->a_vp, NOTE_LINK); - VFS_SEND_KNOTE(a->a_tdvp, NOTE_WRITE); + VFS_KNOTE_LOCKED(a->a_vp, NOTE_LINK); + VFS_KNOTE_LOCKED(a->a_tdvp, NOTE_WRITE); } } @@ -3493,7 +3497,7 @@ vop_mkdir_post(void *ap, int rc) struct vop_mkdir_args *a = ap; if (!rc) - VFS_SEND_KNOTE(a->a_dvp, NOTE_WRITE | NOTE_LINK); + VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); } void @@ -3502,7 +3506,7 @@ vop_mknod_post(void *ap, int rc) struct vop_mknod_args *a = ap; if (!rc) - VFS_SEND_KNOTE(a->a_dvp, NOTE_WRITE); + VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void @@ -3511,8 +3515,8 @@ vop_remove_post(void *ap, int rc) struct vop_remove_args *a = ap; if (!rc) { - VFS_SEND_KNOTE(a->a_dvp, NOTE_WRITE); - VFS_SEND_KNOTE(a->a_vp, NOTE_DELETE); + VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); + VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } @@ -3522,11 +3526,11 @@ vop_rename_post(void *ap, int rc) struct vop_rename_args *a = ap; if (!rc) { - VFS_SEND_KNOTE(a->a_fdvp, NOTE_WRITE); - VFS_SEND_KNOTE(a->a_tdvp, NOTE_WRITE); - VFS_SEND_KNOTE(a->a_fvp, NOTE_RENAME); + VFS_KNOTE_UNLOCKED(a->a_fdvp, NOTE_WRITE); 
+ VFS_KNOTE_UNLOCKED(a->a_tdvp, NOTE_WRITE); + VFS_KNOTE_UNLOCKED(a->a_fvp, NOTE_RENAME); if (a->a_tvp) - VFS_SEND_KNOTE(a->a_tvp, NOTE_DELETE); + VFS_KNOTE_UNLOCKED(a->a_tvp, NOTE_DELETE); } if (a->a_tdvp != a->a_fdvp) vdrop(a->a_fdvp); @@ -3543,8 +3547,8 @@ vop_rmdir_post(void *ap, int rc) struct vop_rmdir_args *a = ap; if (!rc) { - VFS_SEND_KNOTE(a->a_dvp, NOTE_WRITE | NOTE_LINK); - VFS_SEND_KNOTE(a->a_vp, NOTE_DELETE); + VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); + VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } @@ -3554,7 +3558,7 @@ vop_setattr_post(void *ap, int rc) struct vop_setattr_args *a = ap; if (!rc) - VFS_SEND_KNOTE(a->a_vp, NOTE_ATTRIB); + VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void @@ -3563,7 +3567,7 @@ vop_symlink_post(void *ap, int rc) struct vop_symlink_args *a = ap; if (!rc) - VFS_SEND_KNOTE(a->a_dvp, NOTE_WRITE); + VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } static struct knlist fs_knlist; @@ -3571,7 +3575,7 @@ static struct knlist fs_knlist; static void vfs_event_init(void *arg) { - knlist_init(&fs_knlist, NULL); + knlist_init(&fs_knlist, NULL, NULL, NULL, NULL); } /* XXX - correct order? 
*/ SYSINIT(vfs_knlist, SI_SUB_VFS, SI_ORDER_ANY, vfs_event_init, NULL); @@ -3658,7 +3662,6 @@ static int filt_vfsread(struct knote *kn, long hint); static int filt_vfswrite(struct knote *kn, long hint); static int filt_vfsvnode(struct knote *kn, long hint); static void filt_vfsdetach(struct knote *kn); - static struct filterops vfsread_filtops = { 1, NULL, filt_vfsdetach, filt_vfsread }; static struct filterops vfswrite_filtops = @@ -3666,11 +3669,36 @@ static struct filterops vfswrite_filtops = static struct filterops vfsvnode_filtops = { 1, NULL, filt_vfsdetach, filt_vfsvnode }; +static void +vfs_knllock(void *arg) +{ + struct vnode *vp = arg; + + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread); +} + +static void +vfs_knlunlock(void *arg) +{ + struct vnode *vp = arg; + + VOP_UNLOCK(vp, 0, curthread); +} + +static int +vfs_knllocked(void *arg) +{ + struct vnode *vp = arg; + + return (VOP_ISLOCKED(vp, curthread) == LK_EXCLUSIVE); +} + int vfs_kqfilter(struct vop_kqfilter_args *ap) { struct vnode *vp = ap->a_vp; struct knote *kn = ap->a_kn; + struct knlist *knl; switch (kn->kn_filter) { case EVFILT_READ: @@ -3692,7 +3720,8 @@ vfs_kqfilter(struct vop_kqfilter_args *ap) v_addpollinfo(vp); if (vp->v_pollinfo == NULL) return (ENOMEM); - knlist_add(&vp->v_pollinfo->vpi_selinfo.si_note, kn, 0); + knl = &vp->v_pollinfo->vpi_selinfo.si_note; + knlist_add(knl, kn, 0); return (0); } @@ -3725,11 +3754,8 @@ filt_vfsread(struct knote *kn, long hint) return (1); } - vn_lock(vp, LK_SHARED | LK_RETRY, curthread); if (VOP_GETATTR(vp, &va, curthread->td_ucred, curthread)) return (0); - if (VOP_UNLOCK(vp, 0, curthread)) - return (0); kn->kn_data = va.va_size - kn->kn_fp->f_offset; return (kn->kn_data != 0); diff --git a/sys/net/bpf.c b/sys/net/bpf.c index 514d6d29d9a7..e917866f3c0e 100644 --- a/sys/net/bpf.c +++ b/sys/net/bpf.c @@ -375,7 +375,7 @@ bpfopen(dev, flags, fmt, td) #endif mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF); callout_init(&d->bd_callout, 
NET_CALLOUT_MPSAFE); - knlist_init(&d->bd_sel.si_note, &d->bd_mtx); + knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL); return (0); } diff --git a/sys/net/if.c b/sys/net/if.c index e0285af73daf..12090dffc831 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -279,7 +279,7 @@ if_init(void *dummy __unused) IFNET_LOCK_INIT(); TAILQ_INIT(&ifnet); - knlist_init(&ifklist, NULL); + knlist_init(&ifklist, NULL, NULL, NULL, NULL); if_grow(); /* create initial table */ ifdev_byindex(0) = make_dev(&net_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "network"); @@ -473,7 +473,7 @@ if_attach(struct ifnet *ifp) TAILQ_INIT(&ifp->if_addrhead); TAILQ_INIT(&ifp->if_prefixhead); TAILQ_INIT(&ifp->if_multiaddrs); - knlist_init(&ifp->if_klist, NULL); + knlist_init(&ifp->if_klist, NULL, NULL, NULL, NULL); getmicrotime(&ifp->if_lastchange); ifp->if_data.ifi_epoch = time_uptime; ifp->if_data.ifi_datalen = sizeof(struct if_data); diff --git a/sys/sys/event.h b/sys/sys/event.h index 82764bcb4899..c2921c43c09a 100644 --- a/sys/sys/event.h +++ b/sys/sys/event.h @@ -126,8 +126,11 @@ SLIST_HEAD(klist, knote); struct kqueue; SLIST_HEAD(kqlist, kqueue); struct knlist { - struct mtx *kl_lock; /* lock to protect kll_list */ struct klist kl_list; + void (*kl_lock)(void *); /* lock function */ + void (*kl_unlock)(void *); + int (*kl_locked)(void *); + void *kl_lockarg; /* argument passed to kl_lock(), kl_unlock() and kl_locked() */ }; @@ -209,7 +212,9 @@ extern void knlist_add(struct knlist *knl, struct knote *kn, int islocked); extern void knlist_remove(struct knlist *knl, struct knote *kn, int islocked); extern void knlist_remove_inevent(struct knlist *knl, struct knote *kn); extern int knlist_empty(struct knlist *knl); -extern void knlist_init(struct knlist *knl, struct mtx *mtx); +extern void knlist_init(struct knlist *knl, void *lock, + void (*kl_lock)(void *), void (*kl_unlock)(void *), + int (*kl_locked)(void *)); extern void knlist_destroy(struct knlist *knl); extern void knlist_cleardel(struct knlist *knl, struct thread 
*td, int islocked, int killkn); diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 3ab383e0fafd..c70347cf9871 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -550,11 +550,18 @@ extern int mpsafe_vfs; mtx_assert(&Giant, MA_OWNED); \ } while (0) -#define VFS_SEND_KNOTE(vp, hint) do \ +#define VFS_KNOTE_LOCKED(vp, hint) do \ { \ if ((vp)->v_mount && \ ((vp)->v_mount->mnt_kern_flag & MNTK_NOKNOTE) == 0) \ - VN_KNOTE_UNLOCKED((vp), (hint)); \ + VN_KNOTE((vp), (hint), 1); \ +} while (0) + +#define VFS_KNOTE_UNLOCKED(vp, hint) do \ +{ \ + if ((vp)->v_mount && \ + ((vp)->v_mount->mnt_kern_flag & MNTK_NOKNOTE) == 0) \ + VN_KNOTE((vp), (hint), 0); \ } while (0) #include diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index b201f6d15134..2c9fd309263a 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -231,8 +231,8 @@ struct xvnode { if (!VN_KNLIST_EMPTY(vp)) \ KNOTE(&vp->v_pollinfo->vpi_selinfo.si_note, (b), (a)); \ } while (0) -#define VN_KNOTE_LOCKED(vp, b) VN_KNOTE(vp, b, 1) -#define VN_KNOTE_UNLOCKED(vp, b) VN_KNOTE(vp, b, 0) +#define VN_KNOTE_LOCKED(vp, b) VN_KNOTE(vp, b, 1) +#define VN_KNOTE_UNLOCKED(vp, b) VN_KNOTE(vp, b, 0) /* * Vnode flags. @@ -702,7 +702,7 @@ void vop_unlock_pre(void *a); #define VOP_WRITE_POST(ap, ret) \ noffset = (ap)->a_uio->uio_offset; \ if (noffset > ooffset && !VN_KNLIST_EMPTY((ap)->a_vp)) { \ - VFS_SEND_KNOTE((ap)->a_vp, NOTE_WRITE \ + VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_WRITE \ | (noffset > osize ? NOTE_EXTEND : 0)); \ }