From 106ebb761a59ed9f60cd467aebd917479132e1aa Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Wed, 16 Dec 2015 08:48:37 +0000 Subject: [PATCH] Optimize vop_stdadvise(POSIX_FADV_DONTNEED). Instead of looking up a buffer for each block number in the range with gbincore(), look up the next instantiated buffer with the logical block number which is greater than or equal to the next lblkno. This significantly speeds up the iteration for a sparsely populated range. Move the iteration into a new helper bnoreuselist(), which is structured similarly to flushbuflist(). Reported and tested by: pho Reviewed by: markj Sponsored by: The FreeBSD Foundation --- sys/kern/vfs_default.c | 42 +++++++++++++----------------------------- sys/kern/vfs_subr.c | 39 +++++++++++++++++++++++++++++++++++++++ sys/sys/vnode.h | 2 ++ 3 files changed, 54 insertions(+), 29 deletions(-) diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 6ee094b9144b..e71294a78ae8 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -1034,10 +1034,9 @@ vop_stdallocate(struct vop_allocate_args *ap) int vop_stdadvise(struct vop_advise_args *ap) { - struct buf *bp; - struct buflists *bl; struct vnode *vp; - daddr_t bn, startn, endn; + struct bufobj *bo; + daddr_t startn, endn; off_t start, end; int bsize, error; @@ -1074,36 +1073,21 @@ vop_stdadvise(struct vop_advise_args *ap) VM_OBJECT_WUNLOCK(vp->v_object); } - BO_RLOCK(&vp->v_bufobj); + bo = &vp->v_bufobj; + BO_RLOCK(bo); bsize = vp->v_bufobj.bo_bsize; startn = ap->a_start / bsize; - endn = -1; - bl = &vp->v_bufobj.bo_clean.bv_hd; - if (!TAILQ_EMPTY(bl)) - endn = TAILQ_LAST(bl, buflists)->b_lblkno; - bl = &vp->v_bufobj.bo_dirty.bv_hd; - if (!TAILQ_EMPTY(bl) && - endn < TAILQ_LAST(bl, buflists)->b_lblkno) - endn = TAILQ_LAST(bl, buflists)->b_lblkno; - if (ap->a_end != OFF_MAX && endn != -1) - endn = ap->a_end / bsize; - BO_RUNLOCK(&vp->v_bufobj); - /* - * In the VMIO case, use the B_NOREUSE flag to hint that the - * pages backing each 
buffer in the range are unlikely to be - * reused. Dirty buffers will have the hint applied once - * they've been written. - */ - for (bn = startn; bn <= endn; bn++) { - bp = getblk(vp, bn, bsize, 0, 0, GB_NOCREAT | - GB_UNMAPPED); - if (bp == NULL) + endn = ap->a_end / bsize; + for (;;) { + error = bnoreuselist(&bo->bo_clean, bo, startn, endn); + if (error == EAGAIN) continue; - bp->b_flags |= B_RELBUF; - if (vp->v_object != NULL) - bp->b_flags |= B_NOREUSE; - brelse(bp); + error = bnoreuselist(&bo->bo_dirty, bo, startn, endn); + if (error == EAGAIN) + continue; + break; } + BO_RUNLOCK(bo); VOP_UNLOCK(vp, 0); break; default: diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 9f9be55f2977..ace97e86fcc8 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1660,6 +1660,45 @@ flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, return (retval); } +int +bnoreuselist(struct bufv *bufv, struct bufobj *bo, daddr_t startn, daddr_t endn) +{ + struct buf *bp; + int error; + daddr_t lblkno; + + ASSERT_BO_LOCKED(bo); + + for (lblkno = startn;; lblkno++) { + bp = BUF_PCTRIE_LOOKUP_GE(&bufv->bv_root, lblkno); + if (bp == NULL || bp->b_lblkno >= endn) + break; + error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | + LK_INTERLOCK, BO_LOCKPTR(bo), "brlsfl", 0, 0); + if (error != 0) { + BO_RLOCK(bo); + return (error != ENOLCK ? error : EAGAIN); + } + KASSERT(bp->b_bufobj == bo, + ("bp %p wrong b_bufobj %p should be %p", + bp, bp->b_bufobj, bo)); + if ((bp->b_flags & B_MANAGED) == 0) + bremfree(bp); + bp->b_flags |= B_RELBUF; + /* + * In the VMIO case, use the B_NOREUSE flag to hint that the + * pages backing each buffer in the range are unlikely to be + * reused. Dirty buffers will have the hint applied once + * they've been written. + */ + if (bp->b_vp->v_object != NULL) + bp->b_flags |= B_NOREUSE; + brelse(bp); + BO_RLOCK(bo); + } + return (0); +} + /* * Truncate a file's buffer and pages to a specified length. 
This * is in lieu of the old vinvalbuf mechanism, which performed unneeded diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 2ee63f255ea4..7706b255d0b2 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -605,6 +605,8 @@ struct vnode; typedef int (*vn_get_ino_t)(struct mount *, void *, int, struct vnode **); +int bnoreuselist(struct bufv *bufv, struct bufobj *bo, daddr_t startn, + daddr_t endn); /* cache_* may belong in namei.h. */ void cache_changesize(int newhashsize); #define cache_enter(dvp, vp, cnp) \