Mirror of https://git.FreeBSD.org/src.git
Detect and optimize reads from holes on UFS.
- Create a getblkx(9) variant of getblk(9) which can return an error.
- Add a GB_NOSPARSE flag for getblk()/getblkx() which requests that a
  BMAP lookup be performed before the buffer is created, and that
  EJUSTRETURN be returned if the requested block does not exist.
- Make ffs_read() use GB_NOSPARSE to avoid instantiating a buffer (and
  allocating pages for it), copying from zero_region instead.

The end result is fewer page allocations and less buffer recycling when
a hole is read, which is important for some benchmarks.

Requested and reviewed by:  jeff
Tested by:  pho
Sponsored by:  The FreeBSD Foundation
MFC after:  2 weeks
Differential revision:  https://reviews.freebsd.org/D14917
Parent:  f1401123c5
Commit:  2ebc882927

Notes (svn2git, 2020-12-20 02:59:44 +00:00):
    svn path=/head/; revision=333576
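For orientation before the diff: the caller-side pattern the patch establishes is "ask the buffer cache not to instantiate holes, and synthesize zeroes when it reports one". Below is a minimal sketch of that pattern; read_one_block() is a hypothetical helper invented for illustration, while bread_gb(), vn_io_fault_uiomove(), zero_region, ZERO_REGION_SIZE and the GB_NOSPARSE/EJUSTRETURN convention are the interfaces the patch itself uses (compare ffs_read() and ffs_read_hole() in the diff).

static int
read_one_block(struct vnode *vp, daddr_t lbn, int size, struct uio *uio)
{
    struct buf *bp;
    ssize_t tlen;
    long left;
    int error;

    /*
     * GB_NOSPARSE: return EJUSTRETURN instead of instantiating a
     * buffer for an unallocated (sparse) logical block.
     */
    error = bread_gb(vp, lbn, size, NOCRED, GB_NOSPARSE, &bp);
    if (error == EJUSTRETURN) {
        /* Hole: no buffer, no I/O; copy zeroes from zero_region. */
        for (left = size; left > 0; left -= tlen) {
            tlen = min(left, ZERO_REGION_SIZE);
            error = vn_io_fault_uiomove(
                __DECONST(void *, zero_region), tlen, uio);
            if (error != 0)
                return (error);
        }
        return (0);
    }
    if (error != 0) {
        if (bp != NULL)
            brelse(bp);
        return (error);
    }
    /* Backed block: copy out of the (mapped) buffer as usual. */
    error = vn_io_fault_uiomove(bp->b_data, size, uio);
    bqrelse(bp);
    return (error);
}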
sys/kern/vfs_bio.c

@@ -2138,30 +2138,37 @@ breadn_flags(struct vnode *vp, daddr_t blkno, int size, daddr_t *rablkno,
     void (*ckhashfunc)(struct buf *), struct buf **bpp)
 {
     struct buf *bp;
-    int readwait, rv;
+    struct thread *td;
+    int error, readwait, rv;

     CTR3(KTR_BUF, "breadn(%p, %jd, %d)", vp, blkno, size);
+    td = curthread;
     /*
-     * Can only return NULL if GB_LOCK_NOWAIT flag is specified.
+     * Can only return NULL if GB_LOCK_NOWAIT or GB_SPARSE flags
+     * are specified.
      */
-    *bpp = bp = getblk(vp, blkno, size, 0, 0, flags);
-    if (bp == NULL)
-        return (EBUSY);
+    error = getblkx(vp, blkno, size, 0, 0, flags, &bp);
+    if (error != 0) {
+        *bpp = NULL;
+        return (error);
+    }
+    flags &= ~GB_NOSPARSE;
+    *bpp = bp;

     /*
      * If not found in cache, do some I/O
      */
     readwait = 0;
     if ((bp->b_flags & B_CACHE) == 0) {
-        if (!TD_IS_IDLETHREAD(curthread)) {
+        if (!TD_IS_IDLETHREAD(td)) {
 #ifdef RACCT
             if (racct_enable) {
-                PROC_LOCK(curproc);
-                racct_add_buf(curproc, bp, 0);
-                PROC_UNLOCK(curproc);
+                PROC_LOCK(td->td_proc);
+                racct_add_buf(td->td_proc, bp, 0);
+                PROC_UNLOCK(td->td_proc);
             }
 #endif /* RACCT */
-            curthread->td_ru.ru_inblock++;
+            td->td_ru.ru_inblock++;
         }
         bp->b_iocmd = BIO_READ;
         bp->b_flags &= ~B_INVAL;
@@ -3822,8 +3829,21 @@ bp_unmapped_get_kva(struct buf *bp, daddr_t blkno, int size, int gbflags)
     }
 }

+struct buf *
+getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
+    int flags)
+{
+    struct buf *bp;
+    int error;
+
+    error = getblkx(vp, blkno, size, slpflag, slptimeo, flags, &bp);
+    if (error != 0)
+        return (NULL);
+    return (bp);
+}
+
 /*
- * getblk:
+ * getblkx:
  *
  *  Get a block given a specified block and offset into a file/device.
  *  The buffers B_DONE bit will be cleared on return, making it almost
@@ -3858,12 +3878,13 @@ bp_unmapped_get_kva(struct buf *bp, daddr_t blkno, int size, int gbflags)
  *  intends to issue a READ, the caller must clear B_INVAL and BIO_ERROR
  *  prior to issuing the READ.  biodone() will *not* clear B_INVAL.
  */
-struct buf *
-getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
-    int flags)
+int
+getblkx(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
+    int flags, struct buf **bpp)
 {
     struct buf *bp;
     struct bufobj *bo;
+    daddr_t d_blkno;
     int bsize, error, maxsize, vmio;
     off_t offset;

@@ -3878,6 +3899,7 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
         flags &= ~(GB_UNMAPPED | GB_KVAALLOC);

     bo = &vp->v_bufobj;
+    d_blkno = blkno;
 loop:
     BO_RLOCK(bo);
     bp = gbincore(bo, blkno);
@@ -3889,7 +3911,7 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
          */
         lockflags = LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK;

-        if (flags & GB_LOCK_NOWAIT)
+        if ((flags & GB_LOCK_NOWAIT) != 0)
             lockflags |= LK_NOWAIT;

         error = BUF_TIMELOCK(bp, lockflags,
@@ -3902,8 +3924,8 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
         if (error == ENOLCK)
             goto loop;
         /* We timed out or were interrupted. */
-        else if (error)
-            return (NULL);
+        else if (error != 0)
+            return (error);
         /* If recursed, assume caller knows the rules. */
         else if (BUF_LOCKRECURSED(bp))
             goto end;
@@ -4008,10 +4030,10 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
          * here.
          */
         if (flags & GB_NOCREAT)
-            return NULL;
+            return (EEXIST);
         if (bdomain[bo->bo_domain].bd_freebuffers == 0 &&
             TD_IS_IDLETHREAD(curthread))
-            return NULL;
+            return (EBUSY);

         bsize = vn_isdisk(vp, NULL) ? DEV_BSIZE : bo->bo_bsize;
         KASSERT(bsize != 0, ("bsize == 0, check bo->bo_bsize"));
@@ -4025,11 +4047,22 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
             flags &= ~(GB_UNMAPPED | GB_KVAALLOC);
         }
         maxsize = imax(maxsize, bsize);
+        if ((flags & GB_NOSPARSE) != 0 && vmio &&
+            !vn_isdisk(vp, NULL)) {
+            error = VOP_BMAP(vp, blkno, NULL, &d_blkno, 0, 0);
+            KASSERT(error != EOPNOTSUPP,
+                ("GB_NOSPARSE from fs not supporting bmap, vp %p",
+                vp));
+            if (error != 0)
+                return (error);
+            if (d_blkno == -1)
+                return (EJUSTRETURN);
+        }

         bp = getnewbuf(vp, slpflag, slptimeo, maxsize, flags);
         if (bp == NULL) {
             if (slpflag || slptimeo)
-                return NULL;
+                return (ETIMEDOUT);
             /*
              * XXX This is here until the sleep path is diagnosed
              * enough to work under very low memory conditions.
@@ -4075,7 +4108,8 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
          * Insert the buffer into the hash, so that it can
          * be found by incore.
          */
-        bp->b_blkno = bp->b_lblkno = blkno;
+        bp->b_lblkno = blkno;
+        bp->b_blkno = d_blkno;
         bp->b_offset = offset;
         bgetvp(vp, bp);
         BO_UNLOCK(bo);
@@ -4110,7 +4144,8 @@ getblk(struct vnode *vp, daddr_t blkno, int size, int slpflag, int slptimeo,
     buf_track(bp, __func__);
     KASSERT(bp->b_bufobj == bo,
         ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo));
-    return (bp);
+    *bpp = bp;
+    return (0);
 }

 /*
sys/kern/vfs_cluster.c

@@ -94,12 +94,14 @@ cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,
 {
     struct buf *bp, *rbp, *reqbp;
     struct bufobj *bo;
+    struct thread *td;
     daddr_t blkno, origblkno;
     int maxra, racluster;
     int error, ncontig;
     int i;

     error = 0;
+    td = curthread;
     bo = &vp->v_bufobj;
     if (!unmapped_buf_allowed)
         gbflags &= ~GB_UNMAPPED;
@@ -118,10 +120,14 @@ cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,
     /*
      * get the requested block
      */
-    *bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0, gbflags);
-    if (bp == NULL)
-        return (EBUSY);
+    error = getblkx(vp, lblkno, size, 0, 0, gbflags, &bp);
+    if (error != 0) {
+        *bpp = NULL;
+        return (error);
+    }
+    gbflags &= ~GB_NOSPARSE;
     origblkno = lblkno;
+    *bpp = reqbp = bp;

     /*
      * if it is in the cache, then check to see if the reads have been
@@ -243,12 +249,12 @@ cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,
         bstrategy(bp);
 #ifdef RACCT
         if (racct_enable) {
-            PROC_LOCK(curproc);
-            racct_add_buf(curproc, bp, 0);
-            PROC_UNLOCK(curproc);
+            PROC_LOCK(td->td_proc);
+            racct_add_buf(td->td_proc, bp, 0);
+            PROC_UNLOCK(td->td_proc);
         }
 #endif /* RACCT */
-        curthread->td_ru.ru_inblock++;
+        td->td_ru.ru_inblock++;
     }

     /*
@@ -303,12 +309,12 @@ cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,
         bstrategy(rbp);
 #ifdef RACCT
         if (racct_enable) {
-            PROC_LOCK(curproc);
-            racct_add_buf(curproc, rbp, 0);
-            PROC_UNLOCK(curproc);
+            PROC_LOCK(td->td_proc);
+            racct_add_buf(td->td_proc, rbp, 0);
+            PROC_UNLOCK(td->td_proc);
         }
 #endif /* RACCT */
-        curthread->td_ru.ru_inblock++;
+        td->td_ru.ru_inblock++;
     }

     if (reqbp) {
sys/sys/buf.h

@@ -479,6 +479,7 @@ buf_track(struct buf *bp, const char *location)
 #define GB_UNMAPPED  0x0008      /* Do not mmap buffer pages. */
 #define GB_KVAALLOC  0x0010      /* But allocate KVA. */
 #define GB_CKHASH    0x0020      /* If reading, calc checksum hash */
+#define GB_NOSPARSE  0x0040      /* Do not instantiate holes */

 #ifdef _KERNEL
 extern int  nbuf;                /* The number of buffer headers */
@@ -540,6 +541,8 @@ struct buf * getpbuf(int *);
 struct buf *incore(struct bufobj *, daddr_t);
 struct buf *gbincore(struct bufobj *, daddr_t);
 struct buf *getblk(struct vnode *, daddr_t, int, int, int, int);
+int getblkx(struct vnode *vp, daddr_t blkno, int size, int slpflag,
+        int slptimeo, int flags, struct buf **bpp);
 struct buf *geteblk(int, int);
 int bufwait(struct buf *);
 int bufwrite(struct buf *);
sys/ufs/ffs/ffs_vnops.c

@@ -462,6 +462,26 @@ ffs_lock(ap)
 #endif
 }

+static int
+ffs_read_hole(struct uio *uio, long xfersize, long *size)
+{
+    ssize_t saved_resid, tlen;
+    int error;
+
+    while (xfersize > 0) {
+        tlen = min(xfersize, ZERO_REGION_SIZE);
+        saved_resid = uio->uio_resid;
+        error = vn_io_fault_uiomove(__DECONST(void *, zero_region),
+            tlen, uio);
+        if (error != 0)
+            return (error);
+        tlen = saved_resid - uio->uio_resid;
+        xfersize -= tlen;
+        *size -= tlen;
+    }
+    return (0);
+}
+
 /*
  * Vnode op for reading.
  */
@@ -483,9 +503,7 @@ ffs_read(ap)
     off_t bytesinfile;
     long size, xfersize, blkoffset;
     ssize_t orig_resid;
-    int error;
-    int seqcount;
-    int ioflag;
+    int bflag, error, ioflag, seqcount;

     vp = ap->a_vp;
     uio = ap->a_uio;
@@ -529,6 +547,7 @@ ffs_read(ap)
         uio->uio_offset >= fs->fs_maxfilesize)
         return (EOVERFLOW);

+    bflag = GB_UNMAPPED | (uio->uio_segflg == UIO_NOCOPY ? 0 : GB_NOSPARSE);
     for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
         if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
             break;
@@ -565,8 +584,7 @@ ffs_read(ap)
             /*
              * Don't do readahead if this is the end of the file.
              */
-            error = bread_gb(vp, lbn, size, NOCRED,
-                GB_UNMAPPED, &bp);
+            error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
         } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
             /*
              * Otherwise if we are allowed to cluster,
@@ -577,7 +595,7 @@ ffs_read(ap)
              */
             error = cluster_read(vp, ip->i_size, lbn,
                 size, NOCRED, blkoffset + uio->uio_resid,
-                seqcount, GB_UNMAPPED, &bp);
+                seqcount, bflag, &bp);
         } else if (seqcount > 1) {
             /*
              * If we are NOT allowed to cluster, then
@@ -589,17 +607,21 @@ ffs_read(ap)
              */
             u_int nextsize = blksize(fs, ip, nextlbn);
             error = breadn_flags(vp, lbn, size, &nextlbn,
-                &nextsize, 1, NOCRED, GB_UNMAPPED, NULL, &bp);
+                &nextsize, 1, NOCRED, bflag, NULL, &bp);
         } else {
             /*
              * Failing all of the above, just read what the
              * user asked for. Interestingly, the same as
              * the first option above.
              */
-            error = bread_gb(vp, lbn, size, NOCRED,
-                GB_UNMAPPED, &bp);
+            error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
         }
-        if (error) {
+        if (error == EJUSTRETURN) {
+            error = ffs_read_hole(uio, xfersize, &size);
+            if (error == 0)
+                continue;
+        }
+        if (error != 0) {
             brelse(bp);
             bp = NULL;
             break;