diff --git a/share/man/man9/VOP_GETPAGES.9 b/share/man/man9/VOP_GETPAGES.9 index 2cc5b7acff10..4625d9b57115 100644 --- a/share/man/man9/VOP_GETPAGES.9 +++ b/share/man/man9/VOP_GETPAGES.9 @@ -29,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd September 12, 2014 +.Dd December 16, 2015 .Dt VOP_GETPAGES 9 .Os .Sh NAME @@ -41,7 +41,7 @@ .In sys/vnode.h .In vm/vm.h .Ft int -.Fn VOP_GETPAGES "struct vnode *vp" "vm_page_t *ma" "int count" "int reqpage" +.Fn VOP_GETPAGES "struct vnode *vp" "vm_page_t *ma" "int count" "int *rbehind" "int *rahead" .Ft int .Fn VOP_PUTPAGES "struct vnode *vp" "vm_page_t *ma" "int count" "int sync" "int *rtvals" .Sh DESCRIPTION @@ -63,7 +63,7 @@ locks are held. Both methods return in the same state on both success and error returns. .Pp The arguments are: -.Bl -tag -width reqpage +.Bl -tag -width rbehind .It Fa vp The file to access. .It Fa ma @@ -78,9 +78,16 @@ if the write should be synchronous. An array of VM system result codes indicating the status of each page written by .Fn VOP_PUTPAGES . -.It Fa reqpage -The index in the page array of the requested page; i.e., the one page which -the implementation of this method must handle. +.It Fa rbehind +Optional pointer to an integer specifying the number of pages to be read +behind, if possible. +If the filesystem supports that feature, the number of pages actually read is +reported back; otherwise, zero is returned. +.It Fa rahead +Optional pointer to an integer specifying the number of pages to be read +ahead, if possible. +If the filesystem supports that feature, the number of pages actually read is +reported back; otherwise, zero is returned. .El .Pp The status of the diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index d1ed9dac23f3..b5af5415ef26 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -5762,12 +5762,13 @@ ioflags(int ioflags) } static int -zfs_getpages(struct vnode *vp, vm_page_t *m, int count, int reqpage) +zfs_getpages(struct vnode *vp, vm_page_t *m, int count, int *rbehind, + int *rahead) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; objset_t *os = zp->z_zfsvfs->z_os; - vm_page_t mfirst, mlast, mreq; + vm_page_t mlast; vm_object_t object; caddr_t va; struct sf_buf *sf; @@ -5776,82 +5777,46 @@ zfs_getpages(struct vnode *vp, vm_page_t *m, int count, int reqpage) vm_pindex_t reqstart, reqend; int pcount, lsize, reqsize, size; + if (rbehind) + *rbehind = 0; + if (rahead) + *rahead = 0; + ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); pcount = OFF_TO_IDX(round_page(count)); - mreq = m[reqpage]; - object = mreq->object; - error = 0; - - if (pcount > 1 && zp->z_blksz > PAGESIZE) { - startoff = rounddown(IDX_TO_OFF(mreq->pindex), zp->z_blksz); - reqstart = OFF_TO_IDX(round_page(startoff)); - if (reqstart < m[0]->pindex) - reqstart = 0; - else - reqstart = reqstart - m[0]->pindex; - endoff = roundup(IDX_TO_OFF(mreq->pindex) + PAGE_SIZE, - zp->z_blksz); - reqend = OFF_TO_IDX(trunc_page(endoff)) - 1; - if (reqend > m[pcount - 1]->pindex) - reqend = m[pcount - 1]->pindex; - reqsize = reqend - m[reqstart]->pindex + 1; - KASSERT(reqstart <= reqpage && reqpage < reqstart + reqsize, - ("reqpage beyond [reqstart, reqstart + reqsize[ bounds")); - } else { - reqstart = reqpage; - reqsize = 1; - } - mfirst = m[reqstart]; - mlast = m[reqstart + reqsize - 1]; zfs_vmobject_wlock(object); - - for (i = 0; i < reqstart; i++) { - vm_page_lock(m[i]); - vm_page_free(m[i]); - 
vm_page_unlock(m[i]); - } - for (i = reqstart + reqsize; i < pcount; i++) { - vm_page_lock(m[i]); - vm_page_free(m[i]); - vm_page_unlock(m[i]); - } - - if (mreq->valid && reqsize == 1) { - if (mreq->valid != VM_PAGE_BITS_ALL) - vm_page_zero_invalid(mreq, TRUE); + if (m[pcount - 1]->valid != 0 && --pcount == 0) { zfs_vmobject_wunlock(object); ZFS_EXIT(zfsvfs); return (zfs_vm_pagerret_ok); } - PCPU_INC(cnt.v_vnodein); - PCPU_ADD(cnt.v_vnodepgsin, reqsize); + object = m[0]->object; + mlast = m[pcount - 1]; - if (IDX_TO_OFF(mreq->pindex) >= object->un_pager.vnp.vnp_size) { - for (i = reqstart; i < reqstart + reqsize; i++) { - if (i != reqpage) { - vm_page_lock(m[i]); - vm_page_free(m[i]); - vm_page_unlock(m[i]); - } - } + if (IDX_TO_OFF(mlast->pindex) >= + object->un_pager.vnp.vnp_size) { zfs_vmobject_wunlock(object); ZFS_EXIT(zfsvfs); return (zfs_vm_pagerret_bad); } + PCPU_INC(cnt.v_vnodein); + PCPU_ADD(cnt.v_vnodepgsin, reqsize); + lsize = PAGE_SIZE; if (IDX_TO_OFF(mlast->pindex) + lsize > object->un_pager.vnp.vnp_size) - lsize = object->un_pager.vnp.vnp_size - IDX_TO_OFF(mlast->pindex); - + lsize = object->un_pager.vnp.vnp_size - + IDX_TO_OFF(mlast->pindex); zfs_vmobject_wunlock(object); - for (i = reqstart; i < reqstart + reqsize; i++) { + error = 0; + for (i = 0; i < pcount; i++) { size = PAGE_SIZE; - if (i == (reqstart + reqsize - 1)) + if (i == pcount - 1) size = lsize; va = zfs_map_page(m[i], &sf); error = dmu_read(os, zp->z_id, IDX_TO_OFF(m[i]->pindex), @@ -5860,21 +5825,15 @@ zfs_getpages(struct vnode *vp, vm_page_t *m, int count, int reqpage) bzero(va + size, PAGE_SIZE - size); zfs_unmap_page(sf); if (error != 0) - break; + goto out; } zfs_vmobject_wlock(object); - - for (i = reqstart; i < reqstart + reqsize; i++) { - if (!error) - m[i]->valid = VM_PAGE_BITS_ALL; - KASSERT(m[i]->dirty == 0, ("zfs_getpages: page %p is dirty", m[i])); - if (i != reqpage) - vm_page_readahead_finish(m[i]); - } - + for (i = 0; i < pcount; i++) + m[i]->valid = VM_PAGE_BITS_ALL; zfs_vmobject_wunlock(object); +out: ZFS_ACCESSTIME_STAMP(zfsvfs, zp); ZFS_EXIT(zfsvfs); return (error ? 
zfs_vm_pagerret_error : zfs_vm_pagerret_ok); @@ -5886,11 +5845,13 @@ zfs_freebsd_getpages(ap) struct vnode *a_vp; vm_page_t *a_m; int a_count; - int a_reqpage; + int *a_rbehind; + int *a_rahead; } */ *ap; { - return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage)); + return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, + ap->a_rahead)); } static int diff --git a/sys/dev/drm2/i915/i915_gem.c b/sys/dev/drm2/i915/i915_gem.c index 07b516c07ed4..95edcf9acd24 100644 --- a/sys/dev/drm2/i915/i915_gem.c +++ b/sys/dev/drm2/i915/i915_gem.c @@ -4338,7 +4338,7 @@ i915_gem_wire_page(vm_object_t object, vm_pindex_t pindex, bool *fresh) page = vm_page_grab(object, pindex, VM_ALLOC_NORMAL); if (page->valid != VM_PAGE_BITS_ALL) { if (vm_pager_has_page(object, pindex, NULL, NULL)) { - rv = vm_pager_get_pages(object, &page, 1, 0); + rv = vm_pager_get_pages(object, &page, 1, NULL, NULL); if (rv != VM_PAGER_OK) { vm_page_lock(page); vm_page_free(page); diff --git a/sys/dev/drm2/ttm/ttm_tt.c b/sys/dev/drm2/ttm/ttm_tt.c index 2dd6fb4d1a3e..1e2db3cd8755 100644 --- a/sys/dev/drm2/ttm/ttm_tt.c +++ b/sys/dev/drm2/ttm/ttm_tt.c @@ -291,7 +291,8 @@ int ttm_tt_swapin(struct ttm_tt *ttm) from_page = vm_page_grab(obj, i, VM_ALLOC_NORMAL); if (from_page->valid != VM_PAGE_BITS_ALL) { if (vm_pager_has_page(obj, i, NULL, NULL)) { - rv = vm_pager_get_pages(obj, &from_page, 1, 0); + rv = vm_pager_get_pages(obj, &from_page, 1, + NULL, NULL); if (rv != VM_PAGER_OK) { vm_page_lock(from_page); vm_page_free(from_page); diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index 6405ad5f02b5..2fa9f460edd9 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -1019,7 +1019,8 @@ mdstart_swap(struct md_s *sc, struct bio *bp) if (m->valid == VM_PAGE_BITS_ALL) rv = VM_PAGER_OK; else - rv = vm_pager_get_pages(sc->object, &m, 1, 0); + rv = vm_pager_get_pages(sc->object, &m, 1, + NULL, NULL); if (rv == VM_PAGER_ERROR) { vm_page_xunbusy(m); break; @@ -1046,7 +1047,8 @@ mdstart_swap(struct md_s *sc, struct bio *bp) } } else if (bp->bio_cmd == BIO_WRITE) { if (len != PAGE_SIZE && m->valid != VM_PAGE_BITS_ALL) - rv = vm_pager_get_pages(sc->object, &m, 1, 0); + rv = vm_pager_get_pages(sc->object, &m, 1, + NULL, NULL); else rv = VM_PAGER_OK; if (rv == VM_PAGER_ERROR) { @@ -1065,7 +1067,8 @@ mdstart_swap(struct md_s *sc, struct bio *bp) m->valid = VM_PAGE_BITS_ALL; } else if (bp->bio_cmd == BIO_DELETE) { if (len != PAGE_SIZE && m->valid != VM_PAGE_BITS_ALL) - rv = vm_pager_get_pages(sc->object, &m, 1, 0); + rv = vm_pager_get_pages(sc->object, &m, 1, + NULL, NULL); else rv = VM_PAGER_OK; if (rv == VM_PAGER_ERROR) { diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index 12b97781dd07..50318647a2f5 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -1753,6 +1753,10 @@ fuse_vnop_getpages(struct vop_getpages_args *ap) cred = curthread->td_ucred; /* XXX */ pages = ap->a_m; count = ap->a_count; + if (ap->a_rbehind) + *ap->a_rbehind = 0; + if (ap->a_rahead) + *ap->a_rahead = 0; if (!fsess_opt_mmap(vnode_mount(vp))) { FS_DEBUG("called on non-cacheable vnode??\n"); @@ -1761,26 +1765,21 @@ fuse_vnop_getpages(struct vop_getpages_args *ap) npages = btoc(count); /* - * If the requested page is partially valid, just return it and - * allow the pager to zero-out the blanks. Partially valid pages - * can only occur at the file EOF. + * If the last page is partially valid, just return it and allow + * the pager to zero-out the blanks. Partially valid pages can + * only occur at the file EOF. 
+ * + * XXXGL: is that true for FUSE, which is a local filesystem, + * but still somewhat disconnected from the kernel? */ VM_OBJECT_WLOCK(vp->v_object); - fuse_vm_page_lock_queues(); - if (pages[ap->a_reqpage]->valid != 0) { - for (i = 0; i < npages; ++i) { - if (i != ap->a_reqpage) { - fuse_vm_page_lock(pages[i]); - vm_page_free(pages[i]); - fuse_vm_page_unlock(pages[i]); - } + if (pages[npages - 1]->valid != 0) { + if (--npages == 0) { + VM_OBJECT_WUNLOCK(vp->v_object); + return (VM_PAGER_OK); } - fuse_vm_page_unlock_queues(); - VM_OBJECT_WUNLOCK(vp->v_object); - return 0; - } - fuse_vm_page_unlock_queues(); + count = npages << PAGE_SHIFT; } VM_OBJECT_WUNLOCK(vp->v_object); /* @@ -1811,17 +1810,6 @@ fuse_vnop_getpages(struct vop_getpages_args *ap) if (error && (uio.uio_resid == count)) { FS_DEBUG("error %d\n", error); - VM_OBJECT_WLOCK(vp->v_object); - fuse_vm_page_lock_queues(); - for (i = 0; i < npages; ++i) { - if (i != ap->a_reqpage) { - fuse_vm_page_lock(pages[i]); - vm_page_free(pages[i]); - fuse_vm_page_unlock(pages[i]); - } - } - fuse_vm_page_unlock_queues(); - VM_OBJECT_WUNLOCK(vp->v_object); return VM_PAGER_ERROR; } /* @@ -1862,8 +1850,6 @@ fuse_vnop_getpages(struct vop_getpages_args *ap) */ ; } - if (i != ap->a_reqpage) - vm_page_readahead_finish(m); } fuse_vm_page_unlock_queues(); VM_OBJECT_WUNLOCK(vp->v_object); diff --git a/sys/fs/nfsclient/nfs_clbio.c b/sys/fs/nfsclient/nfs_clbio.c index 53ba7efe418f..5647868011ac 100644 --- a/sys/fs/nfsclient/nfs_clbio.c +++ b/sys/fs/nfsclient/nfs_clbio.c @@ -101,6 +101,10 @@ ncl_getpages(struct vop_getpages_args *ap) nmp = VFSTONFS(vp->v_mount); pages = ap->a_m; count = ap->a_count; + if (ap->a_rbehind) + *ap->a_rbehind = 0; + if (ap->a_rahead) + *ap->a_rahead = 0; if ((object = vp->v_object) == NULL) { ncl_printf("nfs_getpages: called with non-merged cache vnode??\n"); @@ -132,12 +136,18 @@ ncl_getpages(struct vop_getpages_args *ap) * If the requested page is partially valid, just return it and * allow the pager to zero-out the blanks. Partially valid pages * can only occur at the file EOF. + * + * XXXGL: is that true for NFS, where short read can occur??? 
*/ - if (pages[ap->a_reqpage]->valid != 0) { - vm_pager_free_nonreq(object, pages, ap->a_reqpage, npages, - FALSE); - return (VM_PAGER_OK); + VM_OBJECT_WLOCK(object); + if (pages[npages - 1]->valid != 0) { + if (--npages == 0) { + VM_OBJECT_WUNLOCK(object); + return (VM_PAGER_OK); + } + count = npages << PAGE_SHIFT; } + VM_OBJECT_WUNLOCK(object); /* * We use only the kva address for the buffer, but this is extremely @@ -167,8 +177,6 @@ ncl_getpages(struct vop_getpages_args *ap) if (error && (uio.uio_resid == count)) { ncl_printf("nfs_getpages: error %d\n", error); - vm_pager_free_nonreq(object, pages, ap->a_reqpage, npages, - FALSE); return (VM_PAGER_ERROR); } @@ -212,8 +220,6 @@ ncl_getpages(struct vop_getpages_args *ap) */ ; } - if (i != ap->a_reqpage) - vm_page_readahead_finish(m); } VM_OBJECT_WUNLOCK(object); return (0); diff --git a/sys/fs/smbfs/smbfs_io.c b/sys/fs/smbfs/smbfs_io.c index a567ce6bf19c..5fe6f11dbbf1 100644 --- a/sys/fs/smbfs/smbfs_io.c +++ b/sys/fs/smbfs/smbfs_io.c @@ -424,7 +424,7 @@ smbfs_getpages(ap) #ifdef SMBFS_RWGENERIC return vop_stdgetpages(ap); #else - int i, error, nextoff, size, toff, npages, count, reqpage; + int i, error, nextoff, size, toff, npages, count; struct uio uio; struct iovec iov; vm_offset_t kva; @@ -436,7 +436,7 @@ smbfs_getpages(ap) struct smbnode *np; struct smb_cred *scred; vm_object_t object; - vm_page_t *pages, m; + vm_page_t *pages; vp = ap->a_vp; if ((object = vp->v_object) == NULL) { @@ -451,26 +451,25 @@ smbfs_getpages(ap) pages = ap->a_m; count = ap->a_count; npages = btoc(count); - reqpage = ap->a_reqpage; + if (ap->a_rbehind) + *ap->a_rbehind = 0; + if (ap->a_rahead) + *ap->a_rahead = 0; /* * If the requested page is partially valid, just return it and * allow the pager to zero-out the blanks. Partially valid pages * can only occur at the file EOF. + * + * XXXGL: is that true for SMB filesystem? */ - m = pages[reqpage]; VM_OBJECT_WLOCK(object); - if (m->valid != 0) { - for (i = 0; i < npages; ++i) { - if (i != reqpage) { - vm_page_lock(pages[i]); - vm_page_free(pages[i]); - vm_page_unlock(pages[i]); - } + if (pages[npages - 1]->valid != 0) { + if (--npages == 0) { + VM_OBJECT_WUNLOCK(object); + return (VM_PAGER_OK); } - VM_OBJECT_WUNLOCK(object); - return 0; + count = npages << PAGE_SHIFT; } VM_OBJECT_WUNLOCK(object); @@ -500,22 +499,14 @@ smbfs_getpages(ap) relpbuf(bp, &smbfs_pbuf_freecnt); - VM_OBJECT_WLOCK(object); if (error && (uio.uio_resid == count)) { printf("smbfs_getpages: error %d\n",error); - for (i = 0; i < npages; i++) { - if (reqpage != i) { - vm_page_lock(pages[i]); - vm_page_free(pages[i]); - vm_page_unlock(pages[i]); - } - } - VM_OBJECT_WUNLOCK(object); return VM_PAGER_ERROR; } size = count - uio.uio_resid; + VM_OBJECT_WLOCK(object); for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { vm_page_t m; nextoff = toff + PAGE_SIZE; @@ -544,9 +535,6 @@ smbfs_getpages(ap) */ ; } - - if (i != reqpage) - vm_page_readahead_finish(m); } VM_OBJECT_WUNLOCK(object); return 0; diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c index fa489b2f3842..fcc8782b9dd5 100644 --- a/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -1370,7 +1370,8 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr) VM_OBJECT_WLOCK(uobj); goto retry; } else if (m->valid != VM_PAGE_BITS_ALL) - rv = vm_pager_get_pages(uobj, &m, 1, 0); + rv = vm_pager_get_pages(uobj, &m, 1, + NULL, NULL); else /* A cached page was reactivated. 
*/ rv = VM_PAGER_OK; diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index a0eb0695fbba..7ad9c9379960 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -950,8 +950,7 @@ int exec_map_first_page(imgp) struct image_params *imgp; { - int rv, i; - int initial_pagein; + int rv, i, after, initial_pagein; vm_page_t ma[VM_INITIAL_PAGEIN]; vm_object_t object; @@ -967,9 +966,18 @@ exec_map_first_page(imgp) #endif ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL); if (ma[0]->valid != VM_PAGE_BITS_ALL) { - initial_pagein = VM_INITIAL_PAGEIN; - if (initial_pagein > object->size) - initial_pagein = object->size; + if (!vm_pager_has_page(object, 0, NULL, &after)) { + vm_page_lock(ma[0]); + vm_page_free(ma[0]); + vm_page_unlock(ma[0]); + vm_page_xunbusy(ma[0]); + VM_OBJECT_WUNLOCK(object); + return (EIO); + } + initial_pagein = min(after, VM_INITIAL_PAGEIN); + KASSERT(initial_pagein <= object->size, + ("%s: initial_pagein %d object->size %ju", + __func__, initial_pagein, (uintmax_t )object->size)); for (i = 1; i < initial_pagein; i++) { if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) { if (ma[i]->valid) @@ -984,14 +992,19 @@ exec_map_first_page(imgp) } } initial_pagein = i; - rv = vm_pager_get_pages(object, ma, initial_pagein, 0); + rv = vm_pager_get_pages(object, ma, initial_pagein, NULL, NULL); if (rv != VM_PAGER_OK) { - vm_page_lock(ma[0]); - vm_page_free(ma[0]); - vm_page_unlock(ma[0]); + for (i = 0; i < initial_pagein; i++) { + vm_page_lock(ma[i]); + vm_page_free(ma[i]); + vm_page_unlock(ma[i]); + vm_page_xunbusy(ma[i]); + } VM_OBJECT_WUNLOCK(object); return (EIO); } + for (i = 1; i < initial_pagein; i++) + vm_page_readahead_finish(ma[i]); } vm_page_xunbusy(ma[0]); vm_page_lock(ma[0]); diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c index 21cbe49e5cb6..8df6e43db0c9 100644 --- a/sys/kern/uipc_shm.c +++ b/sys/kern/uipc_shm.c @@ -189,7 +189,7 @@ uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio) m = vm_page_grab(obj, idx, VM_ALLOC_NORMAL); if (m->valid != VM_PAGE_BITS_ALL) { if (vm_pager_has_page(obj, idx, NULL, NULL)) { - rv = vm_pager_get_pages(obj, &m, 1, 0); + rv = vm_pager_get_pages(obj, &m, 1, NULL, NULL); if (rv != VM_PAGER_OK) { printf( "uiomove_object: vm_obj %p idx %jd valid %x pager error %d\n", @@ -460,7 +460,7 @@ shm_dotruncate(struct shmfd *shmfd, off_t length) goto retry; } else if (m->valid != VM_PAGE_BITS_ALL) rv = vm_pager_get_pages(object, &m, 1, - 0); + NULL, NULL); else /* A cached page was reactivated. 
*/ rv = VM_PAGER_OK; diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index 54c13024c611..c33a2cf30a22 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -2033,7 +2033,7 @@ sendfile_readpage(vm_object_t obj, struct vnode *vp, int nd, VM_OBJECT_WLOCK(obj); } else { if (vm_pager_has_page(obj, pindex, NULL, NULL)) { - rv = vm_pager_get_pages(obj, &m, 1, 0); + rv = vm_pager_get_pages(obj, &m, 1, NULL, NULL); SFSTAT_INC(sf_iocnt); if (rv != VM_PAGER_OK) { vm_page_lock(m); diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index e71294a78ae8..fd83f87ec3c5 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -731,12 +731,13 @@ vop_stdgetpages(ap) struct vnode *a_vp; vm_page_t *a_m; int a_count; - int a_reqpage; + int *a_rbehind; + int *a_rahead; } */ *ap; { return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, - ap->a_count, ap->a_reqpage, NULL, NULL); + ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL); } static int @@ -744,8 +745,9 @@ vop_stdgetpages_async(struct vop_getpages_async_args *ap) { int error; - error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage); - ap->a_iodone(ap->a_arg, ap->a_m, ap->a_reqpage, error); + error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, + ap->a_rahead); + ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error); return (error); } diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index 63f8eb9237cf..5586a14ab903 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -473,7 +473,8 @@ vop_getpages { IN struct vnode *vp; IN vm_page_t *m; IN int count; - IN int reqpage; + IN int *rbehind; + IN int *rahead; }; @@ -483,7 +484,8 @@ vop_getpages_async { IN struct vnode *vp; IN vm_page_t *m; IN int count; - IN int reqpage; + IN int *rbehind; + IN int *rahead; IN vop_getpages_iodone_t *iodone; IN void *arg; }; diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 75b41f3539d5..c85b88ff54f4 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -122,14 +122,13 @@ struct buf { struct ucred *b_rcred; /* Read credentials reference. */ struct ucred *b_wcred; /* Write credentials reference. */ union { - TAILQ_ENTRY(buf) bu_freelist; /* (Q) */ + TAILQ_ENTRY(buf) b_freelist; /* (Q) */ struct { - void (*pg_iodone)(void *, vm_page_t *, int, int); - int pg_reqpage; - } bu_pager; - } b_union; -#define b_freelist b_union.bu_freelist -#define b_pager b_union.bu_pager + void (*b_pgiodone)(void *, vm_page_t *, int, int); + int b_pgbefore; + int b_pgafter; + }; + }; union cluster_info { TAILQ_HEAD(cluster_list_head, buf) cluster_head; TAILQ_ENTRY(buf) cluster_entry; diff --git a/sys/vm/default_pager.c b/sys/vm/default_pager.c index 98dee45d730a..f334cd72d808 100644 --- a/sys/vm/default_pager.c +++ b/sys/vm/default_pager.c @@ -56,7 +56,7 @@ __FBSDID("$FreeBSD$"); static vm_object_t default_pager_alloc(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); static void default_pager_dealloc(vm_object_t); -static int default_pager_getpages(vm_object_t, vm_page_t *, int, int); +static int default_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); static void default_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *); static boolean_t default_pager_haspage(vm_object_t, vm_pindex_t, int *, @@ -122,13 +122,11 @@ default_pager_dealloc(object) * see a vm_page with assigned swap here. 
*/ static int -default_pager_getpages(object, m, count, reqpage) - vm_object_t object; - vm_page_t *m; - int count; - int reqpage; +default_pager_getpages(vm_object_t object, vm_page_t *m, int count, + int *rbehind, int *rahead) { - return VM_PAGER_FAIL; + + return (VM_PAGER_FAIL); } /* diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index a0446be37a55..5473081de096 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -59,7 +59,7 @@ static void dev_pager_init(void); static vm_object_t dev_pager_alloc(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); static void dev_pager_dealloc(vm_object_t); -static int dev_pager_getpages(vm_object_t, vm_page_t *, int, int); +static int dev_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); static void dev_pager_putpages(vm_object_t, vm_page_t *, int, int, int *); static boolean_t dev_pager_haspage(vm_object_t, vm_pindex_t, int *, int *); static void dev_pager_free_page(vm_object_t object, vm_page_t m); @@ -257,28 +257,33 @@ dev_pager_dealloc(vm_object_t object) } static int -dev_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int reqpage) +dev_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int *rbehind, + int *rahead) { int error; + /* Since our haspage reports zero after/before, the count is 1. */ + KASSERT(count == 1, ("%s: count %d", __func__, count)); VM_OBJECT_ASSERT_WLOCKED(object); error = object->un_pager.devp.ops->cdev_pg_fault(object, - IDX_TO_OFF(ma[reqpage]->pindex), PROT_READ, &ma[reqpage]); + IDX_TO_OFF(ma[0]->pindex), PROT_READ, &ma[0]); VM_OBJECT_ASSERT_WLOCKED(object); - vm_pager_free_nonreq(object, ma, reqpage, count, TRUE); - if (error == VM_PAGER_OK) { KASSERT((object->type == OBJT_DEVICE && - (ma[reqpage]->oflags & VPO_UNMANAGED) != 0) || + (ma[0]->oflags & VPO_UNMANAGED) != 0) || (object->type == OBJT_MGTDEVICE && - (ma[reqpage]->oflags & VPO_UNMANAGED) == 0), - ("Wrong page type %p %p", ma[reqpage], object)); + (ma[0]->oflags & VPO_UNMANAGED) == 0), + ("Wrong page type %p %p", ma[0], object)); if (object->type == OBJT_DEVICE) { TAILQ_INSERT_TAIL(&object->un_pager.devp.devp_pglist, - ma[reqpage], plinks.q); + ma[0], plinks.q); } + if (rbehind) + *rbehind = 0; + if (rahead) + *rahead = 0; } return (error); diff --git a/sys/vm/phys_pager.c b/sys/vm/phys_pager.c index 885a4515bb19..02e819e6a5c7 100644 --- a/sys/vm/phys_pager.c +++ b/sys/vm/phys_pager.c @@ -139,7 +139,8 @@ phys_pager_dealloc(vm_object_t object) * Fill as many pages as vm_fault has allocated for us. */ static int -phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) +phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, + int *rahead) { int i; @@ -154,14 +155,11 @@ phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) ("phys_pager_getpages: partially valid page %p", m[i])); KASSERT(m[i]->dirty == 0, ("phys_pager_getpages: dirty page %p", m[i])); - /* The requested page must remain busy, the others not. 
*/ - if (i == reqpage) { - vm_page_lock(m[i]); - vm_page_flash(m[i]); - vm_page_unlock(m[i]); - } else - vm_page_xunbusy(m[i]); } + if (rbehind) + *rbehind = 0; + if (rahead) + *rahead = 0; return (VM_PAGER_OK); } diff --git a/sys/vm/sg_pager.c b/sys/vm/sg_pager.c index 23ebd3a4a06d..26aa1d3cff1b 100644 --- a/sys/vm/sg_pager.c +++ b/sys/vm/sg_pager.c @@ -49,7 +49,7 @@ __FBSDID("$FreeBSD$"); static vm_object_t sg_pager_alloc(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); static void sg_pager_dealloc(vm_object_t); -static int sg_pager_getpages(vm_object_t, vm_page_t *, int, int); +static int sg_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); static void sg_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *); static boolean_t sg_pager_haspage(vm_object_t, vm_pindex_t, int *, @@ -135,7 +135,8 @@ sg_pager_dealloc(vm_object_t object) } static int -sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) +sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, + int *rahead) { struct sglist *sg; vm_page_t m_paddr, page; @@ -145,11 +146,13 @@ sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) size_t space; int i; + /* Since our haspage reports zero after/before, the count is 1. */ + KASSERT(count == 1, ("%s: count %d", __func__, count)); VM_OBJECT_ASSERT_WLOCKED(object); sg = object->handle; memattr = object->memattr; VM_OBJECT_WUNLOCK(object); - offset = m[reqpage]->pindex; + offset = m[0]->pindex; /* * Lookup the physical address of the requested page. An initial @@ -178,26 +181,23 @@ sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) } /* Return a fake page for the requested page. */ - KASSERT(!(m[reqpage]->flags & PG_FICTITIOUS), + KASSERT(!(m[0]->flags & PG_FICTITIOUS), ("backing page for SG is fake")); /* Construct a new fake page. */ page = vm_page_getfake(paddr, memattr); VM_OBJECT_WLOCK(object); TAILQ_INSERT_TAIL(&object->un_pager.sgp.sgp_pglist, page, plinks.q); - - /* Free the original pages and insert this fake page into the object. 
*/ - for (i = 0; i < count; i++) { - if (i == reqpage && - vm_page_replace(page, object, offset) != m[i]) - panic("sg_pager_getpages: invalid place replacement"); - vm_page_lock(m[i]); - vm_page_free(m[i]); - vm_page_unlock(m[i]); - } - m[reqpage] = page; + if (vm_page_replace(page, object, offset) != m[0]) + panic("sg_pager_getpages: invalid place replacement"); + m[0] = page; page->valid = VM_PAGE_BITS_ALL; + if (rbehind) + *rbehind = 0; + if (rahead) + *rahead = 0; + return (VM_PAGER_OK); } diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 0bd4883c6847..f243ecaf19f9 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -357,9 +357,10 @@ static vm_object_t swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t offset, struct ucred *); static void swap_pager_dealloc(vm_object_t object); -static int swap_pager_getpages(vm_object_t, vm_page_t *, int, int); -static int swap_pager_getpages_async(vm_object_t, vm_page_t *, int, int, - pgo_getpages_iodone_t, void *); +static int swap_pager_getpages(vm_object_t, vm_page_t *, int, int *, + int *); +static int swap_pager_getpages_async(vm_object_t, vm_page_t *, int, int *, + int *, pgo_getpages_iodone_t, void *); static void swap_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *); static boolean_t swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after); @@ -413,16 +414,6 @@ static void swp_pager_meta_free(vm_object_t, vm_pindex_t, daddr_t); static void swp_pager_meta_free_all(vm_object_t); static daddr_t swp_pager_meta_ctl(vm_object_t, vm_pindex_t, int); -static void -swp_pager_free_nrpage(vm_page_t m) -{ - - vm_page_lock(m); - if (m->wire_count == 0) - vm_page_free(m); - vm_page_unlock(m); -} - /* * SWP_SIZECHECK() - update swap_pager_full indication * @@ -1103,16 +1094,12 @@ swap_pager_unswapped(vm_page_t m) * left busy, but the others adjusted. */ static int -swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) +swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, + int *rahead) { struct buf *bp; - vm_page_t mreq; - int i; - int j; daddr_t blk; - mreq = m[reqpage]; - /* * Calculate range to retrieve. The pages have already been assigned * their swapblks. We require a *contiguous* range but we know it to @@ -1122,45 +1109,18 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) * * The swp_*() calls must be made with the object locked. */ - blk = swp_pager_meta_ctl(mreq->object, mreq->pindex, 0); + blk = swp_pager_meta_ctl(m[0]->object, m[0]->pindex, 0); - for (i = reqpage - 1; i >= 0; --i) { - daddr_t iblk; - - iblk = swp_pager_meta_ctl(m[i]->object, m[i]->pindex, 0); - if (blk != iblk + (reqpage - i)) - break; - } - ++i; - - for (j = reqpage + 1; j < count; ++j) { - daddr_t jblk; - - jblk = swp_pager_meta_ctl(m[j]->object, m[j]->pindex, 0); - if (blk != jblk - (j - reqpage)) - break; - } - - /* - * free pages outside our collection range. Note: we never free - * mreq, it must remain busy throughout. - */ - if (0 < i || j < count) { - int k; - - for (k = 0; k < i; ++k) - swp_pager_free_nrpage(m[k]); - for (k = j; k < count; ++k) - swp_pager_free_nrpage(m[k]); - } - - /* - * Return VM_PAGER_FAIL if we have nothing to do. Return mreq - * still busy, but the others unbusied. 
- */ if (blk == SWAPBLK_NONE) return (VM_PAGER_FAIL); +#ifdef INVARIANTS + for (int i = 0; i < count; i++) + KASSERT(blk + i == + swp_pager_meta_ctl(m[i]->object, m[i]->pindex, 0), + ("%s: range is not contiguous", __func__)); +#endif + /* * Getpbuf() can sleep. */ @@ -1175,21 +1135,16 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) bp->b_iodone = swp_pager_async_iodone; bp->b_rcred = crhold(thread0.td_ucred); bp->b_wcred = crhold(thread0.td_ucred); - bp->b_blkno = blk - (reqpage - i); - bp->b_bcount = PAGE_SIZE * (j - i); - bp->b_bufsize = PAGE_SIZE * (j - i); - bp->b_pager.pg_reqpage = reqpage - i; + bp->b_blkno = blk; + bp->b_bcount = PAGE_SIZE * count; + bp->b_bufsize = PAGE_SIZE * count; + bp->b_npages = count; VM_OBJECT_WLOCK(object); - { - int k; - - for (k = i; k < j; ++k) { - bp->b_pages[k - i] = m[k]; - m[k]->oflags |= VPO_SWAPINPROG; - } + for (int i = 0; i < count; i++) { + bp->b_pages[i] = m[i]; + m[i]->oflags |= VPO_SWAPINPROG; } - bp->b_npages = j - i; PCPU_INC(cnt.v_swapin); PCPU_ADD(cnt.v_swappgsin, bp->b_npages); @@ -1221,8 +1176,8 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) * is set in the meta-data. */ VM_OBJECT_WLOCK(object); - while ((mreq->oflags & VPO_SWAPINPROG) != 0) { - mreq->oflags |= VPO_SWAPSLEEP; + while ((m[0]->oflags & VPO_SWAPINPROG) != 0) { + m[0]->oflags |= VPO_SWAPSLEEP; PCPU_INC(cnt.v_intrans); if (VM_OBJECT_SLEEP(object, &object->paging_in_progress, PSWP, "swread", hz * 20)) { @@ -1233,15 +1188,18 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) } /* - * mreq is left busied after completion, but all the other pages - * are freed. If we had an unrecoverable read error the page will - * not be valid. + * If we had an unrecoverable read error pages will not be valid. */ - if (mreq->valid != VM_PAGE_BITS_ALL) { - return (VM_PAGER_ERROR); - } else { - return (VM_PAGER_OK); - } + for (int i = 0; i < count; i++) + if (m[i]->valid != VM_PAGE_BITS_ALL) + return (VM_PAGER_ERROR); + + if (rbehind) + *rbehind = 0; + if (rahead) + *rahead = 0; + + return (VM_PAGER_OK); /* * A final note: in a low swap situation, we cannot deallocate swap @@ -1259,11 +1217,11 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) */ static int swap_pager_getpages_async(vm_object_t object, vm_page_t *m, int count, - int reqpage, pgo_getpages_iodone_t iodone, void *arg) + int *rbehind, int *rahead, pgo_getpages_iodone_t iodone, void *arg) { int r, error; - r = swap_pager_getpages(object, m, count, reqpage); + r = swap_pager_getpages(object, m, count, rbehind, rahead); VM_OBJECT_WUNLOCK(object); switch (r) { case VM_PAGER_OK: @@ -1527,33 +1485,11 @@ swp_pager_async_iodone(struct buf *bp) */ if (bp->b_iocmd == BIO_READ) { /* - * When reading, reqpage needs to stay - * locked for the parent, but all other - * pages can be freed. We still want to - * wakeup the parent waiting on the page, - * though. ( also: pg_reqpage can be -1 and - * not match anything ). - * - * We have to wake specifically requested pages - * up too because we cleared VPO_SWAPINPROG and - * someone may be waiting for that. - * * NOTE: for reads, m->dirty will probably * be overridden by the original caller of * getpages so don't play cute tricks here. */ m->valid = 0; - if (i != bp->b_pager.pg_reqpage) - swp_pager_free_nrpage(m); - else { - vm_page_lock(m); - vm_page_flash(m); - vm_page_unlock(m); - } - /* - * If i == bp->b_pager.pg_reqpage, do not wake - * the page up. The caller needs to. 
- */ } else { /* * If a write error occurs, reactivate page @@ -1575,38 +1511,12 @@ swp_pager_async_iodone(struct buf *bp) * want to do that anyway, but it was an optimization * that existed in the old swapper for a time before * it got ripped out due to precisely this problem. - * - * If not the requested page then deactivate it. - * - * Note that the requested page, reqpage, is left - * busied, but we still have to wake it up. The - * other pages are released (unbusied) by - * vm_page_xunbusy(). */ KASSERT(!pmap_page_is_mapped(m), ("swp_pager_async_iodone: page %p is mapped", m)); - m->valid = VM_PAGE_BITS_ALL; KASSERT(m->dirty == 0, ("swp_pager_async_iodone: page %p is dirty", m)); - - /* - * We have to wake specifically requested pages - * up too because we cleared VPO_SWAPINPROG and - * could be waiting for it in getpages. However, - * be sure to not unbusy getpages specifically - * requested page - getpages expects it to be - * left busy. - */ - if (i != bp->b_pager.pg_reqpage) { - vm_page_lock(m); - vm_page_deactivate(m); - vm_page_unlock(m); - vm_page_xunbusy(m); - } else { - vm_page_lock(m); - vm_page_flash(m); - vm_page_unlock(m); - } + m->valid = VM_PAGE_BITS_ALL; } else { /* * For write success, clear the dirty @@ -1727,7 +1637,7 @@ swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex) return; } - if (swap_pager_getpages(object, &m, 1, 0) != VM_PAGER_OK) + if (swap_pager_getpages(object, &m, 1, NULL, NULL) != VM_PAGER_OK) panic("swap_pager_force_pagein: read from swap failed");/*XXX*/ vm_object_pip_wakeup(object); vm_page_dirty(m); diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 6990d12e0aad..a7e3d3760e56 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -107,13 +107,8 @@ __FBSDID("$FreeBSD$"); #define PFBAK 4 #define PFFOR 4 -static int vm_fault_additional_pages(vm_page_t, int, int, vm_page_t *, int *); - -#define VM_FAULT_READ_BEHIND 8 #define VM_FAULT_READ_DEFAULT (1 + VM_FAULT_READ_AHEAD_INIT) #define VM_FAULT_READ_MAX (1 + VM_FAULT_READ_AHEAD_MAX) -#define VM_FAULT_NINCR (VM_FAULT_READ_MAX / VM_FAULT_READ_BEHIND) -#define VM_FAULT_SUM (VM_FAULT_NINCR * (VM_FAULT_NINCR + 1) / 2) #define VM_FAULT_DONTNEED_MIN 1048576 @@ -133,7 +128,7 @@ struct faultstate { static void vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr, int ahead); static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra, - int faultcount, int reqpage); + int backward, int forward); static inline void release_page(struct faultstate *fs) @@ -288,11 +283,10 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags, vm_page_t *m_hold) { vm_prot_t prot; - int alloc_req, era, faultcount, nera, reqpage, result; + int alloc_req, era, faultcount, nera, result; boolean_t growstack, is_first_object_locked, wired; int map_generation; vm_object_t next_object; - vm_page_t marray[VM_FAULT_READ_MAX]; int hardfault; struct faultstate fs; struct vnode *vp; @@ -303,7 +297,7 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, growstack = TRUE; PCPU_INC(cnt.v_vm_faults); fs.vp = NULL; - faultcount = reqpage = 0; + faultcount = 0; RetryFault:; @@ -389,7 +383,7 @@ RetryFault:; FALSE); VM_OBJECT_RUNLOCK(fs.first_object); if (!wired) - vm_fault_prefault(&fs, vaddr, 0, 0); + vm_fault_prefault(&fs, vaddr, PFBAK, PFFOR); vm_map_lookup_done(fs.map, fs.entry); curthread->td_ru.ru_minflt++; return (KERN_SUCCESS); @@ -652,36 +646,13 @@ RetryFault:; ("vm_fault: vnode-backed object mapped by system map")); /* - * now we find out if 
any other pages should be paged - * in at this time this routine checks to see if the - * pages surrounding this fault reside in the same - * object as the page for this fault. If they do, - * then they are faulted in also into the object. The - * array "marray" returned contains an array of - * vm_page_t structs where one of them is the - * vm_page_t passed to the routine. The reqpage - * return value is the index into the marray for the - * vm_page_t passed to the routine. - * - * fs.m plus the additional pages are exclusive busied. + * Page in the requested page and hint the pager, + * that it may bring up surrounding pages. */ - faultcount = vm_fault_additional_pages( - fs.m, behind, ahead, marray, &reqpage); - - rv = faultcount ? - vm_pager_get_pages(fs.object, marray, faultcount, - reqpage) : VM_PAGER_FAIL; - + rv = vm_pager_get_pages(fs.object, &fs.m, 1, + &behind, &ahead); if (rv == VM_PAGER_OK) { - /* - * Found the page. Leave it busy while we play - * with it. - * - * Pager could have changed the page. Pager - * is responsible for disposition of old page - * if moved. - */ - fs.m = marray[reqpage]; + faultcount = behind + 1 + ahead; hardfault++; break; /* break to PAGE HAS BEEN FOUND */ } @@ -965,16 +936,13 @@ RetryFault:; } /* * If the page was filled by a pager, update the map entry's - * last read offset. Since the pager does not return the - * actual set of pages that it read, this update is based on - * the requested set. Typically, the requested and actual - * sets are the same. + * last read offset. * * XXX The following assignment modifies the map * without holding a write lock on it. */ if (hardfault) - fs.entry->next_read = fs.pindex + faultcount - reqpage; + fs.entry->next_read = fs.pindex + ahead + 1; vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags, TRUE); vm_page_assert_xbusied(fs.m); @@ -997,7 +965,9 @@ RetryFault:; fault_type | (wired ? PMAP_ENTER_WIRED : 0), 0); if (faultcount != 1 && (fault_flags & VM_FAULT_WIRE) == 0 && wired == 0) - vm_fault_prefault(&fs, vaddr, faultcount, reqpage); + vm_fault_prefault(&fs, vaddr, + faultcount > 0 ? behind : PFBAK, + faultcount > 0 ? ahead : PFFOR); VM_OBJECT_WLOCK(fs.object); vm_page_lock(fs.m); @@ -1114,7 +1084,7 @@ vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr, int ahead) */ static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra, - int faultcount, int reqpage) + int backward, int forward) { pmap_t pmap; vm_map_entry_t entry; @@ -1122,19 +1092,12 @@ vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra, vm_offset_t addr, starta; vm_pindex_t pindex; vm_page_t m; - int backward, forward, i; + int i; pmap = fs->map->pmap; if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) return; - if (faultcount > 0) { - backward = reqpage; - forward = faultcount - reqpage - 1; - } else { - backward = PFBAK; - forward = PFFOR; - } entry = fs->entry; starta = addra - backward * PAGE_SIZE; @@ -1465,133 +1428,6 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map, } } - -/* - * This routine checks around the requested page for other pages that - * might be able to be faulted in. This routine brackets the viable - * pages for the pages to be paged in. 
- * - * Inputs: - * m, rbehind, rahead - * - * Outputs: - * marray (array of vm_page_t), reqpage (index of requested page) - * - * Return value: - * number of pages in marray - */ -static int -vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage) - vm_page_t m; - int rbehind; - int rahead; - vm_page_t *marray; - int *reqpage; -{ - int i,j; - vm_object_t object; - vm_pindex_t pindex, startpindex, endpindex, tpindex; - vm_page_t rtm; - int cbehind, cahead; - - VM_OBJECT_ASSERT_WLOCKED(m->object); - - object = m->object; - pindex = m->pindex; - cbehind = cahead = 0; - - /* - * if the requested page is not available, then give up now - */ - if (!vm_pager_has_page(object, pindex, &cbehind, &cahead)) { - return 0; - } - - if ((cbehind == 0) && (cahead == 0)) { - *reqpage = 0; - marray[0] = m; - return 1; - } - - if (rahead > cahead) { - rahead = cahead; - } - - if (rbehind > cbehind) { - rbehind = cbehind; - } - - /* - * scan backward for the read behind pages -- in memory - */ - if (pindex > 0) { - if (rbehind > pindex) { - rbehind = pindex; - startpindex = 0; - } else { - startpindex = pindex - rbehind; - } - - if ((rtm = TAILQ_PREV(m, pglist, listq)) != NULL && - rtm->pindex >= startpindex) - startpindex = rtm->pindex + 1; - - /* tpindex is unsigned; beware of numeric underflow. */ - for (i = 0, tpindex = pindex - 1; tpindex >= startpindex && - tpindex < pindex; i++, tpindex--) { - - rtm = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL | - VM_ALLOC_IFNOTCACHED); - if (rtm == NULL) { - /* - * Shift the allocated pages to the - * beginning of the array. - */ - for (j = 0; j < i; j++) { - marray[j] = marray[j + tpindex + 1 - - startpindex]; - } - break; - } - - marray[tpindex - startpindex] = rtm; - } - } else { - startpindex = 0; - i = 0; - } - - marray[i] = m; - /* page offset of the required page */ - *reqpage = i; - - tpindex = pindex + 1; - i++; - - /* - * scan forward for the read ahead pages - */ - endpindex = tpindex + rahead; - if ((rtm = TAILQ_NEXT(m, listq)) != NULL && rtm->pindex < endpindex) - endpindex = rtm->pindex; - if (endpindex > object->size) - endpindex = object->size; - - for (; tpindex < endpindex; i++, tpindex++) { - - rtm = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL | - VM_ALLOC_IFNOTCACHED); - if (rtm == NULL) { - break; - } - - marray[i] = rtm; - } - - /* return number of pages */ - return i; -} - /* * Block entry into the machine-independent layer's page fault handler by * the calling thread. 
Subsequent calls to vm_fault() by that thread will diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index f00dce1ac3ae..e1538db130fd 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -238,7 +238,7 @@ vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset) pindex = OFF_TO_IDX(offset); m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL); if (m->valid != VM_PAGE_BITS_ALL) { - rv = vm_pager_get_pages(object, &m, 1, 0); + rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); if (rv != VM_PAGER_OK) { vm_page_lock(m); vm_page_free(m); @@ -567,37 +567,37 @@ vm_thread_swapin(struct thread *td) { vm_object_t ksobj; vm_page_t ma[KSTACK_MAX_PAGES]; - int i, j, pages, rv; + int pages; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; VM_OBJECT_WLOCK(ksobj); - for (i = 0; i < pages; i++) + for (int i = 0; i < pages; i++) ma[i] = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_WIRED); - for (i = 0; i < pages; i++) { - if (ma[i]->valid != VM_PAGE_BITS_ALL) { - vm_page_assert_xbusied(ma[i]); - vm_object_pip_add(ksobj, 1); - for (j = i + 1; j < pages; j++) { - if (ma[j]->valid != VM_PAGE_BITS_ALL) - vm_page_assert_xbusied(ma[j]); - if (ma[j]->valid == VM_PAGE_BITS_ALL) - break; - } - rv = vm_pager_get_pages(ksobj, ma + i, j - i, 0); - if (rv != VM_PAGER_OK) - panic("vm_thread_swapin: cannot get kstack for proc: %d", - td->td_proc->p_pid); - /* - * All pages in the array are in place, due to the - * pager is always the swap pager, which doesn't - * free or remove wired non-req pages from object. - */ - vm_object_pip_wakeup(ksobj); - vm_page_xunbusy(ma[i]); - } else if (vm_page_xbusied(ma[i])) + for (int i = 0; i < pages;) { + int j, a, count, rv; + + vm_page_assert_xbusied(ma[i]); + if (ma[i]->valid == VM_PAGE_BITS_ALL) { vm_page_xunbusy(ma[i]); + i++; + continue; + } + vm_object_pip_add(ksobj, 1); + for (j = i + 1; j < pages; j++) + if (ma[j]->valid == VM_PAGE_BITS_ALL) + break; + rv = vm_pager_has_page(ksobj, ma[i]->pindex, NULL, &a); + KASSERT(rv == 1, ("%s: missing page %p", __func__, ma[i])); + count = min(a + 1, j - i); + rv = vm_pager_get_pages(ksobj, ma + i, count, NULL, NULL); + KASSERT(rv == VM_PAGER_OK, ("%s: cannot get kstack for proc %d", + __func__, td->td_proc->p_pid)); + vm_object_pip_wakeup(ksobj); + for (j = i; j < i + count; j++) + vm_page_xunbusy(ma[j]); + i += count; } VM_OBJECT_WUNLOCK(ksobj); pmap_qenter(td->td_kstack, ma, pages); diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 9c5c83afb064..bc0e4c05f126 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -2014,7 +2014,7 @@ vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end) for (pindex = start; pindex < end; pindex++) { m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL); if (m->valid != VM_PAGE_BITS_ALL) { - rv = vm_pager_get_pages(object, &m, 1, 0); + rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); if (rv != VM_PAGER_OK) { vm_page_lock(m); vm_page_free(m); diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index 894a8d5616ba..ca2d73a75a42 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -243,6 +243,8 @@ extern struct vm_object kmem_object_store; rw_try_upgrade(&(object)->lock) #define VM_OBJECT_WLOCK(object) \ rw_wlock(&(object)->lock) +#define VM_OBJECT_WOWNED(object) \ + rw_wowned(&(object)->lock) #define VM_OBJECT_WUNLOCK(object) \ rw_wunlock(&(object)->lock) diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index e5edf77301c4..2e6b56a6d680 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -979,38 +979,28 @@ vm_page_free_zero(vm_page_t m) /* * Unbusy and 
handle the page queueing for a page from the VOP_GETPAGES() - * array which is not the request page. + * array which was optionally read ahead or behind. */ void vm_page_readahead_finish(vm_page_t m) { - if (m->valid != 0) { - /* - * Since the page is not the requested page, whether - * it should be activated or deactivated is not - * obvious. Empirical results have shown that - * deactivating the page is usually the best choice, - * unless the page is wanted by another thread. - */ - vm_page_lock(m); - if ((m->busy_lock & VPB_BIT_WAITERS) != 0) - vm_page_activate(m); - else - vm_page_deactivate(m); - vm_page_unlock(m); - vm_page_xunbusy(m); - } else { - /* - * Free the completely invalid page. Such page state - * occurs due to the short read operation which did - * not covered our page at all, or in case when a read - * error happens. - */ - vm_page_lock(m); - vm_page_free(m); - vm_page_unlock(m); - } + /* We shouldn't put invalid pages on queues. */ + KASSERT(m->valid != 0, ("%s: %p is invalid", __func__, m)); + + /* + * Since the page is not the actually needed one, whether it should + * be activated or deactivated is not obvious. Empirical results + * have shown that deactivating the page is usually the best choice, + * unless the page is wanted by another thread. + */ + vm_page_lock(m); + if ((m->busy_lock & VPB_BIT_WAITERS) != 0) + vm_page_activate(m); + else + vm_page_deactivate(m); + vm_page_unlock(m); + vm_page_xunbusy(m); } /* diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c index 40d7d4e43ac7..7a1e6ae772e3 100644 --- a/sys/vm/vm_pager.c +++ b/sys/vm/vm_pager.c @@ -88,7 +88,7 @@ int cluster_pbuf_freecnt = -1; /* unlimited to begin with */ struct buf *swbuf; -static int dead_pager_getpages(vm_object_t, vm_page_t *, int, int); +static int dead_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); static vm_object_t dead_pager_alloc(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); static void dead_pager_putpages(vm_object_t, vm_page_t *, int, int, int *); @@ -96,13 +96,11 @@ static boolean_t dead_pager_haspage(vm_object_t, vm_pindex_t, int *, int *); static void dead_pager_dealloc(vm_object_t); static int -dead_pager_getpages(obj, ma, count, req) - vm_object_t obj; - vm_page_t *ma; - int count; - int req; +dead_pager_getpages(vm_object_t obj, vm_page_t *ma, int count, int *rbehind, + int *rahead) { - return VM_PAGER_FAIL; + + return (VM_PAGER_FAIL); } static vm_object_t @@ -282,45 +280,47 @@ vm_pager_assert_in(vm_object_t object, vm_page_t *m, int count) * The requested page must be fully valid on successful return. */ int -vm_pager_get_pages(vm_object_t object, vm_page_t *m, int count, int reqpage) +vm_pager_get_pages(vm_object_t object, vm_page_t *m, int count, int *rbehind, + int *rahead) { +#ifdef INVARIANTS + vm_pindex_t pindex = m[0]->pindex; +#endif int r; vm_pager_assert_in(object, m, count); - r = (*pagertab[object->type]->pgo_getpages)(object, m, count, reqpage); + r = (*pagertab[object->type]->pgo_getpages)(object, m, count, rbehind, + rahead); if (r != VM_PAGER_OK) return (r); - /* - * If pager has replaced the page, assert that it had - * updated the array. Also assert that page is still - * busied. - */ - KASSERT(m[reqpage] == vm_page_lookup(object, m[reqpage]->pindex), - ("%s: mismatch page %p pindex %ju", __func__, - m[reqpage], (uintmax_t )m[reqpage]->pindex)); - vm_page_assert_xbusied(m[reqpage]); - - /* - * Pager didn't fill up entire page. Zero out - * partially filled data. 
- */ - if (m[reqpage]->valid != VM_PAGE_BITS_ALL) - vm_page_zero_invalid(m[reqpage], TRUE); - + for (int i = 0; i < count; i++) { + /* + * If pager has replaced a page, assert that it had + * updated the array. + */ + KASSERT(m[i] == vm_page_lookup(object, pindex++), + ("%s: mismatch page %p pindex %ju", __func__, + m[i], (uintmax_t )pindex - 1)); + /* + * Zero out partially filled data. + */ + if (m[i]->valid != VM_PAGE_BITS_ALL) + vm_page_zero_invalid(m[i], TRUE); + } return (VM_PAGER_OK); } int vm_pager_get_pages_async(vm_object_t object, vm_page_t *m, int count, - int reqpage, pgo_getpages_iodone_t iodone, void *arg) + int *rbehind, int *rahead, pgo_getpages_iodone_t iodone, void *arg) { vm_pager_assert_in(object, m, count); return ((*pagertab[object->type]->pgo_getpages_async)(object, m, - count, reqpage, iodone, arg)); + count, rbehind, rahead, iodone, arg)); } /* @@ -354,39 +354,6 @@ vm_pager_object_lookup(struct pagerlst *pg_list, void *handle) return (object); } -/* - * Free the non-requested pages from the given array. To remove all pages, - * caller should provide out of range reqpage number. - */ -void -vm_pager_free_nonreq(vm_object_t object, vm_page_t ma[], int reqpage, - int npages, boolean_t object_locked) -{ - enum { UNLOCKED, CALLER_LOCKED, INTERNALLY_LOCKED } locked; - int i; - - if (object_locked) { - VM_OBJECT_ASSERT_WLOCKED(object); - locked = CALLER_LOCKED; - } else { - VM_OBJECT_ASSERT_UNLOCKED(object); - locked = UNLOCKED; - } - for (i = 0; i < npages; ++i) { - if (i != reqpage) { - if (locked == UNLOCKED) { - VM_OBJECT_WLOCK(object); - locked = INTERNALLY_LOCKED; - } - vm_page_lock(ma[i]); - vm_page_free(ma[i]); - vm_page_unlock(ma[i]); - } - } - if (locked == INTERNALLY_LOCKED) - VM_OBJECT_WUNLOCK(object); -} - /* * initialize a physical buffer */ diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h index 6884729f7667..4b7d100a94aa 100644 --- a/sys/vm/vm_pager.h +++ b/sys/vm/vm_pager.h @@ -50,9 +50,9 @@ typedef void pgo_init_t(void); typedef vm_object_t pgo_alloc_t(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); typedef void pgo_dealloc_t(vm_object_t); -typedef int pgo_getpages_t(vm_object_t, vm_page_t *, int, int); +typedef int pgo_getpages_t(vm_object_t, vm_page_t *, int, int *, int *); typedef void pgo_getpages_iodone_t(void *, vm_page_t *, int, int); -typedef int pgo_getpages_async_t(vm_object_t, vm_page_t *, int, int, +typedef int pgo_getpages_async_t(vm_object_t, vm_page_t *, int, int *, int *, pgo_getpages_iodone_t, void *); typedef void pgo_putpages_t(vm_object_t, vm_page_t *, int, int, int *); typedef boolean_t pgo_haspage_t(vm_object_t, vm_pindex_t, int *, int *); @@ -106,14 +106,12 @@ vm_object_t vm_pager_allocate(objtype_t, void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); void vm_pager_bufferinit(void); void vm_pager_deallocate(vm_object_t); -int vm_pager_get_pages(vm_object_t, vm_page_t *, int, int); -int vm_pager_get_pages_async(vm_object_t, vm_page_t *, int, int, +int vm_pager_get_pages(vm_object_t, vm_page_t *, int, int *, int *); +int vm_pager_get_pages_async(vm_object_t, vm_page_t *, int, int *, int *, pgo_getpages_iodone_t, void *); static __inline boolean_t vm_pager_has_page(vm_object_t, vm_pindex_t, int *, int *); void vm_pager_init(void); vm_object_t vm_pager_object_lookup(struct pagerlst *, void *); -void vm_pager_free_nonreq(vm_object_t object, vm_page_t ma[], int reqpage, - int npages, boolean_t object_locked); static __inline void vm_pager_put_pages( diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c 
index 96510ac3083f..ff30f4d208ea 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -84,11 +84,9 @@ static int vnode_pager_addr(struct vnode *vp, vm_ooffset_t address, static int vnode_pager_input_smlfs(vm_object_t object, vm_page_t m); static int vnode_pager_input_old(vm_object_t object, vm_page_t m); static void vnode_pager_dealloc(vm_object_t); -static int vnode_pager_local_getpages0(struct vnode *, vm_page_t *, int, int, - vop_getpages_iodone_t, void *); -static int vnode_pager_getpages(vm_object_t, vm_page_t *, int, int); -static int vnode_pager_getpages_async(vm_object_t, vm_page_t *, int, int, - vop_getpages_iodone_t, void *); +static int vnode_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); +static int vnode_pager_getpages_async(vm_object_t, vm_page_t *, int, int *, + int *, vop_getpages_iodone_t, void *); static void vnode_pager_putpages(vm_object_t, vm_page_t *, int, int, int *); static boolean_t vnode_pager_haspage(vm_object_t, vm_pindex_t, int *, int *); static vm_object_t vnode_pager_alloc(void *, vm_ooffset_t, vm_prot_t, @@ -673,15 +671,15 @@ vnode_pager_input_old(vm_object_t object, vm_page_t m) * backing vp's VOP_GETPAGES. */ static int -vnode_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) +vnode_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, + int *rahead) { - int rtval; struct vnode *vp; - int bytes = count * PAGE_SIZE; + int rtval; vp = object->handle; VM_OBJECT_WUNLOCK(object); - rtval = VOP_GETPAGES(vp, m, bytes, reqpage); + rtval = VOP_GETPAGES(vp, m, count, rbehind, rahead); KASSERT(rtval != EOPNOTSUPP, ("vnode_pager: FS getpages not implemented\n")); VM_OBJECT_WLOCK(object); @@ -690,15 +688,14 @@ vnode_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) static int vnode_pager_getpages_async(vm_object_t object, vm_page_t *m, int count, - int reqpage, vop_getpages_iodone_t iodone, void *arg) + int *rbehind, int *rahead, vop_getpages_iodone_t iodone, void *arg) { struct vnode *vp; int rtval; vp = object->handle; VM_OBJECT_WUNLOCK(object); - rtval = VOP_GETPAGES_ASYNC(vp, m, count * PAGE_SIZE, reqpage, - iodone, arg); + rtval = VOP_GETPAGES_ASYNC(vp, m, count, rbehind, rahead, iodone, arg); KASSERT(rtval != EOPNOTSUPP, ("vnode_pager: FS getpages_async not implemented\n")); VM_OBJECT_WLOCK(object); @@ -714,48 +711,16 @@ int vnode_pager_local_getpages(struct vop_getpages_args *ap) { - return (vnode_pager_local_getpages0(ap->a_vp, ap->a_m, ap->a_count, - ap->a_reqpage, NULL, NULL)); + return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, + ap->a_rbehind, ap->a_rahead, NULL, NULL)); } int vnode_pager_local_getpages_async(struct vop_getpages_async_args *ap) { - return (vnode_pager_local_getpages0(ap->a_vp, ap->a_m, ap->a_count, - ap->a_reqpage, ap->a_iodone, ap->a_arg)); -} - -static int -vnode_pager_local_getpages0(struct vnode *vp, vm_page_t *m, int bytecount, - int reqpage, vop_getpages_iodone_t iodone, void *arg) -{ - vm_page_t mreq; - - mreq = m[reqpage]; - - /* - * Since the caller has busied the requested page, that page's valid - * field will not be changed by other threads. - */ - vm_page_assert_xbusied(mreq); - - /* - * The requested page has valid blocks. Invalid part can only - * exist at the end of file, and the page is made fully valid - * by zeroing in vm_pager_get_pages(). Free non-requested - * pages, since no i/o is done to read its content. 
- */ - if (mreq->valid != 0) { - vm_pager_free_nonreq(mreq->object, m, reqpage, - round_page(bytecount) / PAGE_SIZE, FALSE); - if (iodone != NULL) - iodone(arg, m, reqpage, 0); - return (VM_PAGER_OK); - } - - return (vnode_pager_generic_getpages(vp, m, bytecount, reqpage, - iodone, arg)); + return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, + ap->a_rbehind, ap->a_rahead, ap->a_iodone, ap->a_arg)); } /* @@ -763,25 +728,43 @@ vnode_pager_local_getpages0(struct vnode *vp, vm_page_t *m, int bytecount, * own vnodes if they fail to implement VOP_GETPAGES. */ int -vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount, - int reqpage, vop_getpages_iodone_t iodone, void *arg) +vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count, + int *a_rbehind, int *a_rahead, vop_getpages_iodone_t iodone, void *arg) { vm_object_t object; struct bufobj *bo; struct buf *bp; - daddr_t firstaddr, reqblock; - off_t foff, pib; - int pbefore, pafter, i, size, bsize, first, last, *freecnt; - int count, error, before, after, secmask; + off_t foff; + int bsize, pagesperblock, *freecnt; + int error, before, after, rbehind, rahead, poff, i; + int bytecount, secmask; KASSERT(vp->v_type != VCHR && vp->v_type != VBLK, - ("vnode_pager_generic_getpages does not support devices")); + ("%s does not support devices", __func__)); + if (vp->v_iflag & VI_DOOMED) return (VM_PAGER_BAD); object = vp->v_object; - count = bytecount / PAGE_SIZE; + foff = IDX_TO_OFF(m[0]->pindex); bsize = vp->v_mount->mnt_stat.f_iosize; + pagesperblock = bsize / PAGE_SIZE; + + KASSERT(foff < object->un_pager.vnp.vnp_size, + ("%s: page %p offset beyond vp %p size", __func__, m[0], vp)); + KASSERT(count <= sizeof(bp->b_pages), + ("%s: requested %d pages", __func__, count)); + + /* + * The last page has valid blocks. Invalid part can only + * exist at the end of file, and the page is made fully valid + * by zeroing in vm_pager_get_pages(). + */ + if (m[count - 1]->valid != 0 && --count == 0) { + if (iodone != NULL) + iodone(arg, m, 1, 0); + return (VM_PAGER_OK); + } /* * Synchronous and asynchronous paging operations use different @@ -800,130 +783,182 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount, * If the file system doesn't support VOP_BMAP, use old way of * getting pages via VOP_READ. */ - error = VOP_BMAP(vp, IDX_TO_OFF(m[reqpage]->pindex) / bsize, &bo, - &reqblock, &after, &before); + error = VOP_BMAP(vp, foff / bsize, &bo, &bp->b_blkno, &after, &before); if (error == EOPNOTSUPP) { relpbuf(bp, freecnt); VM_OBJECT_WLOCK(object); - for (i = 0; i < count; i++) - if (i != reqpage) { - vm_page_lock(m[i]); - vm_page_free(m[i]); - vm_page_unlock(m[i]); - } - PCPU_INC(cnt.v_vnodein); - PCPU_INC(cnt.v_vnodepgsin); - error = vnode_pager_input_old(object, m[reqpage]); + for (i = 0; i < count; i++) { + PCPU_INC(cnt.v_vnodein); + PCPU_INC(cnt.v_vnodepgsin); + error = vnode_pager_input_old(object, m[i]); + if (error) + break; + } VM_OBJECT_WUNLOCK(object); return (error); } else if (error != 0) { relpbuf(bp, freecnt); - vm_pager_free_nonreq(object, m, reqpage, count, FALSE); return (VM_PAGER_ERROR); - - /* - * If the blocksize is smaller than a page size, then use - * special small filesystem code. 
-		 */
-	} else if ((PAGE_SIZE / bsize) > 1) {
-		relpbuf(bp, freecnt);
-		vm_pager_free_nonreq(object, m, reqpage, count, FALSE);
-		PCPU_INC(cnt.v_vnodein);
-		PCPU_INC(cnt.v_vnodepgsin);
-		return (vnode_pager_input_smlfs(object, m[reqpage]));
 	}
 
 	/*
-	 * Since the caller has busied the requested page, that page's valid
-	 * field will not be changed by other threads.
+	 * If the file system supports BMAP, but blocksize is smaller
+	 * than a page size, then use special small filesystem code.
 	 */
-	vm_page_assert_xbusied(m[reqpage]);
+	if (pagesperblock == 0) {
+		for (i = 0; i < count; i++) {
+			PCPU_INC(cnt.v_vnodein);
+			PCPU_INC(cnt.v_vnodepgsin);
+			error = vnode_pager_input_smlfs(object, m[i]);
+			if (error)
+				break;
+		}
+		return (error);
+	}
 
 	/*
-	 * If we have a completely valid page available to us, we can
-	 * clean up and return.  Otherwise we have to re-read the
-	 * media.
+	 * A sparse file can be encountered only for a single page request,
+	 * which may not be preceded by a call to vm_pager_haspage().
 	 */
-	if (m[reqpage]->valid == VM_PAGE_BITS_ALL) {
+	if (bp->b_blkno == -1) {
+		KASSERT(count == 1,
+		    ("%s: array[%d] request to a sparse file %p", __func__,
+		    count, vp));
 		relpbuf(bp, freecnt);
-		vm_pager_free_nonreq(object, m, reqpage, count, FALSE);
-		return (VM_PAGER_OK);
-	} else if (reqblock == -1) {
-		relpbuf(bp, freecnt);
-		pmap_zero_page(m[reqpage]);
-		KASSERT(m[reqpage]->dirty == 0,
-		    ("vnode_pager_generic_getpages: page %p is dirty", m));
+		pmap_zero_page(m[0]);
+		KASSERT(m[0]->dirty == 0, ("%s: page %p is dirty",
+		    __func__, m[0]));
 		VM_OBJECT_WLOCK(object);
-		m[reqpage]->valid = VM_PAGE_BITS_ALL;
-		vm_pager_free_nonreq(object, m, reqpage, count, TRUE);
+		m[0]->valid = VM_PAGE_BITS_ALL;
 		VM_OBJECT_WUNLOCK(object);
 		return (VM_PAGER_OK);
-	} else if (m[reqpage]->valid != 0) {
-		VM_OBJECT_WLOCK(object);
-		m[reqpage]->valid = 0;
-		VM_OBJECT_WUNLOCK(object);
 	}
 
-	pib = IDX_TO_OFF(m[reqpage]->pindex) % bsize;
-	pbefore = ((daddr_t)before * bsize + pib) / PAGE_SIZE;
-	pafter = ((daddr_t)(after + 1) * bsize - pib) / PAGE_SIZE - 1;
-	first = reqpage < pbefore ? 0 : reqpage - pbefore;
-	last = reqpage + pafter >= count ? count - 1 : reqpage + pafter;
-	if (first > 0 || last + 1 < count) {
+	bp->b_blkno += (foff % bsize) / DEV_BSIZE;
+
+	/* Recalculate blocks available after/before to pages. */
+	poff = (foff % bsize) / PAGE_SIZE;
+	before *= pagesperblock;
+	before += poff;
+	after *= pagesperblock;
+	after += pagesperblock - (poff + 1);
+	if (m[0]->pindex + after >= object->size)
+		after = object->size - 1 - m[0]->pindex;
+	KASSERT(count <= after + 1, ("%s: %d pages asked, can do only %d",
+	    __func__, count, after + 1));
+	after -= count - 1;
+
+	/* Trim requested rbehind/rahead to possible values. */
+	rbehind = a_rbehind ? *a_rbehind : 0;
+	rahead = a_rahead ? *a_rahead : 0;
+	rbehind = min(rbehind, before);
+	rbehind = min(rbehind, m[0]->pindex);
+	rahead = min(rahead, after);
+	rahead = min(rahead, object->size - m[count - 1]->pindex);
+	KASSERT(rbehind + rahead + count <= sizeof(bp->b_pages),
+	    ("%s: behind %d ahead %d count %d", __func__,
+	    rbehind, rahead, count));
+
+	/*
+	 * Fill in the bp->b_pages[] array with requested and optional
+	 * read behind or read ahead pages.  Read behind pages are looked
+	 * up in a backward direction, down to a first cached page.  Same
+	 * for read ahead pages, but there is no need to shift the array
+	 * in case of encountering a cached page.
+ */ + i = bp->b_npages = 0; + if (rbehind) { + vm_pindex_t startpindex, tpindex; + vm_page_t p; + VM_OBJECT_WLOCK(object); - for (i = 0; i < first; i++) { - vm_page_lock(m[i]); - vm_page_free(m[i]); - vm_page_unlock(m[i]); + startpindex = m[0]->pindex - rbehind; + if ((p = TAILQ_PREV(m[0], pglist, listq)) != NULL && + p->pindex >= startpindex) + startpindex = p->pindex + 1; + + /* tpindex is unsigned; beware of numeric underflow. */ + for (tpindex = m[0]->pindex - 1; + tpindex >= startpindex && tpindex < m[0]->pindex; + tpindex--, i++) { + p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL | + VM_ALLOC_IFNOTCACHED); + if (p == NULL) { + /* Shift the array. */ + for (int j = 0; j < i; j++) + bp->b_pages[j] = bp->b_pages[j + + tpindex + 1 - startpindex]; + break; + } + bp->b_pages[tpindex - startpindex] = p; } - for (i = last + 1; i < count; i++) { - vm_page_lock(m[i]); - vm_page_free(m[i]); - vm_page_unlock(m[i]); + + bp->b_pgbefore = i; + bp->b_npages += i; + bp->b_blkno -= IDX_TO_OFF(i) / DEV_BSIZE; + } else + bp->b_pgbefore = 0; + + /* Requested pages. */ + for (int j = 0; j < count; j++, i++) + bp->b_pages[i] = m[j]; + bp->b_npages += count; + + if (rahead) { + vm_pindex_t endpindex, tpindex; + vm_page_t p; + + if (!VM_OBJECT_WOWNED(object)) + VM_OBJECT_WLOCK(object); + endpindex = m[count - 1]->pindex + rahead + 1; + if ((p = TAILQ_NEXT(m[count - 1], listq)) != NULL && + p->pindex < endpindex) + endpindex = p->pindex; + if (endpindex > object->size) + endpindex = object->size; + + for (tpindex = m[count - 1]->pindex + 1; + tpindex < endpindex; i++, tpindex++) { + p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL | + VM_ALLOC_IFNOTCACHED); + if (p == NULL) + break; + bp->b_pages[i] = p; } + + bp->b_pgafter = i - bp->b_npages; + bp->b_npages = i; + } else + bp->b_pgafter = 0; + + if (VM_OBJECT_WOWNED(object)) VM_OBJECT_WUNLOCK(object); - } + + /* Report back actual behind/ahead read. */ + if (a_rbehind) + *a_rbehind = bp->b_pgbefore; + if (a_rahead) + *a_rahead = bp->b_pgafter; + + KASSERT(bp->b_npages <= sizeof(bp->b_pages), + ("%s: buf %p overflowed", __func__, bp)); /* - * here on direct device I/O - */ - firstaddr = reqblock; - firstaddr += pib / DEV_BSIZE; - firstaddr -= IDX_TO_OFF(reqpage - first) / DEV_BSIZE; - - /* - * The first and last page have been calculated now, move - * input pages to be zero based, and adjust the count. - */ - m += first; - reqpage -= first; - count = last - first + 1; - - /* - * calculate the file virtual address for the transfer - */ - foff = IDX_TO_OFF(m[0]->pindex); - - /* - * calculate the size of the transfer - */ - size = count * PAGE_SIZE; - KASSERT(count > 0, ("zero count")); - if ((foff + size) > object->un_pager.vnp.vnp_size) - size = object->un_pager.vnp.vnp_size - foff; - KASSERT(size > 0, ("zero size")); - - /* - * round up physical size for real devices. + * Recalculate first offset and bytecount with regards to read behind. + * Truncate bytecount to vnode real size and round up physical size + * for real devices. 
 	 */
+	foff = IDX_TO_OFF(bp->b_pages[0]->pindex);
+	bytecount = bp->b_npages << PAGE_SHIFT;
+	if ((foff + bytecount) > object->un_pager.vnp.vnp_size)
+		bytecount = object->un_pager.vnp.vnp_size - foff;
 	secmask = bo->bo_bsize - 1;
 	KASSERT(secmask < PAGE_SIZE && secmask > 0,
-	    ("vnode_pager_generic_getpages: sector size %d too large",
-	    secmask + 1));
-	size = (size + secmask) & ~secmask;
+	    ("%s: sector size %d too large", __func__, secmask + 1));
+	bytecount = (bytecount + secmask) & ~secmask;
 
 	/*
-	 * and map the pages to be read into the kva, if the filesystem
+	 * And map the pages to be read into the kva, if the filesystem
 	 * requires mapped buffers.
 	 */
 	if ((vp->v_mount->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0 &&
@@ -932,41 +967,32 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount,
 		bp->b_offset = 0;
 	} else {
 		bp->b_data = bp->b_kvabase;
-		pmap_qenter((vm_offset_t)bp->b_data, m, count);
+		pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages);
 	}
 
-	/* build a minimal buffer header */
+	/* Build a minimal buffer header. */
 	bp->b_iocmd = BIO_READ;
 	KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
 	KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
 	bp->b_rcred = crhold(curthread->td_ucred);
 	bp->b_wcred = crhold(curthread->td_ucred);
-	bp->b_blkno = firstaddr;
 	pbgetbo(bo, bp);
 	bp->b_vp = vp;
-	bp->b_bcount = size;
-	bp->b_bufsize = size;
-	bp->b_runningbufspace = bp->b_bufsize;
-	for (i = 0; i < count; i++)
-		bp->b_pages[i] = m[i];
-	bp->b_npages = count;
-	bp->b_pager.pg_reqpage = reqpage;
-	atomic_add_long(&runningbufspace, bp->b_runningbufspace);
-
-	PCPU_INC(cnt.v_vnodein);
-	PCPU_ADD(cnt.v_vnodepgsin, count);
-
-	/* do the input */
+	bp->b_bcount = bp->b_bufsize = bp->b_runningbufspace = bytecount;
 	bp->b_iooffset = dbtob(bp->b_blkno);
+	atomic_add_long(&runningbufspace, bp->b_runningbufspace);
+	PCPU_INC(cnt.v_vnodein);
+	PCPU_ADD(cnt.v_vnodepgsin, bp->b_npages);
+
 	if (iodone != NULL) { /* async */
-		bp->b_pager.pg_iodone = iodone;
+		bp->b_pgiodone = iodone;
 		bp->b_caller1 = arg;
 		bp->b_iodone = vnode_pager_generic_getpages_done_async;
 		bp->b_flags |= B_ASYNC;
 		BUF_KERNPROC(bp);
 		bstrategy(bp);
-		/* Good bye! */
+		return (VM_PAGER_OK);
 	} else {
 		bp->b_iodone = bdone;
 		bstrategy(bp);
@@ -977,9 +1003,8 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount,
 		bp->b_vp = NULL;
 		pbrelbo(bp);
 		relpbuf(bp, &vnode_pbuf_freecnt);
+		return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
 	}
-
-	return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
 }
 
 static void
@@ -988,8 +1013,9 @@ vnode_pager_generic_getpages_done_async(struct buf *bp)
 	int error;
 
 	error = vnode_pager_generic_getpages_done(bp);
-	bp->b_pager.pg_iodone(bp->b_caller1, bp->b_pages,
-	    bp->b_pager.pg_reqpage, error);
+	/* Run the iodone upon the requested range. */
+	bp->b_pgiodone(bp->b_caller1, bp->b_pages + bp->b_pgbefore,
+	    bp->b_npages - bp->b_pgbefore - bp->b_pgafter, error);
 	for (int i = 0; i < bp->b_npages; i++)
 		bp->b_pages[i] = NULL;
 	bp->b_vp = NULL;
@@ -1052,8 +1078,8 @@ vnode_pager_generic_getpages_done(struct buf *bp)
 		    object->un_pager.vnp.vnp_size - tfoff)) == 0,
 		    ("%s: page %p is dirty", __func__, mt));
 		}
-
-		if (i != bp->b_pager.pg_reqpage)
+
+		if (i < bp->b_pgbefore || i >= bp->b_npages - bp->b_pgafter)
 			vm_page_readahead_finish(mt);
 	}
 	VM_OBJECT_WUNLOCK(object);
diff --git a/sys/vm/vnode_pager.h b/sys/vm/vnode_pager.h
index 1ff16ebfa937..a94b09b6b2ea 100644
--- a/sys/vm/vnode_pager.h
+++ b/sys/vm/vnode_pager.h
@@ -41,7 +41,8 @@
 #ifdef _KERNEL
 int vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m,
-    int count, int reqpage, vop_getpages_iodone_t iodone, void *arg);
+    int count, int *rbehind, int *rahead, vop_getpages_iodone_t iodone,
+    void *arg);
 int vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *m,
     int count, boolean_t sync, int *rtvals);
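
For a filesystem that relies on the buffer cache, adapting to the reworked KPI is mostly a matter of passing the two new pointers through to the generic pager. The sketch below is illustrative only and is not part of this change; "examplefs" is a made-up filesystem name, and the function simply mirrors what vnode_pager_local_getpages() above already does.

/*
 * Hypothetical VOP_GETPAGES implementation for a local-media filesystem.
 * The rbehind/rahead hints are forwarded untouched; the generic code
 * clips them against the VOP_BMAP result, the object size and any
 * neighbouring cached pages, and writes the number of extra pages it
 * actually read back through the same pointers.
 */
static int
examplefs_getpages(struct vop_getpages_args *ap)
{

	return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
	    ap->a_rbehind, ap->a_rahead, NULL, NULL));
}

A caller with no use for the extra pages may pass NULL for either pointer; vnode_pager_generic_getpages() then treats the corresponding hint as zero and performs no read behind or read ahead on that side.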
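The asynchronous path also changes what the completion callback sees: bp->b_pgiodone is now invoked on bp->b_pages + bp->b_pgbefore with only the requested page run, while the read-behind and read-ahead pages are finished inside vnode_pager_generic_getpages_done() via vm_page_readahead_finish(). A minimal callback compatible with vop_getpages_iodone_t could therefore look like the hypothetical sketch below; struct examplefs_req and its fields are invented for the example.

/* Hypothetical per-request state kept by an async getpages caller. */
struct examplefs_req {
	int	done;
	int	error;
};

/*
 * Completion handler matching vop_getpages_iodone_t.  With this change
 * ma[0 .. count - 1] are exactly the pages originally passed to
 * VOP_GETPAGES_ASYNC(); optional read behind/ahead pages never show up
 * here, so the handler does not need to know about them.
 */
static void
examplefs_getpages_done(void *arg, vm_page_t *ma, int count, int error)
{
	struct examplefs_req *req;

	req = arg;
	req->error = error;
	req->done = 1;
}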