From b0cd20172d854584c67cd47461a77e98b43cbcd8 Mon Sep 17 00:00:00 2001 From: Gleb Smirnoff Date: Wed, 16 Dec 2015 21:30:45 +0000 Subject: [PATCH] A change to KPI of vm_pager_get_pages() and underlying VOP_GETPAGES(). o With the new KPI consumers can request contiguous ranges of pages, and unlike before, all pages will be kept busied on return, like it was done before with the 'reqpage' only. Now the reqpage goes away. With the new interface it is easier to implement code protected from race conditions. Such arrayed requests for now should be preceded by a call to vm_pager_haspage() to make sure that the request is possible. This could be improved later, making vm_pager_haspage() obsolete. Strengthening the promises on the busyness of the array of pages allows us to remove such hacks as swp_pager_free_nrpage() and vm_pager_free_nonreq(). o The new KPI accepts two integer pointers that may optionally point at values for read ahead and read behind that a pager may perform, if it can. These pages are completely owned by the pager, and not controlled by the caller. This shifts the UFS-specific readahead logic from vm_fault.c, which should be file system agnostic, into vnode_pager.c. It also removes one VOP_BMAP() request per hard fault. A usage sketch of the new calling convention follows the patch. Discussed with: kib, alc, jeff, scottl Sponsored by: Nginx, Inc. Sponsored by: Netflix --- share/man/man9/VOP_GETPAGES.9 | 19 +- .../opensolaris/uts/common/fs/zfs/zfs_vnops.c | 97 ++--- sys/dev/drm2/i915/i915_gem.c | 2 +- sys/dev/drm2/ttm/ttm_tt.c | 3 +- sys/dev/md/md.c | 9 +- sys/fs/fuse/fuse_vnops.c | 46 +-- sys/fs/nfsclient/nfs_clbio.c | 22 +- sys/fs/smbfs/smbfs_io.c | 40 +- sys/fs/tmpfs/tmpfs_subr.c | 3 +- sys/kern/kern_exec.c | 31 +- sys/kern/uipc_shm.c | 4 +- sys/kern/uipc_syscalls.c | 2 +- sys/kern/vfs_default.c | 10 +- sys/kern/vnode_if.src | 6 +- sys/sys/buf.h | 13 +- sys/vm/default_pager.c | 12 +- sys/vm/device_pager.c | 23 +- sys/vm/phys_pager.c | 14 +- sys/vm/sg_pager.c | 30 +- sys/vm/swap_pager.c | 166 ++------ sys/vm/vm_fault.c | 196 +--------- sys/vm/vm_glue.c | 50 +-- sys/vm/vm_object.c | 2 +- sys/vm/vm_object.h | 2 + sys/vm/vm_page.c | 44 +-- sys/vm/vm_pager.c | 89 ++--- sys/vm/vm_pager.h | 10 +- sys/vm/vnode_pager.c | 370 ++++++++++-------- sys/vm/vnode_pager.h | 3 +- 29 files changed, 509 insertions(+), 809 deletions(-) diff --git a/share/man/man9/VOP_GETPAGES.9 b/share/man/man9/VOP_GETPAGES.9 index 2cc5b7acff10..4625d9b57115 100644 --- a/share/man/man9/VOP_GETPAGES.9 +++ b/share/man/man9/VOP_GETPAGES.9 @@ -29,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd September 12, 2014 +.Dd December 16, 2015 .Dt VOP_GETPAGES 9 .Os .Sh NAME @@ -41,7 +41,7 @@ .In sys/vnode.h .In vm/vm.h .Ft int -.Fn VOP_GETPAGES "struct vnode *vp" "vm_page_t *ma" "int count" "int reqpage" +.Fn VOP_GETPAGES "struct vnode *vp" "vm_page_t *ma" "int count" "int *rbehind" "int *rahead" .Ft int .Fn VOP_PUTPAGES "struct vnode *vp" "vm_page_t *ma" "int count" "int sync" "int *rtvals" .Sh DESCRIPTION @@ -63,7 +63,7 @@ locks are held. Both methods return in the same state on both success and error returns. .Pp The arguments are: -.Bl -tag -width rbehind .It Fa vp The file to access. .It Fa ma @@ -78,9 +78,16 @@ if the write should be synchronous. An array of VM system result codes indicating the status of each page written by .Fn VOP_PUTPAGES . -.It Fa reqpage -The index in the page array of the requested page; i.e., the one page which -the implementation of this method must handle. +.It Fa rbehind +Optional pointer to an integer specifying the number of pages to be read +behind, if possible. 
+If the filesystem supports that feature, the number of pages actually +read is reported back; otherwise zero is returned. +.It Fa rahead +Optional pointer to an integer specifying the number of pages to be read +ahead, if possible. +If the filesystem supports that feature, the number of pages actually +read is reported back; otherwise zero is returned. .El .Pp The status of the diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index d1ed9dac23f3..b5af5415ef26 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -5762,12 +5762,13 @@ ioflags(int ioflags) } static int -zfs_getpages(struct vnode *vp, vm_page_t *m, int count, int reqpage) +zfs_getpages(struct vnode *vp, vm_page_t *m, int count, int *rbehind, + int *rahead) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; objset_t *os = zp->z_zfsvfs->z_os; - vm_page_t mfirst, mlast, mreq; + vm_page_t mlast; vm_object_t object; caddr_t va; struct sf_buf *sf; @@ -5776,82 +5777,46 @@ zfs_getpages(struct vnode *vp, vm_page_t *m, int count, int reqpage) vm_pindex_t reqstart, reqend; int pcount, lsize, reqsize, size; + if (rbehind) + *rbehind = 0; + if (rahead) + *rahead = 0; + ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); pcount = OFF_TO_IDX(round_page(count)); - mreq = m[reqpage]; - object = mreq->object; - error = 0; - - if (pcount > 1 && zp->z_blksz > PAGESIZE) { - startoff = rounddown(IDX_TO_OFF(mreq->pindex), zp->z_blksz); - reqstart = OFF_TO_IDX(round_page(startoff)); - if (reqstart < m[0]->pindex) - reqstart = 0; - else - reqstart = reqstart - m[0]->pindex; - endoff = roundup(IDX_TO_OFF(mreq->pindex) + PAGE_SIZE, - zp->z_blksz); - reqend = OFF_TO_IDX(trunc_page(endoff)) - 1; - if (reqend > m[pcount - 1]->pindex) - reqend = m[pcount - 1]->pindex; - reqsize = reqend - m[reqstart]->pindex + 1; - KASSERT(reqstart <= reqpage && reqpage < reqstart + reqsize, - ("reqpage beyond [reqstart, reqstart + reqsize[ bounds")); - } else { - reqstart = reqpage; - reqsize = 1; - } - mfirst = m[reqstart]; - mlast = m[reqstart + reqsize - 1]; + object = m[0]->object; zfs_vmobject_wlock(object); - - for (i = 0; i < reqstart; i++) { - vm_page_lock(m[i]); - vm_page_free(m[i]); - vm_page_unlock(m[i]); - } - for (i = reqstart + reqsize; i < pcount; i++) { - vm_page_lock(m[i]); - vm_page_free(m[i]); - vm_page_unlock(m[i]); - } - - if (mreq->valid && reqsize == 1) { - if (mreq->valid != VM_PAGE_BITS_ALL) - vm_page_zero_invalid(mreq, TRUE); + if (m[pcount - 1]->valid != 0 && --pcount == 0) { zfs_vmobject_wunlock(object); ZFS_EXIT(zfsvfs); return (zfs_vm_pagerret_ok); } - PCPU_INC(cnt.v_vnodein); - PCPU_ADD(cnt.v_vnodepgsin, reqsize); + mlast = m[pcount - 1]; - if (IDX_TO_OFF(mreq->pindex) >= object->un_pager.vnp.vnp_size) { - for (i = reqstart; i < reqstart + reqsize; i++) { - if (i != reqpage) { - vm_page_lock(m[i]); - vm_page_free(m[i]); - vm_page_unlock(m[i]); - } - } + if (IDX_TO_OFF(mlast->pindex) >= + object->un_pager.vnp.vnp_size) { zfs_vmobject_wunlock(object); ZFS_EXIT(zfsvfs); return (zfs_vm_pagerret_bad); } + PCPU_INC(cnt.v_vnodein); + PCPU_ADD(cnt.v_vnodepgsin, pcount); + lsize = PAGE_SIZE; if (IDX_TO_OFF(mlast->pindex) + lsize > object->un_pager.vnp.vnp_size) - lsize = object->un_pager.vnp.vnp_size - IDX_TO_OFF(mlast->pindex); - + lsize = object->un_pager.vnp.vnp_size - + IDX_TO_OFF(mlast->pindex); zfs_vmobject_wunlock(object); - for (i = reqstart; i < reqstart + reqsize; i++) { + error = 0; + for (i = 
0; i < pcount; i++) { size = PAGE_SIZE; - if (i == (reqstart + reqsize - 1)) + if (i == pcount - 1) size = lsize; va = zfs_map_page(m[i], &sf); error = dmu_read(os, zp->z_id, IDX_TO_OFF(m[i]->pindex), @@ -5860,21 +5825,15 @@ zfs_getpages(struct vnode *vp, vm_page_t *m, int count, int reqpage) bzero(va + size, PAGE_SIZE - size); zfs_unmap_page(sf); if (error != 0) - break; + goto out; } zfs_vmobject_wlock(object); - - for (i = reqstart; i < reqstart + reqsize; i++) { - if (!error) - m[i]->valid = VM_PAGE_BITS_ALL; - KASSERT(m[i]->dirty == 0, ("zfs_getpages: page %p is dirty", m[i])); - if (i != reqpage) - vm_page_readahead_finish(m[i]); - } - + for (i = 0; i < pcount; i++) + m[i]->valid = VM_PAGE_BITS_ALL; zfs_vmobject_wunlock(object); +out: ZFS_ACCESSTIME_STAMP(zfsvfs, zp); ZFS_EXIT(zfsvfs); return (error ? zfs_vm_pagerret_error : zfs_vm_pagerret_ok); @@ -5886,11 +5845,13 @@ zfs_freebsd_getpages(ap) struct vnode *a_vp; vm_page_t *a_m; int a_count; - int a_reqpage; + int *a_rbehind; + int *a_rahead; } */ *ap; { - return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage)); + return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, + ap->a_rahead)); } static int diff --git a/sys/dev/drm2/i915/i915_gem.c b/sys/dev/drm2/i915/i915_gem.c index 07b516c07ed4..95edcf9acd24 100644 --- a/sys/dev/drm2/i915/i915_gem.c +++ b/sys/dev/drm2/i915/i915_gem.c @@ -4338,7 +4338,7 @@ i915_gem_wire_page(vm_object_t object, vm_pindex_t pindex, bool *fresh) page = vm_page_grab(object, pindex, VM_ALLOC_NORMAL); if (page->valid != VM_PAGE_BITS_ALL) { if (vm_pager_has_page(object, pindex, NULL, NULL)) { - rv = vm_pager_get_pages(object, &page, 1, 0); + rv = vm_pager_get_pages(object, &page, 1, NULL, NULL); if (rv != VM_PAGER_OK) { vm_page_lock(page); vm_page_free(page); diff --git a/sys/dev/drm2/ttm/ttm_tt.c b/sys/dev/drm2/ttm/ttm_tt.c index 2dd6fb4d1a3e..1e2db3cd8755 100644 --- a/sys/dev/drm2/ttm/ttm_tt.c +++ b/sys/dev/drm2/ttm/ttm_tt.c @@ -291,7 +291,8 @@ int ttm_tt_swapin(struct ttm_tt *ttm) from_page = vm_page_grab(obj, i, VM_ALLOC_NORMAL); if (from_page->valid != VM_PAGE_BITS_ALL) { if (vm_pager_has_page(obj, i, NULL, NULL)) { - rv = vm_pager_get_pages(obj, &from_page, 1, 0); + rv = vm_pager_get_pages(obj, &from_page, 1, + NULL, NULL); if (rv != VM_PAGER_OK) { vm_page_lock(from_page); vm_page_free(from_page); diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index 6405ad5f02b5..2fa9f460edd9 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -1019,7 +1019,8 @@ mdstart_swap(struct md_s *sc, struct bio *bp) if (m->valid == VM_PAGE_BITS_ALL) rv = VM_PAGER_OK; else - rv = vm_pager_get_pages(sc->object, &m, 1, 0); + rv = vm_pager_get_pages(sc->object, &m, 1, + NULL, NULL); if (rv == VM_PAGER_ERROR) { vm_page_xunbusy(m); break; @@ -1046,7 +1047,8 @@ mdstart_swap(struct md_s *sc, struct bio *bp) } } else if (bp->bio_cmd == BIO_WRITE) { if (len != PAGE_SIZE && m->valid != VM_PAGE_BITS_ALL) - rv = vm_pager_get_pages(sc->object, &m, 1, 0); + rv = vm_pager_get_pages(sc->object, &m, 1, + NULL, NULL); else rv = VM_PAGER_OK; if (rv == VM_PAGER_ERROR) { @@ -1065,7 +1067,8 @@ mdstart_swap(struct md_s *sc, struct bio *bp) m->valid = VM_PAGE_BITS_ALL; } else if (bp->bio_cmd == BIO_DELETE) { if (len != PAGE_SIZE && m->valid != VM_PAGE_BITS_ALL) - rv = vm_pager_get_pages(sc->object, &m, 1, 0); + rv = vm_pager_get_pages(sc->object, &m, 1, + NULL, NULL); else rv = VM_PAGER_OK; if (rv == VM_PAGER_ERROR) { diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index 12b97781dd07..50318647a2f5 100644 --- 
a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -1753,6 +1753,10 @@ fuse_vnop_getpages(struct vop_getpages_args *ap) cred = curthread->td_ucred; /* XXX */ pages = ap->a_m; count = ap->a_count; + if (ap->a_rbehind) + *ap->a_rbehind = 0; + if (ap->a_rahead) + *ap->a_rahead = 0; if (!fsess_opt_mmap(vnode_mount(vp))) { FS_DEBUG("called on non-cacheable vnode??\n"); @@ -1761,26 +1765,21 @@ fuse_vnop_getpages(struct vop_getpages_args *ap) npages = btoc(count); /* - * If the requested page is partially valid, just return it and - * allow the pager to zero-out the blanks. Partially valid pages - * can only occur at the file EOF. + * If the last page is partially valid, just return it and allow + * the pager to zero-out the blanks. Partially valid pages can + * only occur at the file EOF. + * + * XXXGL: is that true for FUSE, which is a local filesystem, + * but still somewhat disconnected from the kernel? */ VM_OBJECT_WLOCK(vp->v_object); - fuse_vm_page_lock_queues(); - if (pages[ap->a_reqpage]->valid != 0) { - for (i = 0; i < npages; ++i) { - if (i != ap->a_reqpage) { - fuse_vm_page_lock(pages[i]); - vm_page_free(pages[i]); - fuse_vm_page_unlock(pages[i]); - } + if (pages[npages - 1]->valid != 0) { + if (--npages == 0) { + VM_OBJECT_WUNLOCK(vp->v_object); + return (VM_PAGER_OK); } - fuse_vm_page_unlock_queues(); - VM_OBJECT_WUNLOCK(vp->v_object); - return 0; - } - fuse_vm_page_unlock_queues(); + count = npages << PAGE_SHIFT; + } VM_OBJECT_WUNLOCK(vp->v_object); /* @@ -1811,17 +1810,6 @@ fuse_vnop_getpages(struct vop_getpages_args *ap) if (error && (uio.uio_resid == count)) { FS_DEBUG("error %d\n", error); - VM_OBJECT_WLOCK(vp->v_object); - fuse_vm_page_lock_queues(); - for (i = 0; i < npages; ++i) { - if (i != ap->a_reqpage) { - fuse_vm_page_lock(pages[i]); - vm_page_free(pages[i]); - fuse_vm_page_unlock(pages[i]); - } - } - fuse_vm_page_unlock_queues(); - VM_OBJECT_WUNLOCK(vp->v_object); return VM_PAGER_ERROR; } /* @@ -1862,8 +1850,6 @@ fuse_vnop_getpages(struct vop_getpages_args *ap) */ ; } - if (i != ap->a_reqpage) - vm_page_readahead_finish(m); } fuse_vm_page_unlock_queues(); VM_OBJECT_WUNLOCK(vp->v_object); diff --git a/sys/fs/nfsclient/nfs_clbio.c b/sys/fs/nfsclient/nfs_clbio.c index 53ba7efe418f..5647868011ac 100644 --- a/sys/fs/nfsclient/nfs_clbio.c +++ b/sys/fs/nfsclient/nfs_clbio.c @@ -101,6 +101,10 @@ ncl_getpages(struct vop_getpages_args *ap) nmp = VFSTONFS(vp->v_mount); pages = ap->a_m; count = ap->a_count; + if (ap->a_rbehind) + *ap->a_rbehind = 0; + if (ap->a_rahead) + *ap->a_rahead = 0; if ((object = vp->v_object) == NULL) { ncl_printf("nfs_getpages: called with non-merged cache vnode??\n"); @@ -132,12 +136,18 @@ ncl_getpages(struct vop_getpages_args *ap) * If the requested page is partially valid, just return it and * allow the pager to zero-out the blanks. Partially valid pages * can only occur at the file EOF. + * + * XXXGL: is that true for NFS, where short read can occur??? 
*/ - if (pages[ap->a_reqpage]->valid != 0) { - vm_pager_free_nonreq(object, pages, ap->a_reqpage, npages, - FALSE); - return (VM_PAGER_OK); + VM_OBJECT_WLOCK(object); + if (pages[npages - 1]->valid != 0) { + if (--npages == 0) { + VM_OBJECT_WUNLOCK(object); + return (VM_PAGER_OK); + } + count = npages << PAGE_SHIFT; } + VM_OBJECT_WUNLOCK(object); /* * We use only the kva address for the buffer, but this is extremely @@ -167,8 +177,6 @@ ncl_getpages(struct vop_getpages_args *ap) if (error && (uio.uio_resid == count)) { ncl_printf("nfs_getpages: error %d\n", error); - vm_pager_free_nonreq(object, pages, ap->a_reqpage, npages, - FALSE); return (VM_PAGER_ERROR); } @@ -212,8 +220,6 @@ ncl_getpages(struct vop_getpages_args *ap) */ ; } - if (i != ap->a_reqpage) - vm_page_readahead_finish(m); } VM_OBJECT_WUNLOCK(object); return (0); diff --git a/sys/fs/smbfs/smbfs_io.c b/sys/fs/smbfs/smbfs_io.c index a567ce6bf19c..5fe6f11dbbf1 100644 --- a/sys/fs/smbfs/smbfs_io.c +++ b/sys/fs/smbfs/smbfs_io.c @@ -424,7 +424,7 @@ smbfs_getpages(ap) #ifdef SMBFS_RWGENERIC return vop_stdgetpages(ap); #else - int i, error, nextoff, size, toff, npages, count, reqpage; + int i, error, nextoff, size, toff, npages, count; struct uio uio; struct iovec iov; vm_offset_t kva; @@ -436,7 +436,7 @@ smbfs_getpages(ap) struct smbnode *np; struct smb_cred *scred; vm_object_t object; - vm_page_t *pages, m; + vm_page_t *pages; vp = ap->a_vp; if ((object = vp->v_object) == NULL) { @@ -451,26 +451,25 @@ smbfs_getpages(ap) pages = ap->a_m; count = ap->a_count; npages = btoc(count); - reqpage = ap->a_reqpage; + if (ap->a_rbehind) + *ap->a_rbehind = 0; + if (ap->a_rahead) + *ap->a_rahead = 0; /* * If the requested page is partially valid, just return it and * allow the pager to zero-out the blanks. Partially valid pages * can only occur at the file EOF. + * + * XXXGL: is that true for SMB filesystem? */ - m = pages[reqpage]; VM_OBJECT_WLOCK(object); - if (m->valid != 0) { - for (i = 0; i < npages; ++i) { - if (i != reqpage) { - vm_page_lock(pages[i]); - vm_page_free(pages[i]); - vm_page_unlock(pages[i]); - } + if (pages[npages - 1]->valid != 0) { + if (--npages == 0) { + VM_OBJECT_WUNLOCK(object); + return (VM_PAGER_OK); } - VM_OBJECT_WUNLOCK(object); - return 0; + count = npages << PAGE_SHIFT; } VM_OBJECT_WUNLOCK(object); @@ -500,22 +499,14 @@ smbfs_getpages(ap) relpbuf(bp, &smbfs_pbuf_freecnt); - VM_OBJECT_WLOCK(object); if (error && (uio.uio_resid == count)) { printf("smbfs_getpages: error %d\n",error); - for (i = 0; i < npages; i++) { - if (reqpage != i) { - vm_page_lock(pages[i]); - vm_page_free(pages[i]); - vm_page_unlock(pages[i]); - } - } - VM_OBJECT_WUNLOCK(object); return VM_PAGER_ERROR; } size = count - uio.uio_resid; + VM_OBJECT_WLOCK(object); for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { vm_page_t m; nextoff = toff + PAGE_SIZE; @@ -544,9 +535,6 @@ smbfs_getpages(ap) */ ; } - - if (i != reqpage) - vm_page_readahead_finish(m); } VM_OBJECT_WUNLOCK(object); return 0; diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c index fa489b2f3842..fcc8782b9dd5 100644 --- a/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -1370,7 +1370,8 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr) VM_OBJECT_WLOCK(uobj); goto retry; } else if (m->valid != VM_PAGE_BITS_ALL) - rv = vm_pager_get_pages(uobj, &m, 1, 0); + rv = vm_pager_get_pages(uobj, &m, 1, + NULL, NULL); else /* A cached page was reactivated. 
*/ rv = VM_PAGER_OK; diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index a0eb0695fbba..7ad9c9379960 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -950,8 +950,7 @@ int exec_map_first_page(imgp) struct image_params *imgp; { - int rv, i; - int initial_pagein; + int rv, i, after, initial_pagein; vm_page_t ma[VM_INITIAL_PAGEIN]; vm_object_t object; @@ -967,9 +966,18 @@ exec_map_first_page(imgp) #endif ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL); if (ma[0]->valid != VM_PAGE_BITS_ALL) { - initial_pagein = VM_INITIAL_PAGEIN; - if (initial_pagein > object->size) - initial_pagein = object->size; + if (!vm_pager_has_page(object, 0, NULL, &after)) { + vm_page_lock(ma[0]); + vm_page_free(ma[0]); + vm_page_unlock(ma[0]); + VM_OBJECT_WUNLOCK(object); + return (EIO); + } + initial_pagein = min(after, VM_INITIAL_PAGEIN); + KASSERT(initial_pagein <= object->size, + ("%s: initial_pagein %d object->size %ju", + __func__, initial_pagein, (uintmax_t )object->size)); for (i = 1; i < initial_pagein; i++) { if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) { if (ma[i]->valid) @@ -984,14 +992,19 @@ exec_map_first_page(imgp) } } initial_pagein = i; - rv = vm_pager_get_pages(object, ma, initial_pagein, 0); + rv = vm_pager_get_pages(object, ma, initial_pagein, NULL, NULL); if (rv != VM_PAGER_OK) { - vm_page_lock(ma[0]); - vm_page_free(ma[0]); - vm_page_unlock(ma[0]); + for (i = 0; i < initial_pagein; i++) { + vm_page_lock(ma[i]); + vm_page_free(ma[i]); + vm_page_unlock(ma[i]); + } VM_OBJECT_WUNLOCK(object); return (EIO); } + for (i = 1; i < initial_pagein; i++) + vm_page_readahead_finish(ma[i]); } vm_page_xunbusy(ma[0]); vm_page_lock(ma[0]); diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c index 21cbe49e5cb6..8df6e43db0c9 100644 --- a/sys/kern/uipc_shm.c +++ b/sys/kern/uipc_shm.c @@ -189,7 +189,7 @@ uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio) m = vm_page_grab(obj, idx, VM_ALLOC_NORMAL); if (m->valid != VM_PAGE_BITS_ALL) { if (vm_pager_has_page(obj, idx, NULL, NULL)) { - rv = vm_pager_get_pages(obj, &m, 1, 0); + rv = vm_pager_get_pages(obj, &m, 1, NULL, NULL); if (rv != VM_PAGER_OK) { printf( "uiomove_object: vm_obj %p idx %jd valid %x pager error %d\n", @@ -460,7 +460,7 @@ shm_dotruncate(struct shmfd *shmfd, off_t length) goto retry; } else if (m->valid != VM_PAGE_BITS_ALL) rv = vm_pager_get_pages(object, &m, 1, - 0); + NULL, NULL); else /* A cached page was reactivated. 
*/ rv = VM_PAGER_OK; diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index 54c13024c611..c33a2cf30a22 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -2033,7 +2033,7 @@ sendfile_readpage(vm_object_t obj, struct vnode *vp, int nd, VM_OBJECT_WLOCK(obj); } else { if (vm_pager_has_page(obj, pindex, NULL, NULL)) { - rv = vm_pager_get_pages(obj, &m, 1, 0); + rv = vm_pager_get_pages(obj, &m, 1, NULL, NULL); SFSTAT_INC(sf_iocnt); if (rv != VM_PAGER_OK) { vm_page_lock(m); diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index e71294a78ae8..fd83f87ec3c5 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -731,12 +731,13 @@ vop_stdgetpages(ap) struct vnode *a_vp; vm_page_t *a_m; int a_count; - int a_reqpage; + int *a_rbehind; + int *a_rahead; } */ *ap; { return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, - ap->a_count, ap->a_reqpage, NULL, NULL); + ap->a_count, ap->a_rbehind, ap->a_rahead, NULL, NULL); } static int @@ -744,8 +745,9 @@ vop_stdgetpages_async(struct vop_getpages_async_args *ap) { int error; - error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage); - ap->a_iodone(ap->a_arg, ap->a_m, ap->a_reqpage, error); + error = VOP_GETPAGES(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind, + ap->a_rahead); + ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error); return (error); } diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index 63f8eb9237cf..5586a14ab903 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -473,7 +473,8 @@ vop_getpages { IN struct vnode *vp; IN vm_page_t *m; IN int count; - IN int reqpage; + IN int *rbehind; + IN int *rahead; }; @@ -483,7 +484,8 @@ vop_getpages_async { IN struct vnode *vp; IN vm_page_t *m; IN int count; - IN int reqpage; + IN int *rbehind; + IN int *rahead; IN vop_getpages_iodone_t *iodone; IN void *arg; }; diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 75b41f3539d5..c85b88ff54f4 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -122,14 +122,13 @@ struct buf { struct ucred *b_rcred; /* Read credentials reference. */ struct ucred *b_wcred; /* Write credentials reference. */ union { - TAILQ_ENTRY(buf) bu_freelist; /* (Q) */ + TAILQ_ENTRY(buf) b_freelist; /* (Q) */ struct { - void (*pg_iodone)(void *, vm_page_t *, int, int); - int pg_reqpage; - } bu_pager; - } b_union; -#define b_freelist b_union.bu_freelist -#define b_pager b_union.bu_pager + void (*b_pgiodone)(void *, vm_page_t *, int, int); + int b_pgbefore; + int b_pgafter; + }; + }; union cluster_info { TAILQ_HEAD(cluster_list_head, buf) cluster_head; TAILQ_ENTRY(buf) cluster_entry; diff --git a/sys/vm/default_pager.c b/sys/vm/default_pager.c index 98dee45d730a..f334cd72d808 100644 --- a/sys/vm/default_pager.c +++ b/sys/vm/default_pager.c @@ -56,7 +56,7 @@ __FBSDID("$FreeBSD$"); static vm_object_t default_pager_alloc(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); static void default_pager_dealloc(vm_object_t); -static int default_pager_getpages(vm_object_t, vm_page_t *, int, int); +static int default_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); static void default_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *); static boolean_t default_pager_haspage(vm_object_t, vm_pindex_t, int *, @@ -122,13 +122,11 @@ default_pager_dealloc(object) * see a vm_page with assigned swap here. 
*/ static int -default_pager_getpages(object, m, count, reqpage) - vm_object_t object; - vm_page_t *m; - int count; - int reqpage; +default_pager_getpages(vm_object_t object, vm_page_t *m, int count, + int *rbehind, int *rahead) { - return VM_PAGER_FAIL; + + return (VM_PAGER_FAIL); } /* diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index a0446be37a55..5473081de096 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -59,7 +59,7 @@ static void dev_pager_init(void); static vm_object_t dev_pager_alloc(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); static void dev_pager_dealloc(vm_object_t); -static int dev_pager_getpages(vm_object_t, vm_page_t *, int, int); +static int dev_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); static void dev_pager_putpages(vm_object_t, vm_page_t *, int, int, int *); static boolean_t dev_pager_haspage(vm_object_t, vm_pindex_t, int *, int *); static void dev_pager_free_page(vm_object_t object, vm_page_t m); @@ -257,28 +257,33 @@ dev_pager_dealloc(vm_object_t object) } static int -dev_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int reqpage) +dev_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int *rbehind, + int *rahead) { int error; + /* Since our haspage reports zero after/before, the count is 1. */ + KASSERT(count == 1, ("%s: count %d", __func__, count)); VM_OBJECT_ASSERT_WLOCKED(object); error = object->un_pager.devp.ops->cdev_pg_fault(object, - IDX_TO_OFF(ma[reqpage]->pindex), PROT_READ, &ma[reqpage]); + IDX_TO_OFF(ma[0]->pindex), PROT_READ, &ma[0]); VM_OBJECT_ASSERT_WLOCKED(object); - vm_pager_free_nonreq(object, ma, reqpage, count, TRUE); - if (error == VM_PAGER_OK) { KASSERT((object->type == OBJT_DEVICE && - (ma[reqpage]->oflags & VPO_UNMANAGED) != 0) || + (ma[0]->oflags & VPO_UNMANAGED) != 0) || (object->type == OBJT_MGTDEVICE && - (ma[reqpage]->oflags & VPO_UNMANAGED) == 0), - ("Wrong page type %p %p", ma[reqpage], object)); + (ma[0]->oflags & VPO_UNMANAGED) == 0), + ("Wrong page type %p %p", ma[0], object)); if (object->type == OBJT_DEVICE) { TAILQ_INSERT_TAIL(&object->un_pager.devp.devp_pglist, - ma[reqpage], plinks.q); + ma[0], plinks.q); } + if (rbehind) + *rbehind = 0; + if (rahead) + *rahead = 0; } return (error); diff --git a/sys/vm/phys_pager.c b/sys/vm/phys_pager.c index 885a4515bb19..02e819e6a5c7 100644 --- a/sys/vm/phys_pager.c +++ b/sys/vm/phys_pager.c @@ -139,7 +139,8 @@ phys_pager_dealloc(vm_object_t object) * Fill as many pages as vm_fault has allocated for us. */ static int -phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) +phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, + int *rahead) { int i; @@ -154,14 +155,11 @@ phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) ("phys_pager_getpages: partially valid page %p", m[i])); KASSERT(m[i]->dirty == 0, ("phys_pager_getpages: dirty page %p", m[i])); - /* The requested page must remain busy, the others not. 
*/ - if (i == reqpage) { - vm_page_lock(m[i]); - vm_page_flash(m[i]); - vm_page_unlock(m[i]); - } else - vm_page_xunbusy(m[i]); } + if (rbehind) + *rbehind = 0; + if (rahead) + *rahead = 0; return (VM_PAGER_OK); } diff --git a/sys/vm/sg_pager.c b/sys/vm/sg_pager.c index 23ebd3a4a06d..26aa1d3cff1b 100644 --- a/sys/vm/sg_pager.c +++ b/sys/vm/sg_pager.c @@ -49,7 +49,7 @@ __FBSDID("$FreeBSD$"); static vm_object_t sg_pager_alloc(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); static void sg_pager_dealloc(vm_object_t); -static int sg_pager_getpages(vm_object_t, vm_page_t *, int, int); +static int sg_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); static void sg_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *); static boolean_t sg_pager_haspage(vm_object_t, vm_pindex_t, int *, @@ -135,7 +135,8 @@ sg_pager_dealloc(vm_object_t object) } static int -sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) +sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, + int *rahead) { struct sglist *sg; vm_page_t m_paddr, page; @@ -145,11 +146,13 @@ sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) size_t space; int i; + /* Since our haspage reports zero after/before, the count is 1. */ + KASSERT(count == 1, ("%s: count %d", __func__, count)); VM_OBJECT_ASSERT_WLOCKED(object); sg = object->handle; memattr = object->memattr; VM_OBJECT_WUNLOCK(object); - offset = m[reqpage]->pindex; + offset = m[0]->pindex; /* * Lookup the physical address of the requested page. An initial @@ -178,26 +181,23 @@ sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) } /* Return a fake page for the requested page. */ - KASSERT(!(m[reqpage]->flags & PG_FICTITIOUS), + KASSERT(!(m[0]->flags & PG_FICTITIOUS), ("backing page for SG is fake")); /* Construct a new fake page. */ page = vm_page_getfake(paddr, memattr); VM_OBJECT_WLOCK(object); TAILQ_INSERT_TAIL(&object->un_pager.sgp.sgp_pglist, page, plinks.q); - - /* Free the original pages and insert this fake page into the object. 
*/ - for (i = 0; i < count; i++) { - if (i == reqpage && - vm_page_replace(page, object, offset) != m[i]) - panic("sg_pager_getpages: invalid place replacement"); - vm_page_lock(m[i]); - vm_page_free(m[i]); - vm_page_unlock(m[i]); - } - m[reqpage] = page; + if (vm_page_replace(page, object, offset) != m[0]) + panic("sg_pager_getpages: invalid place replacement"); + m[0] = page; page->valid = VM_PAGE_BITS_ALL; + if (rbehind) + *rbehind = 0; + if (rahead) + *rahead = 0; + return (VM_PAGER_OK); } diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 0bd4883c6847..f243ecaf19f9 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -357,9 +357,10 @@ static vm_object_t swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t offset, struct ucred *); static void swap_pager_dealloc(vm_object_t object); -static int swap_pager_getpages(vm_object_t, vm_page_t *, int, int); -static int swap_pager_getpages_async(vm_object_t, vm_page_t *, int, int, - pgo_getpages_iodone_t, void *); +static int swap_pager_getpages(vm_object_t, vm_page_t *, int, int *, + int *); +static int swap_pager_getpages_async(vm_object_t, vm_page_t *, int, int *, + int *, pgo_getpages_iodone_t, void *); static void swap_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *); static boolean_t swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after); @@ -413,16 +414,6 @@ static void swp_pager_meta_free(vm_object_t, vm_pindex_t, daddr_t); static void swp_pager_meta_free_all(vm_object_t); static daddr_t swp_pager_meta_ctl(vm_object_t, vm_pindex_t, int); -static void -swp_pager_free_nrpage(vm_page_t m) -{ - - vm_page_lock(m); - if (m->wire_count == 0) - vm_page_free(m); - vm_page_unlock(m); -} - /* * SWP_SIZECHECK() - update swap_pager_full indication * @@ -1103,16 +1094,12 @@ swap_pager_unswapped(vm_page_t m) * left busy, but the others adjusted. */ static int -swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) +swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, + int *rahead) { struct buf *bp; - vm_page_t mreq; - int i; - int j; daddr_t blk; - mreq = m[reqpage]; - /* * Calculate range to retrieve. The pages have already been assigned * their swapblks. We require a *contiguous* range but we know it to @@ -1122,45 +1109,18 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) * * The swp_*() calls must be made with the object locked. */ - blk = swp_pager_meta_ctl(mreq->object, mreq->pindex, 0); + blk = swp_pager_meta_ctl(m[0]->object, m[0]->pindex, 0); - for (i = reqpage - 1; i >= 0; --i) { - daddr_t iblk; - - iblk = swp_pager_meta_ctl(m[i]->object, m[i]->pindex, 0); - if (blk != iblk + (reqpage - i)) - break; - } - ++i; - - for (j = reqpage + 1; j < count; ++j) { - daddr_t jblk; - - jblk = swp_pager_meta_ctl(m[j]->object, m[j]->pindex, 0); - if (blk != jblk - (j - reqpage)) - break; - } - - /* - * free pages outside our collection range. Note: we never free - * mreq, it must remain busy throughout. - */ - if (0 < i || j < count) { - int k; - - for (k = 0; k < i; ++k) - swp_pager_free_nrpage(m[k]); - for (k = j; k < count; ++k) - swp_pager_free_nrpage(m[k]); - } - - /* - * Return VM_PAGER_FAIL if we have nothing to do. Return mreq - * still busy, but the others unbusied. 
- */ if (blk == SWAPBLK_NONE) return (VM_PAGER_FAIL); +#ifdef INVARIANTS + for (int i = 0; i < count; i++) + KASSERT(blk + i == + swp_pager_meta_ctl(m[i]->object, m[i]->pindex, 0), + ("%s: range is not contiguous", __func__)); +#endif + /* * Getpbuf() can sleep. */ @@ -1175,21 +1135,16 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) bp->b_iodone = swp_pager_async_iodone; bp->b_rcred = crhold(thread0.td_ucred); bp->b_wcred = crhold(thread0.td_ucred); - bp->b_blkno = blk - (reqpage - i); - bp->b_bcount = PAGE_SIZE * (j - i); - bp->b_bufsize = PAGE_SIZE * (j - i); - bp->b_pager.pg_reqpage = reqpage - i; + bp->b_blkno = blk; + bp->b_bcount = PAGE_SIZE * count; + bp->b_bufsize = PAGE_SIZE * count; + bp->b_npages = count; VM_OBJECT_WLOCK(object); - { - int k; - - for (k = i; k < j; ++k) { - bp->b_pages[k - i] = m[k]; - m[k]->oflags |= VPO_SWAPINPROG; - } + for (int i = 0; i < count; i++) { + bp->b_pages[i] = m[i]; + m[i]->oflags |= VPO_SWAPINPROG; } - bp->b_npages = j - i; PCPU_INC(cnt.v_swapin); PCPU_ADD(cnt.v_swappgsin, bp->b_npages); @@ -1221,8 +1176,8 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) * is set in the meta-data. */ VM_OBJECT_WLOCK(object); - while ((mreq->oflags & VPO_SWAPINPROG) != 0) { - mreq->oflags |= VPO_SWAPSLEEP; + while ((m[0]->oflags & VPO_SWAPINPROG) != 0) { + m[0]->oflags |= VPO_SWAPSLEEP; PCPU_INC(cnt.v_intrans); if (VM_OBJECT_SLEEP(object, &object->paging_in_progress, PSWP, "swread", hz * 20)) { @@ -1233,15 +1188,18 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) } /* - * mreq is left busied after completion, but all the other pages - * are freed. If we had an unrecoverable read error the page will - * not be valid. + * If we had an unrecoverable read error pages will not be valid. */ - if (mreq->valid != VM_PAGE_BITS_ALL) { - return (VM_PAGER_ERROR); - } else { - return (VM_PAGER_OK); - } + for (int i = 0; i < count; i++) + if (m[i]->valid != VM_PAGE_BITS_ALL) + return (VM_PAGER_ERROR); + + if (rbehind) + *rbehind = 0; + if (rahead) + *rahead = 0; + + return (VM_PAGER_OK); /* * A final note: in a low swap situation, we cannot deallocate swap @@ -1259,11 +1217,11 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) */ static int swap_pager_getpages_async(vm_object_t object, vm_page_t *m, int count, - int reqpage, pgo_getpages_iodone_t iodone, void *arg) + int *rbehind, int *rahead, pgo_getpages_iodone_t iodone, void *arg) { int r, error; - r = swap_pager_getpages(object, m, count, reqpage); + r = swap_pager_getpages(object, m, count, rbehind, rahead); VM_OBJECT_WUNLOCK(object); switch (r) { case VM_PAGER_OK: @@ -1527,33 +1485,11 @@ swp_pager_async_iodone(struct buf *bp) */ if (bp->b_iocmd == BIO_READ) { /* - * When reading, reqpage needs to stay - * locked for the parent, but all other - * pages can be freed. We still want to - * wakeup the parent waiting on the page, - * though. ( also: pg_reqpage can be -1 and - * not match anything ). - * - * We have to wake specifically requested pages - * up too because we cleared VPO_SWAPINPROG and - * someone may be waiting for that. - * * NOTE: for reads, m->dirty will probably * be overridden by the original caller of * getpages so don't play cute tricks here. */ m->valid = 0; - if (i != bp->b_pager.pg_reqpage) - swp_pager_free_nrpage(m); - else { - vm_page_lock(m); - vm_page_flash(m); - vm_page_unlock(m); - } - /* - * If i == bp->b_pager.pg_reqpage, do not wake - * the page up. The caller needs to. 
- */ } else { /* * If a write error occurs, reactivate page @@ -1575,38 +1511,12 @@ swp_pager_async_iodone(struct buf *bp) * want to do that anyway, but it was an optimization * that existed in the old swapper for a time before * it got ripped out due to precisely this problem. - * - * If not the requested page then deactivate it. - * - * Note that the requested page, reqpage, is left - * busied, but we still have to wake it up. The - * other pages are released (unbusied) by - * vm_page_xunbusy(). */ KASSERT(!pmap_page_is_mapped(m), ("swp_pager_async_iodone: page %p is mapped", m)); - m->valid = VM_PAGE_BITS_ALL; KASSERT(m->dirty == 0, ("swp_pager_async_iodone: page %p is dirty", m)); - - /* - * We have to wake specifically requested pages - * up too because we cleared VPO_SWAPINPROG and - * could be waiting for it in getpages. However, - * be sure to not unbusy getpages specifically - * requested page - getpages expects it to be - * left busy. - */ - if (i != bp->b_pager.pg_reqpage) { - vm_page_lock(m); - vm_page_deactivate(m); - vm_page_unlock(m); - vm_page_xunbusy(m); - } else { - vm_page_lock(m); - vm_page_flash(m); - vm_page_unlock(m); - } + m->valid = VM_PAGE_BITS_ALL; } else { /* * For write success, clear the dirty @@ -1727,7 +1637,7 @@ swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex) return; } - if (swap_pager_getpages(object, &m, 1, 0) != VM_PAGER_OK) + if (swap_pager_getpages(object, &m, 1, NULL, NULL) != VM_PAGER_OK) panic("swap_pager_force_pagein: read from swap failed");/*XXX*/ vm_object_pip_wakeup(object); vm_page_dirty(m); diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 6990d12e0aad..a7e3d3760e56 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -107,13 +107,8 @@ __FBSDID("$FreeBSD$"); #define PFBAK 4 #define PFFOR 4 -static int vm_fault_additional_pages(vm_page_t, int, int, vm_page_t *, int *); - -#define VM_FAULT_READ_BEHIND 8 #define VM_FAULT_READ_DEFAULT (1 + VM_FAULT_READ_AHEAD_INIT) #define VM_FAULT_READ_MAX (1 + VM_FAULT_READ_AHEAD_MAX) -#define VM_FAULT_NINCR (VM_FAULT_READ_MAX / VM_FAULT_READ_BEHIND) -#define VM_FAULT_SUM (VM_FAULT_NINCR * (VM_FAULT_NINCR + 1) / 2) #define VM_FAULT_DONTNEED_MIN 1048576 @@ -133,7 +128,7 @@ struct faultstate { static void vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr, int ahead); static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra, - int faultcount, int reqpage); + int backward, int forward); static inline void release_page(struct faultstate *fs) @@ -288,11 +283,10 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags, vm_page_t *m_hold) { vm_prot_t prot; - int alloc_req, era, faultcount, nera, reqpage, result; + int alloc_req, era, faultcount, nera, result; boolean_t growstack, is_first_object_locked, wired; int map_generation; vm_object_t next_object; - vm_page_t marray[VM_FAULT_READ_MAX]; int hardfault; struct faultstate fs; struct vnode *vp; @@ -303,7 +297,7 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, growstack = TRUE; PCPU_INC(cnt.v_vm_faults); fs.vp = NULL; - faultcount = reqpage = 0; + faultcount = 0; RetryFault:; @@ -389,7 +383,7 @@ RetryFault:; FALSE); VM_OBJECT_RUNLOCK(fs.first_object); if (!wired) - vm_fault_prefault(&fs, vaddr, 0, 0); + vm_fault_prefault(&fs, vaddr, PFBAK, PFFOR); vm_map_lookup_done(fs.map, fs.entry); curthread->td_ru.ru_minflt++; return (KERN_SUCCESS); @@ -652,36 +646,13 @@ RetryFault:; ("vm_fault: vnode-backed object mapped by system map")); /* - * now we find out if 
any other pages should be paged - in at this time this routine checks to see if the - pages surrounding this fault reside in the same - object as the page for this fault. If they do, - then they are faulted in also into the object. The - array "marray" returned contains an array of - vm_page_t structs where one of them is the - vm_page_t passed to the routine. The reqpage - return value is the index into the marray for the - vm_page_t passed to the routine. - - fs.m plus the additional pages are exclusive busied. + * Page in the requested page and hint the pager + * that it may bring in surrounding pages. */ - faultcount = vm_fault_additional_pages( - fs.m, behind, ahead, marray, &reqpage); - - rv = faultcount ? - vm_pager_get_pages(fs.object, marray, faultcount, - reqpage) : VM_PAGER_FAIL; - + rv = vm_pager_get_pages(fs.object, &fs.m, 1, + &behind, &ahead); if (rv == VM_PAGER_OK) { - /* - * Found the page. Leave it busy while we play - * with it. - * - * Pager could have changed the page. Pager - * is responsible for disposition of old page - * if moved. - */ - fs.m = marray[reqpage]; + faultcount = behind + 1 + ahead; hardfault++; break; /* break to PAGE HAS BEEN FOUND */ } @@ -965,16 +936,13 @@ RetryFault:; } /* * If the page was filled by a pager, update the map entry's - * last read offset. Since the pager does not return the - * actual set of pages that it read, this update is based on - * the requested set. Typically, the requested and actual - * sets are the same. + * last read offset. * * XXX The following assignment modifies the map * without holding a write lock on it. */ if (hardfault) - fs.entry->next_read = fs.pindex + faultcount - reqpage; + fs.entry->next_read = fs.pindex + ahead + 1; vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags, TRUE); vm_page_assert_xbusied(fs.m); @@ -997,7 +965,9 @@ RetryFault:; fault_type | (wired ? PMAP_ENTER_WIRED : 0), 0); if (faultcount != 1 && (fault_flags & VM_FAULT_WIRE) == 0 && wired == 0) - vm_fault_prefault(&fs, vaddr, faultcount, reqpage); + vm_fault_prefault(&fs, vaddr, + faultcount > 0 ? behind : PFBAK, + faultcount > 0 ? ahead : PFFOR); VM_OBJECT_WLOCK(fs.object); vm_page_lock(fs.m); @@ -1114,7 +1084,7 @@ vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr, int ahead) */ static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra, - int faultcount, int reqpage) + int backward, int forward) { pmap_t pmap; vm_map_entry_t entry; @@ -1122,19 +1092,12 @@ vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra, vm_offset_t addr, starta; vm_pindex_t pindex; vm_page_t m; - int backward, forward, i; + int i; pmap = fs->map->pmap; if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) return; - if (faultcount > 0) { - backward = reqpage; - forward = faultcount - reqpage - 1; - } else { - backward = PFBAK; - forward = PFFOR; - } entry = fs->entry; starta = addra - backward * PAGE_SIZE; @@ -1465,133 +1428,6 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map, } } - -/* - * This routine checks around the requested page for other pages that - * might be able to be faulted in. This routine brackets the viable - * pages for the pages to be paged in. 
- * - * Inputs: - * m, rbehind, rahead - * - * Outputs: - * marray (array of vm_page_t), reqpage (index of requested page) - * - * Return value: - * number of pages in marray - */ -static int -vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage) - vm_page_t m; - int rbehind; - int rahead; - vm_page_t *marray; - int *reqpage; -{ - int i,j; - vm_object_t object; - vm_pindex_t pindex, startpindex, endpindex, tpindex; - vm_page_t rtm; - int cbehind, cahead; - - VM_OBJECT_ASSERT_WLOCKED(m->object); - - object = m->object; - pindex = m->pindex; - cbehind = cahead = 0; - - /* - * if the requested page is not available, then give up now - */ - if (!vm_pager_has_page(object, pindex, &cbehind, &cahead)) { - return 0; - } - - if ((cbehind == 0) && (cahead == 0)) { - *reqpage = 0; - marray[0] = m; - return 1; - } - - if (rahead > cahead) { - rahead = cahead; - } - - if (rbehind > cbehind) { - rbehind = cbehind; - } - - /* - * scan backward for the read behind pages -- in memory - */ - if (pindex > 0) { - if (rbehind > pindex) { - rbehind = pindex; - startpindex = 0; - } else { - startpindex = pindex - rbehind; - } - - if ((rtm = TAILQ_PREV(m, pglist, listq)) != NULL && - rtm->pindex >= startpindex) - startpindex = rtm->pindex + 1; - - /* tpindex is unsigned; beware of numeric underflow. */ - for (i = 0, tpindex = pindex - 1; tpindex >= startpindex && - tpindex < pindex; i++, tpindex--) { - - rtm = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL | - VM_ALLOC_IFNOTCACHED); - if (rtm == NULL) { - /* - * Shift the allocated pages to the - * beginning of the array. - */ - for (j = 0; j < i; j++) { - marray[j] = marray[j + tpindex + 1 - - startpindex]; - } - break; - } - - marray[tpindex - startpindex] = rtm; - } - } else { - startpindex = 0; - i = 0; - } - - marray[i] = m; - /* page offset of the required page */ - *reqpage = i; - - tpindex = pindex + 1; - i++; - - /* - * scan forward for the read ahead pages - */ - endpindex = tpindex + rahead; - if ((rtm = TAILQ_NEXT(m, listq)) != NULL && rtm->pindex < endpindex) - endpindex = rtm->pindex; - if (endpindex > object->size) - endpindex = object->size; - - for (; tpindex < endpindex; i++, tpindex++) { - - rtm = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL | - VM_ALLOC_IFNOTCACHED); - if (rtm == NULL) { - break; - } - - marray[i] = rtm; - } - - /* return number of pages */ - return i; -} - /* * Block entry into the machine-independent layer's page fault handler by * the calling thread. 
Subsequent calls to vm_fault() by that thread will diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index f00dce1ac3ae..e1538db130fd 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -238,7 +238,7 @@ vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset) pindex = OFF_TO_IDX(offset); m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL); if (m->valid != VM_PAGE_BITS_ALL) { - rv = vm_pager_get_pages(object, &m, 1, 0); + rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); if (rv != VM_PAGER_OK) { vm_page_lock(m); vm_page_free(m); @@ -567,37 +567,37 @@ vm_thread_swapin(struct thread *td) { vm_object_t ksobj; vm_page_t ma[KSTACK_MAX_PAGES]; - int i, j, pages, rv; + int pages; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; VM_OBJECT_WLOCK(ksobj); - for (i = 0; i < pages; i++) + for (int i = 0; i < pages; i++) ma[i] = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_WIRED); - for (i = 0; i < pages; i++) { - if (ma[i]->valid != VM_PAGE_BITS_ALL) { - vm_page_assert_xbusied(ma[i]); - vm_object_pip_add(ksobj, 1); - for (j = i + 1; j < pages; j++) { - if (ma[j]->valid != VM_PAGE_BITS_ALL) - vm_page_assert_xbusied(ma[j]); - if (ma[j]->valid == VM_PAGE_BITS_ALL) - break; - } - rv = vm_pager_get_pages(ksobj, ma + i, j - i, 0); - if (rv != VM_PAGER_OK) - panic("vm_thread_swapin: cannot get kstack for proc: %d", - td->td_proc->p_pid); - /* - * All pages in the array are in place, due to the - * pager is always the swap pager, which doesn't - * free or remove wired non-req pages from object. - */ - vm_object_pip_wakeup(ksobj); - vm_page_xunbusy(ma[i]); - } else if (vm_page_xbusied(ma[i])) + for (int i = 0; i < pages;) { + int j, a, count, rv; + + vm_page_assert_xbusied(ma[i]); + if (ma[i]->valid == VM_PAGE_BITS_ALL) { vm_page_xunbusy(ma[i]); + i++; + continue; + } + vm_object_pip_add(ksobj, 1); + for (j = i + 1; j < pages; j++) + if (ma[j]->valid == VM_PAGE_BITS_ALL) + break; + rv = vm_pager_has_page(ksobj, ma[i]->pindex, NULL, &a); + KASSERT(rv == 1, ("%s: missing page %p", __func__, ma[i])); + count = min(a + 1, j - i); + rv = vm_pager_get_pages(ksobj, ma + i, count, NULL, NULL); + KASSERT(rv == VM_PAGER_OK, ("%s: cannot get kstack for proc %d", + __func__, td->td_proc->p_pid)); + vm_object_pip_wakeup(ksobj); + for (j = i; j < i + count; j++) + vm_page_xunbusy(ma[j]); + i += count; } VM_OBJECT_WUNLOCK(ksobj); pmap_qenter(td->td_kstack, ma, pages); diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 9c5c83afb064..bc0e4c05f126 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -2014,7 +2014,7 @@ vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end) for (pindex = start; pindex < end; pindex++) { m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL); if (m->valid != VM_PAGE_BITS_ALL) { - rv = vm_pager_get_pages(object, &m, 1, 0); + rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); if (rv != VM_PAGER_OK) { vm_page_lock(m); vm_page_free(m); diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index 894a8d5616ba..ca2d73a75a42 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -243,6 +243,8 @@ extern struct vm_object kmem_object_store; rw_try_upgrade(&(object)->lock) #define VM_OBJECT_WLOCK(object) \ rw_wlock(&(object)->lock) +#define VM_OBJECT_WOWNED(object) \ + rw_wowned(&(object)->lock) #define VM_OBJECT_WUNLOCK(object) \ rw_wunlock(&(object)->lock) diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index e5edf77301c4..2e6b56a6d680 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -979,38 +979,28 @@ vm_page_free_zero(vm_page_t m) /* * Unbusy and 
handle the page queueing for a page from the VOP_GETPAGES() - * array which is not the request page. + * array which was optionally read ahead or behind. */ void vm_page_readahead_finish(vm_page_t m) { - if (m->valid != 0) { - /* - * Since the page is not the requested page, whether - * it should be activated or deactivated is not - * obvious. Empirical results have shown that - * deactivating the page is usually the best choice, - * unless the page is wanted by another thread. - */ - vm_page_lock(m); - if ((m->busy_lock & VPB_BIT_WAITERS) != 0) - vm_page_activate(m); - else - vm_page_deactivate(m); - vm_page_unlock(m); - vm_page_xunbusy(m); - } else { - /* - * Free the completely invalid page. Such page state - * occurs due to the short read operation which did - * not covered our page at all, or in case when a read - * error happens. - */ - vm_page_lock(m); - vm_page_free(m); - vm_page_unlock(m); - } + /* We shouldn't put invalid pages on queues. */ + KASSERT(m->valid != 0, ("%s: %p is invalid", __func__, m)); + + /* + * Since the page is not the actually needed one, whether it should + * be activated or deactivated is not obvious. Empirical results + * have shown that deactivating the page is usually the best choice, + * unless the page is wanted by another thread. + */ + vm_page_lock(m); + if ((m->busy_lock & VPB_BIT_WAITERS) != 0) + vm_page_activate(m); + else + vm_page_deactivate(m); + vm_page_unlock(m); + vm_page_xunbusy(m); } /* diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c index 40d7d4e43ac7..7a1e6ae772e3 100644 --- a/sys/vm/vm_pager.c +++ b/sys/vm/vm_pager.c @@ -88,7 +88,7 @@ int cluster_pbuf_freecnt = -1; /* unlimited to begin with */ struct buf *swbuf; -static int dead_pager_getpages(vm_object_t, vm_page_t *, int, int); +static int dead_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); static vm_object_t dead_pager_alloc(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); static void dead_pager_putpages(vm_object_t, vm_page_t *, int, int, int *); @@ -96,13 +96,11 @@ static boolean_t dead_pager_haspage(vm_object_t, vm_pindex_t, int *, int *); static void dead_pager_dealloc(vm_object_t); static int -dead_pager_getpages(obj, ma, count, req) - vm_object_t obj; - vm_page_t *ma; - int count; - int req; +dead_pager_getpages(vm_object_t obj, vm_page_t *ma, int count, int *rbehind, + int *rahead) { - return VM_PAGER_FAIL; + + return (VM_PAGER_FAIL); } static vm_object_t @@ -282,45 +280,47 @@ vm_pager_assert_in(vm_object_t object, vm_page_t *m, int count) * The requested page must be fully valid on successful return. */ int -vm_pager_get_pages(vm_object_t object, vm_page_t *m, int count, int reqpage) +vm_pager_get_pages(vm_object_t object, vm_page_t *m, int count, int *rbehind, + int *rahead) { +#ifdef INVARIANTS + vm_pindex_t pindex = m[0]->pindex; +#endif int r; vm_pager_assert_in(object, m, count); - r = (*pagertab[object->type]->pgo_getpages)(object, m, count, reqpage); + r = (*pagertab[object->type]->pgo_getpages)(object, m, count, rbehind, + rahead); if (r != VM_PAGER_OK) return (r); - /* - * If pager has replaced the page, assert that it had - * updated the array. Also assert that page is still - * busied. - */ - KASSERT(m[reqpage] == vm_page_lookup(object, m[reqpage]->pindex), - ("%s: mismatch page %p pindex %ju", __func__, - m[reqpage], (uintmax_t )m[reqpage]->pindex)); - vm_page_assert_xbusied(m[reqpage]); - - /* - * Pager didn't fill up entire page. Zero out - * partially filled data. 
- */ - if (m[reqpage]->valid != VM_PAGE_BITS_ALL) - vm_page_zero_invalid(m[reqpage], TRUE); - + for (int i = 0; i < count; i++) { + /* + * If pager has replaced a page, assert that it had + * updated the array. + */ + KASSERT(m[i] == vm_page_lookup(object, pindex++), + ("%s: mismatch page %p pindex %ju", __func__, + m[i], (uintmax_t )pindex - 1)); + /* + * Zero out partially filled data. + */ + if (m[i]->valid != VM_PAGE_BITS_ALL) + vm_page_zero_invalid(m[i], TRUE); + } return (VM_PAGER_OK); } int vm_pager_get_pages_async(vm_object_t object, vm_page_t *m, int count, - int reqpage, pgo_getpages_iodone_t iodone, void *arg) + int *rbehind, int *rahead, pgo_getpages_iodone_t iodone, void *arg) { vm_pager_assert_in(object, m, count); return ((*pagertab[object->type]->pgo_getpages_async)(object, m, - count, reqpage, iodone, arg)); + count, rbehind, rahead, iodone, arg)); } /* @@ -354,39 +354,6 @@ vm_pager_object_lookup(struct pagerlst *pg_list, void *handle) return (object); } -/* - * Free the non-requested pages from the given array. To remove all pages, - * caller should provide out of range reqpage number. - */ -void -vm_pager_free_nonreq(vm_object_t object, vm_page_t ma[], int reqpage, - int npages, boolean_t object_locked) -{ - enum { UNLOCKED, CALLER_LOCKED, INTERNALLY_LOCKED } locked; - int i; - - if (object_locked) { - VM_OBJECT_ASSERT_WLOCKED(object); - locked = CALLER_LOCKED; - } else { - VM_OBJECT_ASSERT_UNLOCKED(object); - locked = UNLOCKED; - } - for (i = 0; i < npages; ++i) { - if (i != reqpage) { - if (locked == UNLOCKED) { - VM_OBJECT_WLOCK(object); - locked = INTERNALLY_LOCKED; - } - vm_page_lock(ma[i]); - vm_page_free(ma[i]); - vm_page_unlock(ma[i]); - } - } - if (locked == INTERNALLY_LOCKED) - VM_OBJECT_WUNLOCK(object); -} - /* * initialize a physical buffer */ diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h index 6884729f7667..4b7d100a94aa 100644 --- a/sys/vm/vm_pager.h +++ b/sys/vm/vm_pager.h @@ -50,9 +50,9 @@ typedef void pgo_init_t(void); typedef vm_object_t pgo_alloc_t(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); typedef void pgo_dealloc_t(vm_object_t); -typedef int pgo_getpages_t(vm_object_t, vm_page_t *, int, int); +typedef int pgo_getpages_t(vm_object_t, vm_page_t *, int, int *, int *); typedef void pgo_getpages_iodone_t(void *, vm_page_t *, int, int); -typedef int pgo_getpages_async_t(vm_object_t, vm_page_t *, int, int, +typedef int pgo_getpages_async_t(vm_object_t, vm_page_t *, int, int *, int *, pgo_getpages_iodone_t, void *); typedef void pgo_putpages_t(vm_object_t, vm_page_t *, int, int, int *); typedef boolean_t pgo_haspage_t(vm_object_t, vm_pindex_t, int *, int *); @@ -106,14 +106,12 @@ vm_object_t vm_pager_allocate(objtype_t, void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); void vm_pager_bufferinit(void); void vm_pager_deallocate(vm_object_t); -int vm_pager_get_pages(vm_object_t, vm_page_t *, int, int); -int vm_pager_get_pages_async(vm_object_t, vm_page_t *, int, int, +int vm_pager_get_pages(vm_object_t, vm_page_t *, int, int *, int *); +int vm_pager_get_pages_async(vm_object_t, vm_page_t *, int, int *, int *, pgo_getpages_iodone_t, void *); static __inline boolean_t vm_pager_has_page(vm_object_t, vm_pindex_t, int *, int *); void vm_pager_init(void); vm_object_t vm_pager_object_lookup(struct pagerlst *, void *); -void vm_pager_free_nonreq(vm_object_t object, vm_page_t ma[], int reqpage, - int npages, boolean_t object_locked); static __inline void vm_pager_put_pages( diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c 
index 96510ac3083f..ff30f4d208ea 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -84,11 +84,9 @@ static int vnode_pager_addr(struct vnode *vp, vm_ooffset_t address, static int vnode_pager_input_smlfs(vm_object_t object, vm_page_t m); static int vnode_pager_input_old(vm_object_t object, vm_page_t m); static void vnode_pager_dealloc(vm_object_t); -static int vnode_pager_local_getpages0(struct vnode *, vm_page_t *, int, int, - vop_getpages_iodone_t, void *); -static int vnode_pager_getpages(vm_object_t, vm_page_t *, int, int); -static int vnode_pager_getpages_async(vm_object_t, vm_page_t *, int, int, - vop_getpages_iodone_t, void *); +static int vnode_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); +static int vnode_pager_getpages_async(vm_object_t, vm_page_t *, int, int *, + int *, vop_getpages_iodone_t, void *); static void vnode_pager_putpages(vm_object_t, vm_page_t *, int, int, int *); static boolean_t vnode_pager_haspage(vm_object_t, vm_pindex_t, int *, int *); static vm_object_t vnode_pager_alloc(void *, vm_ooffset_t, vm_prot_t, @@ -673,15 +671,15 @@ vnode_pager_input_old(vm_object_t object, vm_page_t m) * backing vp's VOP_GETPAGES. */ static int -vnode_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) +vnode_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind, + int *rahead) { - int rtval; struct vnode *vp; - int bytes = count * PAGE_SIZE; + int rtval; vp = object->handle; VM_OBJECT_WUNLOCK(object); - rtval = VOP_GETPAGES(vp, m, bytes, reqpage); + rtval = VOP_GETPAGES(vp, m, count, rbehind, rahead); KASSERT(rtval != EOPNOTSUPP, ("vnode_pager: FS getpages not implemented\n")); VM_OBJECT_WLOCK(object); @@ -690,15 +688,14 @@ vnode_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) static int vnode_pager_getpages_async(vm_object_t object, vm_page_t *m, int count, - int reqpage, vop_getpages_iodone_t iodone, void *arg) + int *rbehind, int *rahead, vop_getpages_iodone_t iodone, void *arg) { struct vnode *vp; int rtval; vp = object->handle; VM_OBJECT_WUNLOCK(object); - rtval = VOP_GETPAGES_ASYNC(vp, m, count * PAGE_SIZE, reqpage, - iodone, arg); + rtval = VOP_GETPAGES_ASYNC(vp, m, count, rbehind, rahead, iodone, arg); KASSERT(rtval != EOPNOTSUPP, ("vnode_pager: FS getpages_async not implemented\n")); VM_OBJECT_WLOCK(object); @@ -714,48 +711,16 @@ int vnode_pager_local_getpages(struct vop_getpages_args *ap) { - return (vnode_pager_local_getpages0(ap->a_vp, ap->a_m, ap->a_count, - ap->a_reqpage, NULL, NULL)); + return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, + ap->a_rbehind, ap->a_rahead, NULL, NULL)); } int vnode_pager_local_getpages_async(struct vop_getpages_async_args *ap) { - return (vnode_pager_local_getpages0(ap->a_vp, ap->a_m, ap->a_count, - ap->a_reqpage, ap->a_iodone, ap->a_arg)); -} - -static int -vnode_pager_local_getpages0(struct vnode *vp, vm_page_t *m, int bytecount, - int reqpage, vop_getpages_iodone_t iodone, void *arg) -{ - vm_page_t mreq; - - mreq = m[reqpage]; - - /* - * Since the caller has busied the requested page, that page's valid - * field will not be changed by other threads. - */ - vm_page_assert_xbusied(mreq); - - /* - * The requested page has valid blocks. Invalid part can only - * exist at the end of file, and the page is made fully valid - * by zeroing in vm_pager_get_pages(). Free non-requested - * pages, since no i/o is done to read its content. 
-	 */
-	if (mreq->valid != 0) {
-		vm_pager_free_nonreq(mreq->object, m, reqpage,
-		    round_page(bytecount) / PAGE_SIZE, FALSE);
-		if (iodone != NULL)
-			iodone(arg, m, reqpage, 0);
-		return (VM_PAGER_OK);
-	}
-
-	return (vnode_pager_generic_getpages(vp, m, bytecount, reqpage,
-	    iodone, arg));
+	return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
+	    ap->a_rbehind, ap->a_rahead, ap->a_iodone, ap->a_arg));
 }
 
 /*
@@ -763,25 +728,43 @@ vnode_pager_local_getpages0(struct vnode *vp, vm_page_t *m, int bytecount,
  * own vnodes if they fail to implement VOP_GETPAGES.
  */
 int
-vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount,
-    int reqpage, vop_getpages_iodone_t iodone, void *arg)
+vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count,
+    int *a_rbehind, int *a_rahead, vop_getpages_iodone_t iodone, void *arg)
 {
 	vm_object_t object;
 	struct bufobj *bo;
 	struct buf *bp;
-	daddr_t firstaddr, reqblock;
-	off_t foff, pib;
-	int pbefore, pafter, i, size, bsize, first, last, *freecnt;
-	int count, error, before, after, secmask;
+	off_t foff;
+	int bsize, pagesperblock, *freecnt;
+	int error, before, after, rbehind, rahead, poff, i;
+	int bytecount, secmask;
 
 	KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
-	    ("vnode_pager_generic_getpages does not support devices"));
+	    ("%s does not support devices", __func__));
+
 	if (vp->v_iflag & VI_DOOMED)
 		return (VM_PAGER_BAD);
 
 	object = vp->v_object;
-	count = bytecount / PAGE_SIZE;
+	foff = IDX_TO_OFF(m[0]->pindex);
 	bsize = vp->v_mount->mnt_stat.f_iosize;
+	pagesperblock = bsize / PAGE_SIZE;
+
+	KASSERT(foff < object->un_pager.vnp.vnp_size,
+	    ("%s: page %p offset beyond vp %p size", __func__, m[0], vp));
+	KASSERT(count <= nitems(bp->b_pages),
+	    ("%s: requested %d pages", __func__, count));
+
+	/*
+	 * The last page has valid blocks.  An invalid part can only
+	 * exist at the end of the file, and the page is made fully
+	 * valid by zeroing in vm_pager_get_pages().
+	 */
+	if (m[count - 1]->valid != 0 && --count == 0) {
+		if (iodone != NULL)
+			iodone(arg, m, 1, 0);
+		return (VM_PAGER_OK);
+	}
 
 	/*
 	 * Synchronous and asynchronous paging operations use different
@@ -800,130 +783,182 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount,
 	 * If the file system doesn't support VOP_BMAP, use old way of
 	 * getting pages via VOP_READ.
	 */
-	error = VOP_BMAP(vp, IDX_TO_OFF(m[reqpage]->pindex) / bsize, &bo,
-	    &reqblock, &after, &before);
+	error = VOP_BMAP(vp, foff / bsize, &bo, &bp->b_blkno, &after, &before);
 	if (error == EOPNOTSUPP) {
 		relpbuf(bp, freecnt);
 		VM_OBJECT_WLOCK(object);
-		for (i = 0; i < count; i++)
-			if (i != reqpage) {
-				vm_page_lock(m[i]);
-				vm_page_free(m[i]);
-				vm_page_unlock(m[i]);
-			}
-		PCPU_INC(cnt.v_vnodein);
-		PCPU_INC(cnt.v_vnodepgsin);
-		error = vnode_pager_input_old(object, m[reqpage]);
+		for (i = 0; i < count; i++) {
+			PCPU_INC(cnt.v_vnodein);
+			PCPU_INC(cnt.v_vnodepgsin);
+			error = vnode_pager_input_old(object, m[i]);
+			if (error)
+				break;
+		}
 		VM_OBJECT_WUNLOCK(object);
 		return (error);
 	} else if (error != 0) {
 		relpbuf(bp, freecnt);
-		vm_pager_free_nonreq(object, m, reqpage, count, FALSE);
 		return (VM_PAGER_ERROR);
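/*
 * Illustration (hypothetical userland model, not part of the patch):
 * the tail trim above in miniature.  valid[] stands in for the pages'
 * valid fields; a nonzero last page is dropped from the request, and
 * a request that thereby empties completes with no I/O at all.
 */
#include <assert.h>

static int
trim_tail(const int *valid, int count)
{

	if (valid[count - 1] != 0 && --count == 0)
		return (0);		/* Nothing left to read. */
	return (count);			/* Pages still needing I/O. */
}

int
main(void)
{
	int v3[] = { 0, 0, 1 }, v1[] = { 1 };

	assert(trim_tail(v3, 3) == 2);	/* Read m[0..1] only. */
	assert(trim_tail(v1, 1) == 0);	/* Already valid; done. */
	return (0);
}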
-
-	/*
-	 * If the blocksize is smaller than a page size, then use
-	 * special small filesystem code.
-	 */
-	} else if ((PAGE_SIZE / bsize) > 1) {
-		relpbuf(bp, freecnt);
-		vm_pager_free_nonreq(object, m, reqpage, count, FALSE);
-		PCPU_INC(cnt.v_vnodein);
-		PCPU_INC(cnt.v_vnodepgsin);
-		return (vnode_pager_input_smlfs(object, m[reqpage]));
 	}
 
 	/*
-	 * Since the caller has busied the requested page, that page's valid
-	 * field will not be changed by other threads.
+	 * If the file system supports BMAP, but blocksize is smaller
+	 * than a page size, then use special small filesystem code.
 	 */
-	vm_page_assert_xbusied(m[reqpage]);
+	if (pagesperblock == 0) {
+		for (i = 0; i < count; i++) {
+			PCPU_INC(cnt.v_vnodein);
+			PCPU_INC(cnt.v_vnodepgsin);
+			error = vnode_pager_input_smlfs(object, m[i]);
+			if (error)
+				break;
+		}
+		return (error);
+	}
 
 	/*
-	 * If we have a completely valid page available to us, we can
-	 * clean up and return.  Otherwise we have to re-read the
-	 * media.
+	 * A sparse file can be encountered only for a single page request,
+	 * which may not be preceded by a call to vm_pager_haspage().
 	 */
-	if (m[reqpage]->valid == VM_PAGE_BITS_ALL) {
+	if (bp->b_blkno == -1) {
+		KASSERT(count == 1,
+		    ("%s: array[%d] request to a sparse file %p", __func__,
+		    count, vp));
 		relpbuf(bp, freecnt);
-		vm_pager_free_nonreq(object, m, reqpage, count, FALSE);
-		return (VM_PAGER_OK);
-	} else if (reqblock == -1) {
-		relpbuf(bp, freecnt);
-		pmap_zero_page(m[reqpage]);
-		KASSERT(m[reqpage]->dirty == 0,
-		    ("vnode_pager_generic_getpages: page %p is dirty", m));
+		pmap_zero_page(m[0]);
+		KASSERT(m[0]->dirty == 0, ("%s: page %p is dirty",
+		    __func__, m[0]));
 		VM_OBJECT_WLOCK(object);
-		m[reqpage]->valid = VM_PAGE_BITS_ALL;
-		vm_pager_free_nonreq(object, m, reqpage, count, TRUE);
+		m[0]->valid = VM_PAGE_BITS_ALL;
 		VM_OBJECT_WUNLOCK(object);
 		return (VM_PAGER_OK);
-	} else if (m[reqpage]->valid != 0) {
-		VM_OBJECT_WLOCK(object);
-		m[reqpage]->valid = 0;
-		VM_OBJECT_WUNLOCK(object);
 	}
 
-	pib = IDX_TO_OFF(m[reqpage]->pindex) % bsize;
-	pbefore = ((daddr_t)before * bsize + pib) / PAGE_SIZE;
-	pafter = ((daddr_t)(after + 1) * bsize - pib) / PAGE_SIZE - 1;
-	first = reqpage < pbefore ? 0 : reqpage - pbefore;
-	last = reqpage + pafter >= count ? count - 1 : reqpage + pafter;
-	if (first > 0 || last + 1 < count) {
+	bp->b_blkno += (foff % bsize) / DEV_BSIZE;
+
+	/* Recalculate blocks available after/before in terms of pages. */
+	poff = (foff % bsize) / PAGE_SIZE;
+	before *= pagesperblock;
+	before += poff;
+	after *= pagesperblock;
+	after += pagesperblock - (poff + 1);
+	if (m[0]->pindex + after >= object->size)
+		after = object->size - 1 - m[0]->pindex;
+	KASSERT(count <= after + 1, ("%s: %d pages asked, can do only %d",
+	    __func__, count, after + 1));
+	after -= count - 1;
+
+	/* Trim requested rbehind/rahead to possible values. */
+	rbehind = a_rbehind ? *a_rbehind : 0;
+	rahead = a_rahead ? *a_rahead : 0;
+	rbehind = min(rbehind, before);
+	rbehind = min(rbehind, m[0]->pindex);
+	rahead = min(rahead, after);
+	rahead = min(rahead, object->size - m[count - 1]->pindex);
+	KASSERT(rbehind + rahead + count <= nitems(bp->b_pages),
+	    ("%s: behind %d ahead %d count %d", __func__,
+	    rbehind, rahead, count));
+
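/*
 * Worked example of the arithmetic above (hypothetical numbers; the
 * object-size clamping is ignored): bsize = 32K and PAGE_SIZE = 4K
 * give pagesperblock = 8.  Let the first requested page sit at
 * poff = 3 within its block, with VOP_BMAP reporting one contiguous
 * block on each side (before = after = 1), and count = 4 pages asked.
 */
#include <assert.h>

int
main(void)
{
	int pagesperblock = 8, poff = 3, before = 1, after = 1, count = 4;

	before = before * pagesperblock + poff;
	after = after * pagesperblock + pagesperblock - (poff + 1);
	assert(before == 11);	/* Pages readable behind m[0]. */
	assert(after == 12);	/* Pages readable from m[0] onward. */
	after -= count - 1;
	assert(after == 9);	/* Pages left over for read ahead. */
	return (0);
}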
+	/*
+	 * Fill in the bp->b_pages[] array with requested and optional
+	 * read behind or read ahead pages.  Read behind pages are looked
+	 * up in a backward direction, down to the first cached page.  The
+	 * same is done for read ahead pages, but there is no need to
+	 * shift the array in case of encountering a cached page.
+	 */
+	i = bp->b_npages = 0;
+	if (rbehind) {
+		vm_pindex_t startpindex, tpindex;
+		vm_page_t p;
+
 		VM_OBJECT_WLOCK(object);
-		for (i = 0; i < first; i++) {
-			vm_page_lock(m[i]);
-			vm_page_free(m[i]);
-			vm_page_unlock(m[i]);
+		startpindex = m[0]->pindex - rbehind;
+		if ((p = TAILQ_PREV(m[0], pglist, listq)) != NULL &&
+		    p->pindex >= startpindex)
+			startpindex = p->pindex + 1;
+
+		/* tpindex is unsigned; beware of numeric underflow. */
+		for (tpindex = m[0]->pindex - 1;
+		    tpindex >= startpindex && tpindex < m[0]->pindex;
+		    tpindex--, i++) {
+			p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL |
+			    VM_ALLOC_IFNOTCACHED);
+			if (p == NULL) {
+				/* Shift the array. */
+				for (int j = 0; j < i; j++)
+					bp->b_pages[j] = bp->b_pages[j +
+					    tpindex + 1 - startpindex];
+				break;
+			}
+			bp->b_pages[tpindex - startpindex] = p;
 		}
-		for (i = last + 1; i < count; i++) {
-			vm_page_lock(m[i]);
-			vm_page_free(m[i]);
-			vm_page_unlock(m[i]);
+
+		bp->b_pgbefore = i;
+		bp->b_npages += i;
+		bp->b_blkno -= IDX_TO_OFF(i) / DEV_BSIZE;
+	} else
+		bp->b_pgbefore = 0;
+
+	/* Requested pages. */
+	for (int j = 0; j < count; j++, i++)
+		bp->b_pages[i] = m[j];
+	bp->b_npages += count;
+
+	if (rahead) {
+		vm_pindex_t endpindex, tpindex;
+		vm_page_t p;
+
+		if (!VM_OBJECT_WOWNED(object))
+			VM_OBJECT_WLOCK(object);
+		endpindex = m[count - 1]->pindex + rahead + 1;
+		if ((p = TAILQ_NEXT(m[count - 1], listq)) != NULL &&
+		    p->pindex < endpindex)
+			endpindex = p->pindex;
+		if (endpindex > object->size)
+			endpindex = object->size;
+
+		for (tpindex = m[count - 1]->pindex + 1;
+		    tpindex < endpindex; i++, tpindex++) {
+			p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL |
+			    VM_ALLOC_IFNOTCACHED);
+			if (p == NULL)
+				break;
+			bp->b_pages[i] = p;
 		}
+
+		bp->b_pgafter = i - bp->b_npages;
+		bp->b_npages = i;
+	} else
+		bp->b_pgafter = 0;
+
+	if (VM_OBJECT_WOWNED(object))
 		VM_OBJECT_WUNLOCK(object);
-	}
+
+	/* Report back actual behind/ahead read. */
+	if (a_rbehind)
+		*a_rbehind = bp->b_pgbefore;
+	if (a_rahead)
+		*a_rahead = bp->b_pgafter;
+
+	KASSERT(bp->b_npages <= nitems(bp->b_pages),
+	    ("%s: buf %p overflowed", __func__, bp));
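/*
 * Userland model (hypothetical) of the read-behind shift above: pages
 * are stored at their final slots while walking backward from the
 * requested page, so after i successful allocations they occupy the
 * top of the rbehind window.  If an allocation then fails, the shift
 * moves those i entries down to slot 0.  (With no cached page cutting
 * the window short, j + tpindex + 1 - startpindex == j + rbehind - i.)
 */
#include <assert.h>

int
main(void)
{
	int b_pages[8] = { 0 };
	int i, rbehind = 4;

	/* Allocate backward; pretend the third allocation fails. */
	for (i = 0; i < rbehind; i++) {
		if (i == 2)
			break;
		b_pages[rbehind - 1 - i] = 100 + i;	/* Fake pages. */
	}
	/* Shift the i placed entries down to the start of the array. */
	for (int j = 0; j < i; j++)
		b_pages[j] = b_pages[j + rbehind - i];
	assert(b_pages[0] == 101 && b_pages[1] == 100);
	return (0);
}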
 
 	/*
-	 * here on direct device I/O
-	 */
-	firstaddr = reqblock;
-	firstaddr += pib / DEV_BSIZE;
-	firstaddr -= IDX_TO_OFF(reqpage - first) / DEV_BSIZE;
-
-	/*
-	 * The first and last page have been calculated now, move
-	 * input pages to be zero based, and adjust the count.
-	 */
-	m += first;
-	reqpage -= first;
-	count = last - first + 1;
-
-	/*
-	 * calculate the file virtual address for the transfer
-	 */
-	foff = IDX_TO_OFF(m[0]->pindex);
-
-	/*
-	 * calculate the size of the transfer
-	 */
-	size = count * PAGE_SIZE;
-	KASSERT(count > 0, ("zero count"));
-	if ((foff + size) > object->un_pager.vnp.vnp_size)
-		size = object->un_pager.vnp.vnp_size - foff;
-	KASSERT(size > 0, ("zero size"));
-
-	/*
-	 * round up physical size for real devices.
+	 * Recalculate first offset and bytecount with regard to read behind.
+	 * Truncate bytecount to vnode real size and round up physical size
+	 * for real devices.
 	 */
+	foff = IDX_TO_OFF(bp->b_pages[0]->pindex);
+	bytecount = bp->b_npages << PAGE_SHIFT;
+	if ((foff + bytecount) > object->un_pager.vnp.vnp_size)
+		bytecount = object->un_pager.vnp.vnp_size - foff;
 	secmask = bo->bo_bsize - 1;
 	KASSERT(secmask < PAGE_SIZE && secmask > 0,
-	    ("vnode_pager_generic_getpages: sector size %d too large",
-	    secmask + 1));
-	size = (size + secmask) & ~secmask;
+	    ("%s: sector size %d too large", __func__, secmask + 1));
+	bytecount = (bytecount + secmask) & ~secmask;
 
 	/*
-	 * and map the pages to be read into the kva, if the filesystem
+	 * And map the pages to be read into the kva, if the filesystem
 	 * requires mapped buffers.
 	 */
 	if ((vp->v_mount->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0 &&
@@ -932,41 +967,32 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount,
 		bp->b_offset = 0;
 	} else {
 		bp->b_data = bp->b_kvabase;
-		pmap_qenter((vm_offset_t)bp->b_data, m, count);
+		pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages);
 	}
 
-	/* build a minimal buffer header */
+	/* Build a minimal buffer header. */
 	bp->b_iocmd = BIO_READ;
 	KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
 	KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
 	bp->b_rcred = crhold(curthread->td_ucred);
 	bp->b_wcred = crhold(curthread->td_ucred);
-	bp->b_blkno = firstaddr;
 	pbgetbo(bo, bp);
 	bp->b_vp = vp;
-	bp->b_bcount = size;
-	bp->b_bufsize = size;
-	bp->b_runningbufspace = bp->b_bufsize;
-	for (i = 0; i < count; i++)
-		bp->b_pages[i] = m[i];
-	bp->b_npages = count;
-	bp->b_pager.pg_reqpage = reqpage;
-	atomic_add_long(&runningbufspace, bp->b_runningbufspace);
-
-	PCPU_INC(cnt.v_vnodein);
-	PCPU_ADD(cnt.v_vnodepgsin, count);
-
-	/* do the input */
+	bp->b_bcount = bp->b_bufsize = bp->b_runningbufspace = bytecount;
 	bp->b_iooffset = dbtob(bp->b_blkno);
+	atomic_add_long(&runningbufspace, bp->b_runningbufspace);
+	PCPU_INC(cnt.v_vnodein);
+	PCPU_ADD(cnt.v_vnodepgsin, bp->b_npages);
 
 	if (iodone != NULL) { /* async */
-		bp->b_pager.pg_iodone = iodone;
+		bp->b_pgiodone = iodone;
 		bp->b_caller1 = arg;
 		bp->b_iodone = vnode_pager_generic_getpages_done_async;
 		bp->b_flags |= B_ASYNC;
 		BUF_KERNPROC(bp);
 		bstrategy(bp);
-		/* Good bye! */
+		return (VM_PAGER_OK);
 	} else {
 		bp->b_iodone = bdone;
 		bstrategy(bp);
@@ -977,9 +1003,8 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount,
 		bp->b_vp = NULL;
 		pbrelbo(bp);
 		relpbuf(bp, &vnode_pbuf_freecnt);
+		return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
 	}
-
-	return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
 }
 
 static void
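/*
 * Hypothetical async consumer (not part of this patch): the callback
 * matches pgo_getpages_iodone_t and is handed only the originally
 * requested pages; the read behind/ahead pages are finished by the
 * pager itself, as done_async() below shows.  A caller would pass it
 * to vm_pager_get_pages_async() together with the rbehind/rahead
 * hints.
 */
static void
example_iodone(void *arg, vm_page_t *ma, int count, int error)
{

	/* On success, ma[0 .. count - 1] are valid and still busied. */
	wakeup(arg);
}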
@@ -988,8 +1013,9 @@ vnode_pager_generic_getpages_done_async(struct buf *bp)
 	int error;
 
 	error = vnode_pager_generic_getpages_done(bp);
-	bp->b_pager.pg_iodone(bp->b_caller1, bp->b_pages,
-	    bp->b_pager.pg_reqpage, error);
+	/* Run the iodone upon the requested range. */
+	bp->b_pgiodone(bp->b_caller1, bp->b_pages + bp->b_pgbefore,
+	    bp->b_npages - bp->b_pgbefore - bp->b_pgafter, error);
 	for (int i = 0; i < bp->b_npages; i++)
 		bp->b_pages[i] = NULL;
 	bp->b_vp = NULL;
@@ -1052,8 +1078,8 @@ vnode_pager_generic_getpages_done(struct buf *bp)
 			    object->un_pager.vnp.vnp_size - tfoff)) == 0,
 			    ("%s: page %p is dirty", __func__, mt));
 		}
-
-		if (i != bp->b_pager.pg_reqpage)
+
+		if (i < bp->b_pgbefore || i >= bp->b_npages - bp->b_pgafter)
 			vm_page_readahead_finish(mt);
 	}
 	VM_OBJECT_WUNLOCK(object);
diff --git a/sys/vm/vnode_pager.h b/sys/vm/vnode_pager.h
index 1ff16ebfa937..a94b09b6b2ea 100644
--- a/sys/vm/vnode_pager.h
+++ b/sys/vm/vnode_pager.h
@@ -41,7 +41,8 @@
 #ifdef _KERNEL
 
 int vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m,
-	    int count, int reqpage, vop_getpages_iodone_t iodone, void *arg);
+	    int count, int *rbehind, int *rahead, vop_getpages_iodone_t iodone,
+	    void *arg);
 int vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *m,
 	    int count, boolean_t sync, int *rtvals);
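Finally, a hypothetical sketch (not part of this patch) of the arrayed-request rule stated in the commit message: until vm_pager_haspage() is made obsolete, a multi-page request should be preceded by it, so the whole busied run is known to be readable in one I/O.

/*
 * Hypothetical arrayed request: ma[] holds 'count' contiguous busied
 * pages.  Object lock handling is elided; vm_pager_has_page() expects
 * the object to be locked.
 */
static int
example_array_getpages(vm_object_t object, vm_page_t *ma, int count)
{
	int after, before, rbehind, rahead;

	if (!vm_pager_has_page(object, ma[0]->pindex, &before, &after))
		return (VM_PAGER_FAIL);
	if (count > after + 1)
		count = after + 1;	/* Stay within the contiguous run. */
	rbehind = rahead = 0;		/* No extra read around the array. */
	return (vm_pager_get_pages(object, ma, count, &rbehind, &rahead));
}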