freebsd_amp_hwpstate/sys/fs/specfs/spec_vnops.c

1006 lines
24 KiB
C
Raw Normal View History

1994-05-24 10:09:53 +00:00
/*
* Copyright (c) 1989, 1993, 1995
1994-05-24 10:09:53 +00:00
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95
1999-08-28 01:08:13 +00:00
* $FreeBSD$
1994-05-24 10:09:53 +00:00
*/
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/fcntl.h>
1994-05-24 10:09:53 +00:00
#include <sys/disklabel.h>
#include <sys/vmmeter.h>
#include <vm/vm.h>
#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <vm/vm_extern.h>
1998-08-25 17:48:54 +00:00
static int spec_advlock __P((struct vop_advlock_args *));
static int spec_badop __P((void));
1998-08-25 17:48:54 +00:00
static int spec_bmap __P((struct vop_bmap_args *));
static int spec_close __P((struct vop_close_args *));
static int spec_freeblks __P((struct vop_freeblks_args *));
1998-08-25 17:48:54 +00:00
static int spec_fsync __P((struct vop_fsync_args *));
static int spec_getpages __P((struct vop_getpages_args *));
static int spec_inactive __P((struct vop_inactive_args *));
static int spec_ioctl __P((struct vop_ioctl_args *));
static int spec_lookup __P((struct vop_lookup_args *));
static int spec_open __P((struct vop_open_args *));
1998-08-25 17:48:54 +00:00
static int spec_poll __P((struct vop_poll_args *));
static int spec_print __P((struct vop_print_args *));
static int spec_read __P((struct vop_read_args *));
1998-08-25 17:48:54 +00:00
static int spec_strategy __P((struct vop_strategy_args *));
static int spec_write __P((struct vop_write_args *));
vop_t **spec_vnodeop_p;
1995-12-11 09:24:58 +00:00
static struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
{ &vop_default_desc, (vop_t *) vop_defaultop },
{ &vop_access_desc, (vop_t *) vop_ebadf },
{ &vop_advlock_desc, (vop_t *) spec_advlock },
{ &vop_bmap_desc, (vop_t *) spec_bmap },
{ &vop_close_desc, (vop_t *) spec_close },
{ &vop_create_desc, (vop_t *) spec_badop },
{ &vop_freeblks_desc, (vop_t *) spec_freeblks },
{ &vop_fsync_desc, (vop_t *) spec_fsync },
{ &vop_getpages_desc, (vop_t *) spec_getpages },
{ &vop_inactive_desc, (vop_t *) spec_inactive },
{ &vop_ioctl_desc, (vop_t *) spec_ioctl },
{ &vop_lease_desc, (vop_t *) vop_null },
{ &vop_link_desc, (vop_t *) spec_badop },
{ &vop_lookup_desc, (vop_t *) spec_lookup },
{ &vop_mkdir_desc, (vop_t *) spec_badop },
{ &vop_mknod_desc, (vop_t *) spec_badop },
{ &vop_open_desc, (vop_t *) spec_open },
{ &vop_pathconf_desc, (vop_t *) vop_stdpathconf },
{ &vop_poll_desc, (vop_t *) spec_poll },
{ &vop_print_desc, (vop_t *) spec_print },
{ &vop_read_desc, (vop_t *) spec_read },
{ &vop_readdir_desc, (vop_t *) spec_badop },
{ &vop_readlink_desc, (vop_t *) spec_badop },
{ &vop_reallocblks_desc, (vop_t *) spec_badop },
{ &vop_reclaim_desc, (vop_t *) vop_null },
{ &vop_remove_desc, (vop_t *) spec_badop },
{ &vop_rename_desc, (vop_t *) spec_badop },
{ &vop_rmdir_desc, (vop_t *) spec_badop },
{ &vop_setattr_desc, (vop_t *) vop_ebadf },
{ &vop_strategy_desc, (vop_t *) spec_strategy },
{ &vop_symlink_desc, (vop_t *) spec_badop },
{ &vop_write_desc, (vop_t *) spec_write },
{ NULL, NULL }
1994-05-24 10:09:53 +00:00
};
1995-12-11 09:24:58 +00:00
static struct vnodeopv_desc spec_vnodeop_opv_desc =
1994-05-24 10:09:53 +00:00
{ &spec_vnodeop_p, spec_vnodeop_entries };
VNODEOP_SET(spec_vnodeop_opv_desc);
int
spec_vnoperate(ap)
struct vop_generic_args /* {
struct vnodeop_desc *a_desc;
<other random data follows, presumably>
} */ *ap;
{
return (VOCALL(spec_vnodeop_p, ap->a_desc->vdesc_offset, ap));
}
static void spec_getpages_iodone __P((struct buf *bp));
1994-05-24 10:09:53 +00:00
/*
* Trivial lookup routine that always fails.
*/
static int
1994-05-24 10:09:53 +00:00
spec_lookup(ap)
struct vop_lookup_args /* {
struct vnode *a_dvp;
struct vnode **a_vpp;
struct componentname *a_cnp;
} */ *ap;
{
*ap->a_vpp = NULL;
return (ENOTDIR);
}
/*
* Open a special file.
*/
/* ARGSUSED */
static int
1994-05-24 10:09:53 +00:00
spec_open(ap)
struct vop_open_args /* {
struct vnode *a_vp;
int a_mode;
struct ucred *a_cred;
struct proc *a_p;
} */ *ap;
{
struct proc *p = ap->a_p;
1994-05-24 10:09:53 +00:00
struct vnode *bvp, *vp = ap->a_vp;
Divorce "dev_t" from the "major|minor" bitmap, which is now called udev_t in the kernel but still called dev_t in userland. Provide functions to manipulate both types: major() umajor() minor() uminor() makedev() umakedev() dev2udev() udev2dev() For now they're functions, they will become in-line functions after one of the next two steps in this process. Return major/minor/makedev to macro-hood for userland. Register a name in cdevsw[] for the "filedescriptor" driver. In the kernel the udev_t appears in places where we have the major/minor number combination, (ie: a potential device: we may not have the driver nor the device), like in inodes, vattr, cdevsw registration and so on, whereas the dev_t appears where we carry around a reference to a actual device. In the future the cdevsw and the aliased-from vnode will be hung directly from the dev_t, along with up to two softc pointers for the device driver and a few houskeeping bits. This will essentially replace the current "alias" check code (same buck, bigger bang). A little stunt has been provided to try to catch places where the wrong type is being used (dev_t vs udev_t), if you see something not working, #undef DEVT_FASCIST in kern/kern_conf.c and see if it makes a difference. If it does, please try to track it down (many hands make light work) or at least try to reproduce it as simply as possible, and describe how to do that. Without DEVT_FASCIST I belive this patch is a no-op. Stylistic/posixoid comments about the userland view of the <sys/*.h> files welcome now, from userland they now contain the end result. Next planned step: make all dev_t's refer to the same devsw[] which means convert BLK's to CHR's at the perimeter of the vnodes and other places where they enter the game (bootdev, mknod, sysctl).
1999-05-11 19:55:07 +00:00
dev_t bdev, dev = vp->v_rdev;
int error, maxio;
struct cdevsw *dsw;
1994-05-24 10:09:53 +00:00
/*
* Don't allow open if fs is mounted -nodev.
*/
if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
return (ENXIO);
/* Make this field valid before any I/O in ->d_open */
if (!dev->si_iosize_max)
dev->si_iosize_max = DFLTPHYS;
1994-05-24 10:09:53 +00:00
switch (vp->v_type) {
case VCHR:
dsw = devsw(dev);
if ( (dsw == NULL) || (dsw->d_open == NULL))
return ENXIO;
1994-05-24 10:09:53 +00:00
if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
/*
* When running in very secure mode, do not allow
* opens for writing of any disk character devices.
*/
if (securelevel >= 2
&& dsw->d_bmaj != -1
&& (dsw->d_flags & D_TYPEMASK) == D_DISK)
1994-05-24 10:09:53 +00:00
return (EPERM);
/*
* When running in secure mode, do not allow opens
* for writing of /dev/mem, /dev/kmem, or character
* devices whose corresponding block devices are
* currently mounted.
*/
if (securelevel >= 1) {
if ((bdev = chrtoblk(dev)) != NODEV &&
vfinddev(bdev, VBLK, &bvp) &&
bvp->v_usecount > 0 &&
(error = vfs_mountedon(bvp)))
return (error);
if (iskmemdev(dev))
return (EPERM);
}
}
if ((dsw->d_flags & D_TYPEMASK) == D_TTY)
vp->v_flag |= VISTTY;
VOP_UNLOCK(vp, 0, p);
error = (*dsw->d_open)(dev, ap->a_mode, S_IFCHR, p);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
break;
1994-05-24 10:09:53 +00:00
case VBLK:
dsw = devsw(dev);
if ( (dsw == NULL) || (dsw->d_open == NULL))
return ENXIO;
1994-05-24 10:09:53 +00:00
/*
* When running in very secure mode, do not allow
* opens for writing of any disk block devices.
*/
if (securelevel >= 2 && ap->a_cred != FSCRED &&
(ap->a_mode & FWRITE) &&
(dsw->d_flags & D_TYPEMASK) == D_DISK)
1994-05-24 10:09:53 +00:00
return (EPERM);
1994-05-24 10:09:53 +00:00
/*
* Do not allow opens of block devices that are
* currently mounted.
*/
error = vfs_mountedon(vp);
if (error)
1994-05-24 10:09:53 +00:00
return (error);
error = (*dsw->d_open)(dev, ap->a_mode, S_IFBLK, p);
break;
default:
error = 0;
break;
1994-05-24 10:09:53 +00:00
}
if (vn_isdisk(vp)) {
if (!dev->si_bsize_phys)
dev->si_bsize_phys = DEV_BSIZE;
}
maxio = dev->si_iosize_max;
if (!maxio)
maxio = DFLTPHYS;
if (maxio > MAXPHYS)
maxio = MAXPHYS;
vp->v_maxio = maxio;
return (error);
1994-05-24 10:09:53 +00:00
}
/*
* Vnode op for read
*/
/* ARGSUSED */
static int
1994-05-24 10:09:53 +00:00
spec_read(ap)
struct vop_read_args /* {
struct vnode *a_vp;
struct uio *a_uio;
int a_ioflag;
struct ucred *a_cred;
} */ *ap;
{
register struct vnode *vp = ap->a_vp;
register struct uio *uio = ap->a_uio;
1994-05-24 10:09:53 +00:00
struct proc *p = uio->uio_procp;
struct buf *bp;
daddr_t bn, nextbn;
long bsize, bscale;
struct partinfo dpart;
int n, on;
d_ioctl_t *ioctl;
1994-05-24 10:09:53 +00:00
int error = 0;
int seqcount = ap->a_ioflag >> 16;
1994-05-24 10:09:53 +00:00
dev_t dev;
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_READ)
panic("spec_read mode");
if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
panic("spec_read proc");
#endif
if (uio->uio_resid == 0)
return (0);
switch (vp->v_type) {
case VCHR:
VOP_UNLOCK(vp, 0, p);
error = (*devsw(vp->v_rdev)->d_read)
(vp->v_rdev, uio, ap->a_ioflag);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
return (error);
1994-05-24 10:09:53 +00:00
case VBLK:
if (enable_userblk_io == 0)
return (EINVAL);
1994-05-24 10:09:53 +00:00
if (uio->uio_offset < 0)
return (EINVAL);
dev = vp->v_rdev;
/*
* Calculate block size for block device. The block size must
* be larger then the physical minimum.
*/
bsize = vp->v_rdev->si_bsize_best;
if (bsize < vp->v_rdev->si_bsize_phys)
bsize = vp->v_rdev->si_bsize_phys;
if (bsize < BLKDEV_IOSIZE)
bsize = BLKDEV_IOSIZE;
if ((ioctl = devsw(dev)->d_ioctl) != NULL &&
1994-05-24 10:09:53 +00:00
(*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 &&
dpart.part->p_fstype == FS_BSDFFS &&
dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
bsize = dpart.part->p_frag * dpart.part->p_fsize;
bscale = btodb(bsize);
1994-05-24 10:09:53 +00:00
do {
bn = btodb(uio->uio_offset) & ~(bscale - 1);
1994-05-24 10:09:53 +00:00
on = uio->uio_offset % bsize;
if (seqcount > 1) {
1994-05-24 10:09:53 +00:00
nextbn = bn + bscale;
error = breadn(vp, bn, (int)bsize, &nextbn,
(int *)&bsize, 1, NOCRED, &bp);
} else {
1994-05-24 10:09:53 +00:00
error = bread(vp, bn, (int)bsize, NOCRED, &bp);
}
/*
* Figure out how much of the buffer is valid relative
* to our offset into the buffer, which may be negative
* if we are beyond the EOF.
*
* The valid size of the buffer is based on
* bp->b_bcount (which may have been truncated by
* dscheck or the device) minus bp->b_resid, which
* may be indicative of an I/O error if non-zero.
*/
if (error == 0) {
n = bp->b_bcount - on;
if (n < 0) {
error = EINVAL;
} else {
n = min(n, bp->b_bcount - bp->b_resid - on);
if (n < 0)
error = EIO;
}
}
1994-05-24 10:09:53 +00:00
if (error) {
brelse(bp);
return (error);
}
n = min(n, uio->uio_resid);
1994-05-24 10:09:53 +00:00
error = uiomove((char *)bp->b_data + on, n, uio);
brelse(bp);
} while (error == 0 && uio->uio_resid > 0 && n != 0);
return (error);
default:
panic("spec_read type");
}
/* NOTREACHED */
}
/*
* Vnode op for write
*/
/* ARGSUSED */
static int
1994-05-24 10:09:53 +00:00
spec_write(ap)
struct vop_write_args /* {
struct vnode *a_vp;
struct uio *a_uio;
int a_ioflag;
struct ucred *a_cred;
} */ *ap;
{
register struct vnode *vp = ap->a_vp;
register struct uio *uio = ap->a_uio;
1994-05-24 10:09:53 +00:00
struct proc *p = uio->uio_procp;
struct buf *bp;
daddr_t bn;
int bsize, blkmask;
struct partinfo dpart;
register int n, on;
int error = 0;
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_WRITE)
panic("spec_write mode");
if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
panic("spec_write proc");
#endif
switch (vp->v_type) {
case VCHR:
VOP_UNLOCK(vp, 0, p);
error = (*devsw(vp->v_rdev)->d_write)
(vp->v_rdev, uio, ap->a_ioflag);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
return (error);
1994-05-24 10:09:53 +00:00
case VBLK:
if (enable_userblk_io == 0)
return (EINVAL);
if (uio->uio_resid == 0)
return (0);
1994-05-24 10:09:53 +00:00
if (uio->uio_offset < 0)
return (EINVAL);
/*
* Calculate block size for block device. The block size must
* be larger then the physical minimum.
*/
bsize = vp->v_rdev->si_bsize_best;
if (bsize < vp->v_rdev->si_bsize_phys)
bsize = vp->v_rdev->si_bsize_phys;
if (bsize < BLKDEV_IOSIZE)
bsize = BLKDEV_IOSIZE;
if ((*devsw(vp->v_rdev)->d_ioctl)(vp->v_rdev, DIOCGPART,
1994-05-24 10:09:53 +00:00
(caddr_t)&dpart, FREAD, p) == 0) {
if (dpart.part->p_fstype == FS_BSDFFS &&
dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
bsize = dpart.part->p_frag *
dpart.part->p_fsize;
}
blkmask = btodb(bsize) - 1;
1994-05-24 10:09:53 +00:00
do {
bn = btodb(uio->uio_offset) & ~blkmask;
1994-05-24 10:09:53 +00:00
on = uio->uio_offset % bsize;
/*
* Calculate potential request size, determine
* if we can avoid a read-before-write.
*/
1994-05-24 10:09:53 +00:00
n = min((unsigned)(bsize - on), uio->uio_resid);
if (n == bsize)
bp = getblk(vp, bn, bsize, 0, 0);
else
error = bread(vp, bn, bsize, NOCRED, &bp);
/*
* n is the amount of effective space in the buffer
* that we wish to write relative to our offset into
* the buffer. We have to truncate it to the valid
* size of the buffer relative to our offset into
* the buffer (which may end up being negative if
* we are beyond the EOF).
*
* The valid size of the buffer is based on
* bp->b_bcount (which may have been truncated by
* dscheck or the device) minus bp->b_resid, which
* may be indicative of an I/O error if non-zero.
*
* XXX In a newly created buffer, b_bcount == bsize
* and, being asynchronous, we have no idea of the
* EOF.
*/
if (error == 0) {
n = min(n, bp->b_bcount - on);
if (n < 0) {
error = EINVAL;
} else {
n = min(n, bp->b_bcount - bp->b_resid - on);
if (n < 0)
error = EIO;
}
}
1994-05-24 10:09:53 +00:00
if (error) {
brelse(bp);
return (error);
}
error = uiomove((char *)bp->b_data + on, n, uio);
if (n + on == bsize)
bawrite(bp);
else
1994-05-24 10:09:53 +00:00
bdwrite(bp);
} while (error == 0 && uio->uio_resid > 0 && n != 0);
return (error);
default:
panic("spec_write type");
}
/* NOTREACHED */
}
/*
* Device ioctl operation.
*/
/* ARGSUSED */
static int
1994-05-24 10:09:53 +00:00
spec_ioctl(ap)
struct vop_ioctl_args /* {
struct vnode *a_vp;
int a_command;
caddr_t a_data;
int a_fflag;
struct ucred *a_cred;
struct proc *a_p;
} */ *ap;
{
dev_t dev = ap->a_vp->v_rdev;
switch (ap->a_vp->v_type) {
case VCHR:
return ((*devsw(dev)->d_ioctl)(dev, ap->a_command,
ap->a_data, ap->a_fflag, ap->a_p));
case VBLK:
return ((*devsw(dev)->d_ioctl)(dev, ap->a_command,
ap->a_data, ap->a_fflag, ap->a_p));
1994-05-24 10:09:53 +00:00
default:
panic("spec_ioctl");
/* NOTREACHED */
}
}
/* ARGSUSED */
static int
spec_poll(ap)
struct vop_poll_args /* {
1994-05-24 10:09:53 +00:00
struct vnode *a_vp;
int a_events;
1994-05-24 10:09:53 +00:00
struct ucred *a_cred;
struct proc *a_p;
} */ *ap;
{
register dev_t dev;
switch (ap->a_vp->v_type) {
case VCHR:
dev = ap->a_vp->v_rdev;
return (*devsw(dev)->d_poll)(dev, ap->a_events, ap->a_p);
default:
return (vop_defaultop((struct vop_generic_args *)ap));
1994-05-24 10:09:53 +00:00
}
}
/*
* Synch buffers associated with a block device
*/
/* ARGSUSED */
static int
1994-05-24 10:09:53 +00:00
spec_fsync(ap)
struct vop_fsync_args /* {
struct vnode *a_vp;
struct ucred *a_cred;
int a_waitfor;
struct proc *a_p;
} */ *ap;
{
register struct vnode *vp = ap->a_vp;
register struct buf *bp;
struct buf *nbp;
int s;
if (vp->v_type == VCHR)
return (0);
/*
* Flush all dirty buffers associated with a block device.
*/
loop:
s = splbio();
for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
nbp = TAILQ_NEXT(bp, b_vnbufs);
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
1994-05-24 10:09:53 +00:00
continue;
if ((bp->b_flags & B_DELWRI) == 0)
panic("spec_fsync: not dirty");
if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) {
BUF_UNLOCK(bp);
vfs_bio_awrite(bp);
splx(s);
} else {
bremfree(bp);
splx(s);
bawrite(bp);
}
1994-05-24 10:09:53 +00:00
goto loop;
}
if (ap->a_waitfor == MNT_WAIT) {
while (vp->v_numoutput) {
vp->v_flag |= VBWAIT;
(void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spfsyn", 0);
1994-05-24 10:09:53 +00:00
}
#ifdef DIAGNOSTIC
if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
1994-05-24 10:09:53 +00:00
vprint("spec_fsync: dirty", vp);
splx(s);
1994-05-24 10:09:53 +00:00
goto loop;
}
#endif
}
splx(s);
return (0);
}
static int
spec_inactive(ap)
struct vop_inactive_args /* {
struct vnode *a_vp;
struct proc *a_p;
} */ *ap;
{
VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
return (0);
}
1994-05-24 10:09:53 +00:00
/*
* Just call the device strategy routine
*/
static int
1994-05-24 10:09:53 +00:00
spec_strategy(ap)
struct vop_strategy_args /* {
struct vnode *a_vp;
1994-05-24 10:09:53 +00:00
struct buf *a_bp;
} */ *ap;
{
struct buf *bp;
1994-05-24 10:09:53 +00:00
bp = ap->a_bp;
if (((bp->b_flags & B_READ) == 0) &&
(LIST_FIRST(&bp->b_dep)) != NULL && bioops.io_start)
(*bioops.io_start)(bp);
BUF_STRATEGY(bp, 0);
1994-05-24 10:09:53 +00:00
return (0);
}
static int
spec_freeblks(ap)
struct vop_freeblks_args /* {
struct vnode *a_vp;
daddr_t a_addr;
daddr_t a_length;
} */ *ap;
{
struct cdevsw *bsw;
struct buf *bp;
bsw = devsw(ap->a_vp->v_rdev);
if ((bsw->d_flags & D_CANFREE) == 0)
return (0);
bp = geteblk(ap->a_length);
bp->b_flags |= B_FREEBUF;
bp->b_dev = ap->a_vp->v_rdev;
bp->b_blkno = ap->a_addr;
bp->b_offset = dbtob(ap->a_addr);
bp->b_bcount = ap->a_length;
BUF_STRATEGY(bp, 0);
return (0);
}
1994-05-24 10:09:53 +00:00
/*
* Implement degenerate case where the block requested is the block
* returned, and assume that the entire device is contiguous in regards
* to the contiguous block range (runp and runb).
1994-05-24 10:09:53 +00:00
*/
static int
1994-05-24 10:09:53 +00:00
spec_bmap(ap)
struct vop_bmap_args /* {
struct vnode *a_vp;
daddr_t a_bn;
struct vnode **a_vpp;
daddr_t *a_bnp;
int *a_runp;
int *a_runb;
1994-05-24 10:09:53 +00:00
} */ *ap;
{
struct vnode *vp = ap->a_vp;
int runp = 0;
int runb = 0;
1994-05-24 10:09:53 +00:00
if (ap->a_vpp != NULL)
*ap->a_vpp = vp;
1994-05-24 10:09:53 +00:00
if (ap->a_bnp != NULL)
*ap->a_bnp = ap->a_bn;
if (vp->v_type == VBLK && vp->v_mount != NULL)
runp = runb = MAXBSIZE / vp->v_mount->mnt_stat.f_iosize;
if (ap->a_runp != NULL)
*ap->a_runp = runp;
if (ap->a_runb != NULL)
*ap->a_runb = runb;
1994-05-24 10:09:53 +00:00
return (0);
}
/*
* Device close routine
*/
/* ARGSUSED */
static int
1994-05-24 10:09:53 +00:00
spec_close(ap)
struct vop_close_args /* {
struct vnode *a_vp;
int a_fflag;
struct ucred *a_cred;
struct proc *a_p;
} */ *ap;
{
register struct vnode *vp = ap->a_vp;
dev_t dev = vp->v_rdev;
int mode, error;
switch (vp->v_type) {
case VCHR:
/*
* Hack: a tty device that is a controlling terminal
* has a reference from the session structure.
* We cannot easily tell that a character device is
* a controlling terminal, unless it is the closing
* process' controlling terminal. In that case,
* if the reference count is 2 (this last descriptor
* plus the session), release the reference from the session.
*/
if (vcount(vp) == 2 && ap->a_p &&
(vp->v_flag & VXLOCK) == 0 &&
1994-05-24 10:09:53 +00:00
vp == ap->a_p->p_session->s_ttyvp) {
vrele(vp);
ap->a_p->p_session->s_ttyvp = NULL;
}
mode = S_IFCHR;
break;
case VBLK:
/*
* On last close of a block device (that isn't mounted)
* we must invalidate any in core blocks, so that
* we can, for instance, change floppy disks.
*/
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
VOP_UNLOCK(vp, 0, ap->a_p);
if (error)
1994-05-24 10:09:53 +00:00
return (error);
1994-05-24 10:09:53 +00:00
mode = S_IFBLK;
break;
default:
panic("spec_close: not special");
}
/*
* We do not want to really close the device if it
* is still in use unless we are trying to close it
* forcibly. Since every use (buffer, vnode, swap, cmap)
* holds a reference to the vnode, and because we mark
* any other vnodes that alias this device, when the
* sum of the reference counts on all the aliased
* vnodes descends to one, we are on last close.
*/
if (vp->v_flag & VXLOCK) {
/* Forced close */
} else if (devsw(dev)->d_flags & D_TRACKCLOSE) {
/* Keep device updated on status */
} else if (vcount(vp) > 1) {
return (0);
}
return (devsw(dev)->d_close(dev, ap->a_fflag, mode, ap->a_p));
1994-05-24 10:09:53 +00:00
}
/*
* Print out the contents of a special device vnode.
*/
static int
1994-05-24 10:09:53 +00:00
spec_print(ap)
struct vop_print_args /* {
struct vnode *a_vp;
} */ *ap;
{
printf("tag VT_NON, dev %s\n", devtoname(ap->a_vp->v_rdev));
return (0);
1994-05-24 10:09:53 +00:00
}
/*
* Special device advisory byte-level locks.
*/
/* ARGSUSED */
static int
1994-05-24 10:09:53 +00:00
spec_advlock(ap)
struct vop_advlock_args /* {
struct vnode *a_vp;
caddr_t a_id;
int a_op;
struct flock *a_fl;
int a_flags;
} */ *ap;
{
return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL);
1994-05-24 10:09:53 +00:00
}
/*
* Special device bad operation
*/
static int
1994-05-24 10:09:53 +00:00
spec_badop()
{
panic("spec_badop called");
/* NOTREACHED */
}
static void
spec_getpages_iodone(bp)
struct buf *bp;
{
bp->b_flags |= B_DONE;
wakeup(bp);
}
static int
spec_getpages(ap)
struct vop_getpages_args *ap;
{
vm_offset_t kva;
int error;
int i, pcount, size, s;
daddr_t blkno;
struct buf *bp;
vm_page_t m;
vm_ooffset_t offset;
This mega-commit is meant to fix numerous interrelated problems. There has been some bitrot and incorrect assumptions in the vfs_bio code. These problems have manifest themselves worse on NFS type filesystems, but can still affect local filesystems under certain circumstances. Most of the problems have involved mmap consistancy, and as a side-effect broke the vfs.ioopt code. This code might have been committed seperately, but almost everything is interrelated. 1) Allow (pmap_object_init_pt) prefaulting of buffer-busy pages that are fully valid. 2) Rather than deactivating erroneously read initial (header) pages in kern_exec, we now free them. 3) Fix the rundown of non-VMIO buffers that are in an inconsistent (missing vp) state. 4) Fix the disassociation of pages from buffers in brelse. The previous code had rotted and was faulty in a couple of important circumstances. 5) Remove a gratuitious buffer wakeup in vfs_vmio_release. 6) Remove a crufty and currently unused cluster mechanism for VBLK files in vfs_bio_awrite. When the code is functional, I'll add back a cleaner version. 7) The page busy count wakeups assocated with the buffer cache usage were incorrectly cleaned up in a previous commit by me. Revert to the original, correct version, but with a cleaner implementation. 8) The cluster read code now tries to keep data associated with buffers more aggressively (without breaking the heuristics) when it is presumed that the read data (buffers) will be soon needed. 9) Change to filesystem lockmgr locks so that they use LK_NOPAUSE. The delay loop waiting is not useful for filesystem locks, due to the length of the time intervals. 10) Correct and clean-up spec_getpages. 11) Implement a fully functional nfs_getpages, nfs_putpages. 12) Fix nfs_write so that modifications are coherent with the NFS data on the server disk (at least as well as NFS seems to allow.) 13) Properly support MS_INVALIDATE on NFS. 14) Properly pass down MS_INVALIDATE to lower levels of the VM code from vm_map_clean. 15) Better support the notion of pages being busy but valid, so that fewer in-transit waits occur. (use p->busy more for pageouts instead of PG_BUSY.) Since the page is fully valid, it is still usable for reads. 16) It is possible (in error) for cached pages to be busy. Make the page allocation code handle that case correctly. (It should probably be a printf or panic, but I want the system to handle coding errors robustly. I'll probably add a printf.) 17) Correct the design and usage of vm_page_sleep. It didn't handle consistancy problems very well, so make the design a little less lofty. After vm_page_sleep, if it ever blocked, it is still important to relookup the page (if the object generation count changed), and verify it's status (always.) 18) In vm_pageout.c, vm_pageout_clean had rotted, so clean that up. 19) Push the page busy for writes and VM_PROT_READ into vm_pageout_flush. 20) Fix vm_pager_put_pages and it's descendents to support an int flag instead of a boolean, so that we can pass down the invalidate bit.
1998-03-07 21:37:31 +00:00
int toff, nextoff, nread;
struct vnode *vp = ap->a_vp;
int blksiz;
This mega-commit is meant to fix numerous interrelated problems. There has been some bitrot and incorrect assumptions in the vfs_bio code. These problems have manifest themselves worse on NFS type filesystems, but can still affect local filesystems under certain circumstances. Most of the problems have involved mmap consistancy, and as a side-effect broke the vfs.ioopt code. This code might have been committed seperately, but almost everything is interrelated. 1) Allow (pmap_object_init_pt) prefaulting of buffer-busy pages that are fully valid. 2) Rather than deactivating erroneously read initial (header) pages in kern_exec, we now free them. 3) Fix the rundown of non-VMIO buffers that are in an inconsistent (missing vp) state. 4) Fix the disassociation of pages from buffers in brelse. The previous code had rotted and was faulty in a couple of important circumstances. 5) Remove a gratuitious buffer wakeup in vfs_vmio_release. 6) Remove a crufty and currently unused cluster mechanism for VBLK files in vfs_bio_awrite. When the code is functional, I'll add back a cleaner version. 7) The page busy count wakeups assocated with the buffer cache usage were incorrectly cleaned up in a previous commit by me. Revert to the original, correct version, but with a cleaner implementation. 8) The cluster read code now tries to keep data associated with buffers more aggressively (without breaking the heuristics) when it is presumed that the read data (buffers) will be soon needed. 9) Change to filesystem lockmgr locks so that they use LK_NOPAUSE. The delay loop waiting is not useful for filesystem locks, due to the length of the time intervals. 10) Correct and clean-up spec_getpages. 11) Implement a fully functional nfs_getpages, nfs_putpages. 12) Fix nfs_write so that modifications are coherent with the NFS data on the server disk (at least as well as NFS seems to allow.) 13) Properly support MS_INVALIDATE on NFS. 14) Properly pass down MS_INVALIDATE to lower levels of the VM code from vm_map_clean. 15) Better support the notion of pages being busy but valid, so that fewer in-transit waits occur. (use p->busy more for pageouts instead of PG_BUSY.) Since the page is fully valid, it is still usable for reads. 16) It is possible (in error) for cached pages to be busy. Make the page allocation code handle that case correctly. (It should probably be a printf or panic, but I want the system to handle coding errors robustly. I'll probably add a printf.) 17) Correct the design and usage of vm_page_sleep. It didn't handle consistancy problems very well, so make the design a little less lofty. After vm_page_sleep, if it ever blocked, it is still important to relookup the page (if the object generation count changed), and verify it's status (always.) 18) In vm_pageout.c, vm_pageout_clean had rotted, so clean that up. 19) Push the page busy for writes and VM_PROT_READ into vm_pageout_flush. 20) Fix vm_pager_put_pages and it's descendents to support an int flag instead of a boolean, so that we can pass down the invalidate bit.
1998-03-07 21:37:31 +00:00
int gotreqpage;
error = 0;
pcount = round_page(ap->a_count) / PAGE_SIZE;
/*
* Calculate the offset of the transfer and do sanity check.
* FreeBSD currently only supports an 8 TB range due to b_blkno
* being in DEV_BSIZE ( usually 512 ) byte chunks on call to
* VOP_STRATEGY. XXX
*/
offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset;
#define DADDR_T_BIT (sizeof(daddr_t)*8)
#define OFFSET_MAX ((1LL << (DADDR_T_BIT + DEV_BSHIFT)) - 1)
if (offset < 0 || offset > OFFSET_MAX) {
/* XXX still no %q in kernel. */
printf("spec_getpages: preposterous offset 0x%x%08x\n",
(u_int)((u_quad_t)offset >> 32),
(u_int)(offset & 0xffffffff));
return (VM_PAGER_ERROR);
}
blkno = btodb(offset);
/*
* Round up physical size for real devices. We cannot round using
* v_mount's block size data because v_mount has nothing to do with
* the device. i.e. it's usually '/dev'. We need the physical block
* size for the device itself.
*
* We can't use v_specmountpoint because it only exists when the
1999-08-13 10:10:12 +00:00
* block device is mounted. However, we can use v_rdev.
*/
if (vp->v_type == VBLK)
1999-08-13 10:10:12 +00:00
blksiz = vp->v_rdev->si_bsize_phys;
else
blksiz = DEV_BSIZE;
size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);
bp = getpbuf(NULL);
kva = (vm_offset_t)bp->b_data;
/*
* Map the pages to be read into the kva.
*/
pmap_qenter(kva, ap->a_m, pcount);
/* Build a minimal buffer header. */
bp->b_flags = B_READ | B_CALL;
bp->b_iodone = spec_getpages_iodone;
/* B_PHYS is not set, but it is nice to fill this in. */
bp->b_rcred = bp->b_wcred = curproc->p_ucred;
if (bp->b_rcred != NOCRED)
crhold(bp->b_rcred);
if (bp->b_wcred != NOCRED)
crhold(bp->b_wcred);
bp->b_blkno = blkno;
bp->b_lblkno = blkno;
pbgetvp(ap->a_vp, bp);
bp->b_bcount = size;
bp->b_bufsize = size;
bp->b_resid = 0;
cnt.v_vnodein++;
cnt.v_vnodepgsin += pcount;
/* Do the input. */
VOP_STRATEGY(bp->b_vp, bp);
s = splbio();
/* We definitely need to be at splbio here. */
while ((bp->b_flags & B_DONE) == 0)
tsleep(bp, PVM, "spread", 0);
splx(s);
This mega-commit is meant to fix numerous interrelated problems. There has been some bitrot and incorrect assumptions in the vfs_bio code. These problems have manifest themselves worse on NFS type filesystems, but can still affect local filesystems under certain circumstances. Most of the problems have involved mmap consistancy, and as a side-effect broke the vfs.ioopt code. This code might have been committed seperately, but almost everything is interrelated. 1) Allow (pmap_object_init_pt) prefaulting of buffer-busy pages that are fully valid. 2) Rather than deactivating erroneously read initial (header) pages in kern_exec, we now free them. 3) Fix the rundown of non-VMIO buffers that are in an inconsistent (missing vp) state. 4) Fix the disassociation of pages from buffers in brelse. The previous code had rotted and was faulty in a couple of important circumstances. 5) Remove a gratuitious buffer wakeup in vfs_vmio_release. 6) Remove a crufty and currently unused cluster mechanism for VBLK files in vfs_bio_awrite. When the code is functional, I'll add back a cleaner version. 7) The page busy count wakeups assocated with the buffer cache usage were incorrectly cleaned up in a previous commit by me. Revert to the original, correct version, but with a cleaner implementation. 8) The cluster read code now tries to keep data associated with buffers more aggressively (without breaking the heuristics) when it is presumed that the read data (buffers) will be soon needed. 9) Change to filesystem lockmgr locks so that they use LK_NOPAUSE. The delay loop waiting is not useful for filesystem locks, due to the length of the time intervals. 10) Correct and clean-up spec_getpages. 11) Implement a fully functional nfs_getpages, nfs_putpages. 12) Fix nfs_write so that modifications are coherent with the NFS data on the server disk (at least as well as NFS seems to allow.) 13) Properly support MS_INVALIDATE on NFS. 14) Properly pass down MS_INVALIDATE to lower levels of the VM code from vm_map_clean. 15) Better support the notion of pages being busy but valid, so that fewer in-transit waits occur. (use p->busy more for pageouts instead of PG_BUSY.) Since the page is fully valid, it is still usable for reads. 16) It is possible (in error) for cached pages to be busy. Make the page allocation code handle that case correctly. (It should probably be a printf or panic, but I want the system to handle coding errors robustly. I'll probably add a printf.) 17) Correct the design and usage of vm_page_sleep. It didn't handle consistancy problems very well, so make the design a little less lofty. After vm_page_sleep, if it ever blocked, it is still important to relookup the page (if the object generation count changed), and verify it's status (always.) 18) In vm_pageout.c, vm_pageout_clean had rotted, so clean that up. 19) Push the page busy for writes and VM_PROT_READ into vm_pageout_flush. 20) Fix vm_pager_put_pages and it's descendents to support an int flag instead of a boolean, so that we can pass down the invalidate bit.
1998-03-07 21:37:31 +00:00
if ((bp->b_flags & B_ERROR) != 0) {
if (bp->b_error)
error = bp->b_error;
else
error = EIO;
}
nread = size - bp->b_resid;
This mega-commit is meant to fix numerous interrelated problems. There has been some bitrot and incorrect assumptions in the vfs_bio code. These problems have manifest themselves worse on NFS type filesystems, but can still affect local filesystems under certain circumstances. Most of the problems have involved mmap consistancy, and as a side-effect broke the vfs.ioopt code. This code might have been committed seperately, but almost everything is interrelated. 1) Allow (pmap_object_init_pt) prefaulting of buffer-busy pages that are fully valid. 2) Rather than deactivating erroneously read initial (header) pages in kern_exec, we now free them. 3) Fix the rundown of non-VMIO buffers that are in an inconsistent (missing vp) state. 4) Fix the disassociation of pages from buffers in brelse. The previous code had rotted and was faulty in a couple of important circumstances. 5) Remove a gratuitious buffer wakeup in vfs_vmio_release. 6) Remove a crufty and currently unused cluster mechanism for VBLK files in vfs_bio_awrite. When the code is functional, I'll add back a cleaner version. 7) The page busy count wakeups assocated with the buffer cache usage were incorrectly cleaned up in a previous commit by me. Revert to the original, correct version, but with a cleaner implementation. 8) The cluster read code now tries to keep data associated with buffers more aggressively (without breaking the heuristics) when it is presumed that the read data (buffers) will be soon needed. 9) Change to filesystem lockmgr locks so that they use LK_NOPAUSE. The delay loop waiting is not useful for filesystem locks, due to the length of the time intervals. 10) Correct and clean-up spec_getpages. 11) Implement a fully functional nfs_getpages, nfs_putpages. 12) Fix nfs_write so that modifications are coherent with the NFS data on the server disk (at least as well as NFS seems to allow.) 13) Properly support MS_INVALIDATE on NFS. 14) Properly pass down MS_INVALIDATE to lower levels of the VM code from vm_map_clean. 15) Better support the notion of pages being busy but valid, so that fewer in-transit waits occur. (use p->busy more for pageouts instead of PG_BUSY.) Since the page is fully valid, it is still usable for reads. 16) It is possible (in error) for cached pages to be busy. Make the page allocation code handle that case correctly. (It should probably be a printf or panic, but I want the system to handle coding errors robustly. I'll probably add a printf.) 17) Correct the design and usage of vm_page_sleep. It didn't handle consistancy problems very well, so make the design a little less lofty. After vm_page_sleep, if it ever blocked, it is still important to relookup the page (if the object generation count changed), and verify it's status (always.) 18) In vm_pageout.c, vm_pageout_clean had rotted, so clean that up. 19) Push the page busy for writes and VM_PROT_READ into vm_pageout_flush. 20) Fix vm_pager_put_pages and it's descendents to support an int flag instead of a boolean, so that we can pass down the invalidate bit.
1998-03-07 21:37:31 +00:00
if (nread < ap->a_count) {
bzero((caddr_t)kva + nread,
ap->a_count - nread);
}
pmap_qremove(kva, pcount);
This mega-commit is meant to fix numerous interrelated problems. There has been some bitrot and incorrect assumptions in the vfs_bio code. These problems have manifest themselves worse on NFS type filesystems, but can still affect local filesystems under certain circumstances. Most of the problems have involved mmap consistancy, and as a side-effect broke the vfs.ioopt code. This code might have been committed seperately, but almost everything is interrelated. 1) Allow (pmap_object_init_pt) prefaulting of buffer-busy pages that are fully valid. 2) Rather than deactivating erroneously read initial (header) pages in kern_exec, we now free them. 3) Fix the rundown of non-VMIO buffers that are in an inconsistent (missing vp) state. 4) Fix the disassociation of pages from buffers in brelse. The previous code had rotted and was faulty in a couple of important circumstances. 5) Remove a gratuitious buffer wakeup in vfs_vmio_release. 6) Remove a crufty and currently unused cluster mechanism for VBLK files in vfs_bio_awrite. When the code is functional, I'll add back a cleaner version. 7) The page busy count wakeups assocated with the buffer cache usage were incorrectly cleaned up in a previous commit by me. Revert to the original, correct version, but with a cleaner implementation. 8) The cluster read code now tries to keep data associated with buffers more aggressively (without breaking the heuristics) when it is presumed that the read data (buffers) will be soon needed. 9) Change to filesystem lockmgr locks so that they use LK_NOPAUSE. The delay loop waiting is not useful for filesystem locks, due to the length of the time intervals. 10) Correct and clean-up spec_getpages. 11) Implement a fully functional nfs_getpages, nfs_putpages. 12) Fix nfs_write so that modifications are coherent with the NFS data on the server disk (at least as well as NFS seems to allow.) 13) Properly support MS_INVALIDATE on NFS. 14) Properly pass down MS_INVALIDATE to lower levels of the VM code from vm_map_clean. 15) Better support the notion of pages being busy but valid, so that fewer in-transit waits occur. (use p->busy more for pageouts instead of PG_BUSY.) Since the page is fully valid, it is still usable for reads. 16) It is possible (in error) for cached pages to be busy. Make the page allocation code handle that case correctly. (It should probably be a printf or panic, but I want the system to handle coding errors robustly. I'll probably add a printf.) 17) Correct the design and usage of vm_page_sleep. It didn't handle consistancy problems very well, so make the design a little less lofty. After vm_page_sleep, if it ever blocked, it is still important to relookup the page (if the object generation count changed), and verify it's status (always.) 18) In vm_pageout.c, vm_pageout_clean had rotted, so clean that up. 19) Push the page busy for writes and VM_PROT_READ into vm_pageout_flush. 20) Fix vm_pager_put_pages and it's descendents to support an int flag instead of a boolean, so that we can pass down the invalidate bit.
1998-03-07 21:37:31 +00:00
gotreqpage = 0;
for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) {
nextoff = toff + PAGE_SIZE;
m = ap->a_m[i];
m->flags &= ~PG_ZERO;
This mega-commit is meant to fix numerous interrelated problems. There has been some bitrot and incorrect assumptions in the vfs_bio code. These problems have manifest themselves worse on NFS type filesystems, but can still affect local filesystems under certain circumstances. Most of the problems have involved mmap consistancy, and as a side-effect broke the vfs.ioopt code. This code might have been committed seperately, but almost everything is interrelated. 1) Allow (pmap_object_init_pt) prefaulting of buffer-busy pages that are fully valid. 2) Rather than deactivating erroneously read initial (header) pages in kern_exec, we now free them. 3) Fix the rundown of non-VMIO buffers that are in an inconsistent (missing vp) state. 4) Fix the disassociation of pages from buffers in brelse. The previous code had rotted and was faulty in a couple of important circumstances. 5) Remove a gratuitious buffer wakeup in vfs_vmio_release. 6) Remove a crufty and currently unused cluster mechanism for VBLK files in vfs_bio_awrite. When the code is functional, I'll add back a cleaner version. 7) The page busy count wakeups assocated with the buffer cache usage were incorrectly cleaned up in a previous commit by me. Revert to the original, correct version, but with a cleaner implementation. 8) The cluster read code now tries to keep data associated with buffers more aggressively (without breaking the heuristics) when it is presumed that the read data (buffers) will be soon needed. 9) Change to filesystem lockmgr locks so that they use LK_NOPAUSE. The delay loop waiting is not useful for filesystem locks, due to the length of the time intervals. 10) Correct and clean-up spec_getpages. 11) Implement a fully functional nfs_getpages, nfs_putpages. 12) Fix nfs_write so that modifications are coherent with the NFS data on the server disk (at least as well as NFS seems to allow.) 13) Properly support MS_INVALIDATE on NFS. 14) Properly pass down MS_INVALIDATE to lower levels of the VM code from vm_map_clean. 15) Better support the notion of pages being busy but valid, so that fewer in-transit waits occur. (use p->busy more for pageouts instead of PG_BUSY.) Since the page is fully valid, it is still usable for reads. 16) It is possible (in error) for cached pages to be busy. Make the page allocation code handle that case correctly. (It should probably be a printf or panic, but I want the system to handle coding errors robustly. I'll probably add a printf.) 17) Correct the design and usage of vm_page_sleep. It didn't handle consistancy problems very well, so make the design a little less lofty. After vm_page_sleep, if it ever blocked, it is still important to relookup the page (if the object generation count changed), and verify it's status (always.) 18) In vm_pageout.c, vm_pageout_clean had rotted, so clean that up. 19) Push the page busy for writes and VM_PROT_READ into vm_pageout_flush. 20) Fix vm_pager_put_pages and it's descendents to support an int flag instead of a boolean, so that we can pass down the invalidate bit.
1998-03-07 21:37:31 +00:00
if (nextoff <= nread) {
m->valid = VM_PAGE_BITS_ALL;
vm_page_undirty(m);
This mega-commit is meant to fix numerous interrelated problems. There has been some bitrot and incorrect assumptions in the vfs_bio code. These problems have manifest themselves worse on NFS type filesystems, but can still affect local filesystems under certain circumstances. Most of the problems have involved mmap consistancy, and as a side-effect broke the vfs.ioopt code. This code might have been committed seperately, but almost everything is interrelated. 1) Allow (pmap_object_init_pt) prefaulting of buffer-busy pages that are fully valid. 2) Rather than deactivating erroneously read initial (header) pages in kern_exec, we now free them. 3) Fix the rundown of non-VMIO buffers that are in an inconsistent (missing vp) state. 4) Fix the disassociation of pages from buffers in brelse. The previous code had rotted and was faulty in a couple of important circumstances. 5) Remove a gratuitious buffer wakeup in vfs_vmio_release. 6) Remove a crufty and currently unused cluster mechanism for VBLK files in vfs_bio_awrite. When the code is functional, I'll add back a cleaner version. 7) The page busy count wakeups assocated with the buffer cache usage were incorrectly cleaned up in a previous commit by me. Revert to the original, correct version, but with a cleaner implementation. 8) The cluster read code now tries to keep data associated with buffers more aggressively (without breaking the heuristics) when it is presumed that the read data (buffers) will be soon needed. 9) Change to filesystem lockmgr locks so that they use LK_NOPAUSE. The delay loop waiting is not useful for filesystem locks, due to the length of the time intervals. 10) Correct and clean-up spec_getpages. 11) Implement a fully functional nfs_getpages, nfs_putpages. 12) Fix nfs_write so that modifications are coherent with the NFS data on the server disk (at least as well as NFS seems to allow.) 13) Properly support MS_INVALIDATE on NFS. 14) Properly pass down MS_INVALIDATE to lower levels of the VM code from vm_map_clean. 15) Better support the notion of pages being busy but valid, so that fewer in-transit waits occur. (use p->busy more for pageouts instead of PG_BUSY.) Since the page is fully valid, it is still usable for reads. 16) It is possible (in error) for cached pages to be busy. Make the page allocation code handle that case correctly. (It should probably be a printf or panic, but I want the system to handle coding errors robustly. I'll probably add a printf.) 17) Correct the design and usage of vm_page_sleep. It didn't handle consistancy problems very well, so make the design a little less lofty. After vm_page_sleep, if it ever blocked, it is still important to relookup the page (if the object generation count changed), and verify it's status (always.) 18) In vm_pageout.c, vm_pageout_clean had rotted, so clean that up. 19) Push the page busy for writes and VM_PROT_READ into vm_pageout_flush. 20) Fix vm_pager_put_pages and it's descendents to support an int flag instead of a boolean, so that we can pass down the invalidate bit.
1998-03-07 21:37:31 +00:00
} else if (toff < nread) {
/*
* Since this is a VM request, we have to supply the
* unaligned offset to allow vm_page_set_validclean()
* to zero sub-DEV_BSIZE'd portions of the page.
*/
vm_page_set_validclean(m, 0, nread - toff);
This mega-commit is meant to fix numerous interrelated problems. There has been some bitrot and incorrect assumptions in the vfs_bio code. These problems have manifest themselves worse on NFS type filesystems, but can still affect local filesystems under certain circumstances. Most of the problems have involved mmap consistancy, and as a side-effect broke the vfs.ioopt code. This code might have been committed seperately, but almost everything is interrelated. 1) Allow (pmap_object_init_pt) prefaulting of buffer-busy pages that are fully valid. 2) Rather than deactivating erroneously read initial (header) pages in kern_exec, we now free them. 3) Fix the rundown of non-VMIO buffers that are in an inconsistent (missing vp) state. 4) Fix the disassociation of pages from buffers in brelse. The previous code had rotted and was faulty in a couple of important circumstances. 5) Remove a gratuitious buffer wakeup in vfs_vmio_release. 6) Remove a crufty and currently unused cluster mechanism for VBLK files in vfs_bio_awrite. When the code is functional, I'll add back a cleaner version. 7) The page busy count wakeups assocated with the buffer cache usage were incorrectly cleaned up in a previous commit by me. Revert to the original, correct version, but with a cleaner implementation. 8) The cluster read code now tries to keep data associated with buffers more aggressively (without breaking the heuristics) when it is presumed that the read data (buffers) will be soon needed. 9) Change to filesystem lockmgr locks so that they use LK_NOPAUSE. The delay loop waiting is not useful for filesystem locks, due to the length of the time intervals. 10) Correct and clean-up spec_getpages. 11) Implement a fully functional nfs_getpages, nfs_putpages. 12) Fix nfs_write so that modifications are coherent with the NFS data on the server disk (at least as well as NFS seems to allow.) 13) Properly support MS_INVALIDATE on NFS. 14) Properly pass down MS_INVALIDATE to lower levels of the VM code from vm_map_clean. 15) Better support the notion of pages being busy but valid, so that fewer in-transit waits occur. (use p->busy more for pageouts instead of PG_BUSY.) Since the page is fully valid, it is still usable for reads. 16) It is possible (in error) for cached pages to be busy. Make the page allocation code handle that case correctly. (It should probably be a printf or panic, but I want the system to handle coding errors robustly. I'll probably add a printf.) 17) Correct the design and usage of vm_page_sleep. It didn't handle consistancy problems very well, so make the design a little less lofty. After vm_page_sleep, if it ever blocked, it is still important to relookup the page (if the object generation count changed), and verify it's status (always.) 18) In vm_pageout.c, vm_pageout_clean had rotted, so clean that up. 19) Push the page busy for writes and VM_PROT_READ into vm_pageout_flush. 20) Fix vm_pager_put_pages and it's descendents to support an int flag instead of a boolean, so that we can pass down the invalidate bit.
1998-03-07 21:37:31 +00:00
} else {
m->valid = 0;
vm_page_undirty(m);
This mega-commit is meant to fix numerous interrelated problems. There has been some bitrot and incorrect assumptions in the vfs_bio code. These problems have manifest themselves worse on NFS type filesystems, but can still affect local filesystems under certain circumstances. Most of the problems have involved mmap consistancy, and as a side-effect broke the vfs.ioopt code. This code might have been committed seperately, but almost everything is interrelated. 1) Allow (pmap_object_init_pt) prefaulting of buffer-busy pages that are fully valid. 2) Rather than deactivating erroneously read initial (header) pages in kern_exec, we now free them. 3) Fix the rundown of non-VMIO buffers that are in an inconsistent (missing vp) state. 4) Fix the disassociation of pages from buffers in brelse. The previous code had rotted and was faulty in a couple of important circumstances. 5) Remove a gratuitious buffer wakeup in vfs_vmio_release. 6) Remove a crufty and currently unused cluster mechanism for VBLK files in vfs_bio_awrite. When the code is functional, I'll add back a cleaner version. 7) The page busy count wakeups assocated with the buffer cache usage were incorrectly cleaned up in a previous commit by me. Revert to the original, correct version, but with a cleaner implementation. 8) The cluster read code now tries to keep data associated with buffers more aggressively (without breaking the heuristics) when it is presumed that the read data (buffers) will be soon needed. 9) Change to filesystem lockmgr locks so that they use LK_NOPAUSE. The delay loop waiting is not useful for filesystem locks, due to the length of the time intervals. 10) Correct and clean-up spec_getpages. 11) Implement a fully functional nfs_getpages, nfs_putpages. 12) Fix nfs_write so that modifications are coherent with the NFS data on the server disk (at least as well as NFS seems to allow.) 13) Properly support MS_INVALIDATE on NFS. 14) Properly pass down MS_INVALIDATE to lower levels of the VM code from vm_map_clean. 15) Better support the notion of pages being busy but valid, so that fewer in-transit waits occur. (use p->busy more for pageouts instead of PG_BUSY.) Since the page is fully valid, it is still usable for reads. 16) It is possible (in error) for cached pages to be busy. Make the page allocation code handle that case correctly. (It should probably be a printf or panic, but I want the system to handle coding errors robustly. I'll probably add a printf.) 17) Correct the design and usage of vm_page_sleep. It didn't handle consistancy problems very well, so make the design a little less lofty. After vm_page_sleep, if it ever blocked, it is still important to relookup the page (if the object generation count changed), and verify it's status (always.) 18) In vm_pageout.c, vm_pageout_clean had rotted, so clean that up. 19) Push the page busy for writes and VM_PROT_READ into vm_pageout_flush. 20) Fix vm_pager_put_pages and it's descendents to support an int flag instead of a boolean, so that we can pass down the invalidate bit.
1998-03-07 21:37:31 +00:00
}
if (i != ap->a_reqpage) {
/*
* Just in case someone was asking for this page we
* now tell them that it is ok to use.
*/
This mega-commit is meant to fix numerous interrelated problems. There has been some bitrot and incorrect assumptions in the vfs_bio code. These problems have manifest themselves worse on NFS type filesystems, but can still affect local filesystems under certain circumstances. Most of the problems have involved mmap consistancy, and as a side-effect broke the vfs.ioopt code. This code might have been committed seperately, but almost everything is interrelated. 1) Allow (pmap_object_init_pt) prefaulting of buffer-busy pages that are fully valid. 2) Rather than deactivating erroneously read initial (header) pages in kern_exec, we now free them. 3) Fix the rundown of non-VMIO buffers that are in an inconsistent (missing vp) state. 4) Fix the disassociation of pages from buffers in brelse. The previous code had rotted and was faulty in a couple of important circumstances. 5) Remove a gratuitious buffer wakeup in vfs_vmio_release. 6) Remove a crufty and currently unused cluster mechanism for VBLK files in vfs_bio_awrite. When the code is functional, I'll add back a cleaner version. 7) The page busy count wakeups assocated with the buffer cache usage were incorrectly cleaned up in a previous commit by me. Revert to the original, correct version, but with a cleaner implementation. 8) The cluster read code now tries to keep data associated with buffers more aggressively (without breaking the heuristics) when it is presumed that the read data (buffers) will be soon needed. 9) Change to filesystem lockmgr locks so that they use LK_NOPAUSE. The delay loop waiting is not useful for filesystem locks, due to the length of the time intervals. 10) Correct and clean-up spec_getpages. 11) Implement a fully functional nfs_getpages, nfs_putpages. 12) Fix nfs_write so that modifications are coherent with the NFS data on the server disk (at least as well as NFS seems to allow.) 13) Properly support MS_INVALIDATE on NFS. 14) Properly pass down MS_INVALIDATE to lower levels of the VM code from vm_map_clean. 15) Better support the notion of pages being busy but valid, so that fewer in-transit waits occur. (use p->busy more for pageouts instead of PG_BUSY.) Since the page is fully valid, it is still usable for reads. 16) It is possible (in error) for cached pages to be busy. Make the page allocation code handle that case correctly. (It should probably be a printf or panic, but I want the system to handle coding errors robustly. I'll probably add a printf.) 17) Correct the design and usage of vm_page_sleep. It didn't handle consistancy problems very well, so make the design a little less lofty. After vm_page_sleep, if it ever blocked, it is still important to relookup the page (if the object generation count changed), and verify it's status (always.) 18) In vm_pageout.c, vm_pageout_clean had rotted, so clean that up. 19) Push the page busy for writes and VM_PROT_READ into vm_pageout_flush. 20) Fix vm_pager_put_pages and it's descendents to support an int flag instead of a boolean, so that we can pass down the invalidate bit.
1998-03-07 21:37:31 +00:00
if (!error || (m->valid == VM_PAGE_BITS_ALL)) {
if (m->valid) {
if (m->flags & PG_WANTED) {
vm_page_activate(m);
} else {
vm_page_deactivate(m);
}
vm_page_wakeup(m);
This mega-commit is meant to fix numerous interrelated problems. There has been some bitrot and incorrect assumptions in the vfs_bio code. These problems have manifest themselves worse on NFS type filesystems, but can still affect local filesystems under certain circumstances. Most of the problems have involved mmap consistancy, and as a side-effect broke the vfs.ioopt code. This code might have been committed seperately, but almost everything is interrelated. 1) Allow (pmap_object_init_pt) prefaulting of buffer-busy pages that are fully valid. 2) Rather than deactivating erroneously read initial (header) pages in kern_exec, we now free them. 3) Fix the rundown of non-VMIO buffers that are in an inconsistent (missing vp) state. 4) Fix the disassociation of pages from buffers in brelse. The previous code had rotted and was faulty in a couple of important circumstances. 5) Remove a gratuitious buffer wakeup in vfs_vmio_release. 6) Remove a crufty and currently unused cluster mechanism for VBLK files in vfs_bio_awrite. When the code is functional, I'll add back a cleaner version. 7) The page busy count wakeups assocated with the buffer cache usage were incorrectly cleaned up in a previous commit by me. Revert to the original, correct version, but with a cleaner implementation. 8) The cluster read code now tries to keep data associated with buffers more aggressively (without breaking the heuristics) when it is presumed that the read data (buffers) will be soon needed. 9) Change to filesystem lockmgr locks so that they use LK_NOPAUSE. The delay loop waiting is not useful for filesystem locks, due to the length of the time intervals. 10) Correct and clean-up spec_getpages. 11) Implement a fully functional nfs_getpages, nfs_putpages. 12) Fix nfs_write so that modifications are coherent with the NFS data on the server disk (at least as well as NFS seems to allow.) 13) Properly support MS_INVALIDATE on NFS. 14) Properly pass down MS_INVALIDATE to lower levels of the VM code from vm_map_clean. 15) Better support the notion of pages being busy but valid, so that fewer in-transit waits occur. (use p->busy more for pageouts instead of PG_BUSY.) Since the page is fully valid, it is still usable for reads. 16) It is possible (in error) for cached pages to be busy. Make the page allocation code handle that case correctly. (It should probably be a printf or panic, but I want the system to handle coding errors robustly. I'll probably add a printf.) 17) Correct the design and usage of vm_page_sleep. It didn't handle consistancy problems very well, so make the design a little less lofty. After vm_page_sleep, if it ever blocked, it is still important to relookup the page (if the object generation count changed), and verify it's status (always.) 18) In vm_pageout.c, vm_pageout_clean had rotted, so clean that up. 19) Push the page busy for writes and VM_PROT_READ into vm_pageout_flush. 20) Fix vm_pager_put_pages and it's descendents to support an int flag instead of a boolean, so that we can pass down the invalidate bit.
1998-03-07 21:37:31 +00:00
} else {
vm_page_free(m);
}
} else {
vm_page_free(m);
}
} else if (m->valid) {
gotreqpage = 1;
/*
* Since this is a VM request, we need to make the
* entire page presentable by zeroing invalid sections.
*/
if (m->valid != VM_PAGE_BITS_ALL)
vm_page_zero_invalid(m, FALSE);
}
}
This mega-commit is meant to fix numerous interrelated problems. There has been some bitrot and incorrect assumptions in the vfs_bio code. These problems have manifest themselves worse on NFS type filesystems, but can still affect local filesystems under certain circumstances. Most of the problems have involved mmap consistancy, and as a side-effect broke the vfs.ioopt code. This code might have been committed seperately, but almost everything is interrelated. 1) Allow (pmap_object_init_pt) prefaulting of buffer-busy pages that are fully valid. 2) Rather than deactivating erroneously read initial (header) pages in kern_exec, we now free them. 3) Fix the rundown of non-VMIO buffers that are in an inconsistent (missing vp) state. 4) Fix the disassociation of pages from buffers in brelse. The previous code had rotted and was faulty in a couple of important circumstances. 5) Remove a gratuitious buffer wakeup in vfs_vmio_release. 6) Remove a crufty and currently unused cluster mechanism for VBLK files in vfs_bio_awrite. When the code is functional, I'll add back a cleaner version. 7) The page busy count wakeups assocated with the buffer cache usage were incorrectly cleaned up in a previous commit by me. Revert to the original, correct version, but with a cleaner implementation. 8) The cluster read code now tries to keep data associated with buffers more aggressively (without breaking the heuristics) when it is presumed that the read data (buffers) will be soon needed. 9) Change to filesystem lockmgr locks so that they use LK_NOPAUSE. The delay loop waiting is not useful for filesystem locks, due to the length of the time intervals. 10) Correct and clean-up spec_getpages. 11) Implement a fully functional nfs_getpages, nfs_putpages. 12) Fix nfs_write so that modifications are coherent with the NFS data on the server disk (at least as well as NFS seems to allow.) 13) Properly support MS_INVALIDATE on NFS. 14) Properly pass down MS_INVALIDATE to lower levels of the VM code from vm_map_clean. 15) Better support the notion of pages being busy but valid, so that fewer in-transit waits occur. (use p->busy more for pageouts instead of PG_BUSY.) Since the page is fully valid, it is still usable for reads. 16) It is possible (in error) for cached pages to be busy. Make the page allocation code handle that case correctly. (It should probably be a printf or panic, but I want the system to handle coding errors robustly. I'll probably add a printf.) 17) Correct the design and usage of vm_page_sleep. It didn't handle consistancy problems very well, so make the design a little less lofty. After vm_page_sleep, if it ever blocked, it is still important to relookup the page (if the object generation count changed), and verify it's status (always.) 18) In vm_pageout.c, vm_pageout_clean had rotted, so clean that up. 19) Push the page busy for writes and VM_PROT_READ into vm_pageout_flush. 20) Fix vm_pager_put_pages and it's descendents to support an int flag instead of a boolean, so that we can pass down the invalidate bit.
1998-03-07 21:37:31 +00:00
if (!gotreqpage) {
m = ap->a_m[ap->a_reqpage];
#ifndef MAX_PERF
1998-07-11 07:46:16 +00:00
printf(
"spec_getpages:(%s) I/O read failure: (error=%d) bp %p vp %p\n",
devtoname(bp->b_dev), error, bp, bp->b_vp);
1998-07-11 07:46:16 +00:00
printf(
" size: %d, resid: %ld, a_count: %d, valid: 0x%x\n",
size, bp->b_resid, ap->a_count, m->valid);
printf(
" nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n",
nread, ap->a_reqpage, (u_long)m->pindex, pcount);
#endif
/*
* Free the buffer header back to the swap buffer pool.
*/
relpbuf(bp, NULL);
This mega-commit is meant to fix numerous interrelated problems. There has been some bitrot and incorrect assumptions in the vfs_bio code. These problems have manifest themselves worse on NFS type filesystems, but can still affect local filesystems under certain circumstances. Most of the problems have involved mmap consistancy, and as a side-effect broke the vfs.ioopt code. This code might have been committed seperately, but almost everything is interrelated. 1) Allow (pmap_object_init_pt) prefaulting of buffer-busy pages that are fully valid. 2) Rather than deactivating erroneously read initial (header) pages in kern_exec, we now free them. 3) Fix the rundown of non-VMIO buffers that are in an inconsistent (missing vp) state. 4) Fix the disassociation of pages from buffers in brelse. The previous code had rotted and was faulty in a couple of important circumstances. 5) Remove a gratuitious buffer wakeup in vfs_vmio_release. 6) Remove a crufty and currently unused cluster mechanism for VBLK files in vfs_bio_awrite. When the code is functional, I'll add back a cleaner version. 7) The page busy count wakeups assocated with the buffer cache usage were incorrectly cleaned up in a previous commit by me. Revert to the original, correct version, but with a cleaner implementation. 8) The cluster read code now tries to keep data associated with buffers more aggressively (without breaking the heuristics) when it is presumed that the read data (buffers) will be soon needed. 9) Change to filesystem lockmgr locks so that they use LK_NOPAUSE. The delay loop waiting is not useful for filesystem locks, due to the length of the time intervals. 10) Correct and clean-up spec_getpages. 11) Implement a fully functional nfs_getpages, nfs_putpages. 12) Fix nfs_write so that modifications are coherent with the NFS data on the server disk (at least as well as NFS seems to allow.) 13) Properly support MS_INVALIDATE on NFS. 14) Properly pass down MS_INVALIDATE to lower levels of the VM code from vm_map_clean. 15) Better support the notion of pages being busy but valid, so that fewer in-transit waits occur. (use p->busy more for pageouts instead of PG_BUSY.) Since the page is fully valid, it is still usable for reads. 16) It is possible (in error) for cached pages to be busy. Make the page allocation code handle that case correctly. (It should probably be a printf or panic, but I want the system to handle coding errors robustly. I'll probably add a printf.) 17) Correct the design and usage of vm_page_sleep. It didn't handle consistancy problems very well, so make the design a little less lofty. After vm_page_sleep, if it ever blocked, it is still important to relookup the page (if the object generation count changed), and verify it's status (always.) 18) In vm_pageout.c, vm_pageout_clean had rotted, so clean that up. 19) Push the page busy for writes and VM_PROT_READ into vm_pageout_flush. 20) Fix vm_pager_put_pages and it's descendents to support an int flag instead of a boolean, so that we can pass down the invalidate bit.
1998-03-07 21:37:31 +00:00
return VM_PAGER_ERROR;
}
/*
* Free the buffer header back to the swap buffer pool.
*/
relpbuf(bp, NULL);
This mega-commit is meant to fix numerous interrelated problems. There has been some bitrot and incorrect assumptions in the vfs_bio code. These problems have manifest themselves worse on NFS type filesystems, but can still affect local filesystems under certain circumstances. Most of the problems have involved mmap consistancy, and as a side-effect broke the vfs.ioopt code. This code might have been committed seperately, but almost everything is interrelated. 1) Allow (pmap_object_init_pt) prefaulting of buffer-busy pages that are fully valid. 2) Rather than deactivating erroneously read initial (header) pages in kern_exec, we now free them. 3) Fix the rundown of non-VMIO buffers that are in an inconsistent (missing vp) state. 4) Fix the disassociation of pages from buffers in brelse. The previous code had rotted and was faulty in a couple of important circumstances. 5) Remove a gratuitious buffer wakeup in vfs_vmio_release. 6) Remove a crufty and currently unused cluster mechanism for VBLK files in vfs_bio_awrite. When the code is functional, I'll add back a cleaner version. 7) The page busy count wakeups assocated with the buffer cache usage were incorrectly cleaned up in a previous commit by me. Revert to the original, correct version, but with a cleaner implementation. 8) The cluster read code now tries to keep data associated with buffers more aggressively (without breaking the heuristics) when it is presumed that the read data (buffers) will be soon needed. 9) Change to filesystem lockmgr locks so that they use LK_NOPAUSE. The delay loop waiting is not useful for filesystem locks, due to the length of the time intervals. 10) Correct and clean-up spec_getpages. 11) Implement a fully functional nfs_getpages, nfs_putpages. 12) Fix nfs_write so that modifications are coherent with the NFS data on the server disk (at least as well as NFS seems to allow.) 13) Properly support MS_INVALIDATE on NFS. 14) Properly pass down MS_INVALIDATE to lower levels of the VM code from vm_map_clean. 15) Better support the notion of pages being busy but valid, so that fewer in-transit waits occur. (use p->busy more for pageouts instead of PG_BUSY.) Since the page is fully valid, it is still usable for reads. 16) It is possible (in error) for cached pages to be busy. Make the page allocation code handle that case correctly. (It should probably be a printf or panic, but I want the system to handle coding errors robustly. I'll probably add a printf.) 17) Correct the design and usage of vm_page_sleep. It didn't handle consistancy problems very well, so make the design a little less lofty. After vm_page_sleep, if it ever blocked, it is still important to relookup the page (if the object generation count changed), and verify it's status (always.) 18) In vm_pageout.c, vm_pageout_clean had rotted, so clean that up. 19) Push the page busy for writes and VM_PROT_READ into vm_pageout_flush. 20) Fix vm_pager_put_pages and it's descendents to support an int flag instead of a boolean, so that we can pass down the invalidate bit.
1998-03-07 21:37:31 +00:00
return VM_PAGER_OK;
}