Add better support for larger I/O clusters, including larger physical I/O.

The support is not mature yet, and some of the underlying implementation
needs help.  However, support does exist for IDE devices now.
This commit is contained in:
John Dyson 1998-01-24 02:01:46 +00:00
parent 4147817ab9
commit 50ce7ff499
17 changed files with 207 additions and 61 deletions

View File

@ -35,7 +35,7 @@
* SUCH DAMAGE.
*
* from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
* $Id: machdep.c,v 1.281 1998/01/12 05:16:03 dyson Exp $
* $Id: machdep.c,v 1.282 1998/01/22 17:29:26 dyson Exp $
*/
#include "apm.h"
@ -302,7 +302,7 @@ again:
if( physmem > 1024)
nbuf += min((physmem - 1024) / 8, 2048);
}
nswbuf = max(min(nbuf/4, 128), 16);
nswbuf = max(min(nbuf/4, 64), 16);
valloc(swbuf, struct buf, nswbuf);
valloc(buf, struct buf, nbuf);

View File

@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)ufs_bmap.c 8.7 (Berkeley) 3/21/95
* $Id: ufs_bmap.c,v 1.16 1997/09/02 20:06:56 bde Exp $
* $Id: ufs_bmap.c,v 1.17 1997/11/24 16:33:03 bde Exp $
*/
#include <sys/param.h>
@ -46,11 +46,13 @@
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/resourcevar.h>
#include <sys/conf.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
#include <miscfs/specfs/specdev.h>
/*
* Bmap converts the logical block number of a file to its physical block
@ -124,18 +126,48 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
#endif
if (runp) {
*runp = 0;
}
if (runb) {
*runb = 0;
}
maxrun = 0;
if (runp || runb || (vp->v_maxio == 0)) {
struct vnode *devvp;
int blksize;
blksize = mp->mnt_stat.f_iosize;
/*
* XXX
* If MAXPHYS is the largest transfer the disks can handle,
* we probably want maxrun to be 1 block less so that we
* don't create a block larger than the device can handle.
*/
*runp = 0;
maxrun = MAXPHYS / mp->mnt_stat.f_iosize - 1;
}
devvp = ip->i_devvp;
if (runb) {
*runb = 0;
if (devvp && devvp->v_type == VBLK &&
(devvp->v_rdev != NODEV) &&
(major(devvp->v_rdev) < nblkdev)) {
if (bdevsw[major(devvp->v_rdev)]->d_maxio > MAXPHYS) {
maxrun = MAXPHYS;
vp->v_maxio = MAXPHYS;
} else {
maxrun = bdevsw[major(devvp->v_rdev)]->d_maxio;
vp->v_maxio = bdevsw[major(devvp->v_rdev)]->d_maxio;
}
maxrun = maxrun / blksize;
maxrun -= 1;
}
if (maxrun == 0) {
vp->v_maxio = DFLTPHYS;
maxrun = DFLTPHYS / blksize;
maxrun -= 1;
}
}
xap = ap == NULL ? a : ap;

View File

@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)ufs_bmap.c 8.7 (Berkeley) 3/21/95
* $Id: ufs_bmap.c,v 1.16 1997/09/02 20:06:56 bde Exp $
* $Id: ufs_bmap.c,v 1.17 1997/11/24 16:33:03 bde Exp $
*/
#include <sys/param.h>
@ -46,11 +46,13 @@
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/resourcevar.h>
#include <sys/conf.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
#include <miscfs/specfs/specdev.h>
/*
* Bmap converts the logical block number of a file to its physical block
@ -124,18 +126,48 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
#endif
if (runp) {
*runp = 0;
}
if (runb) {
*runb = 0;
}
maxrun = 0;
if (runp || runb || (vp->v_maxio == 0)) {
struct vnode *devvp;
int blksize;
blksize = mp->mnt_stat.f_iosize;
/*
* XXX
* If MAXPHYS is the largest transfer the disks can handle,
* we probably want maxrun to be 1 block less so that we
* don't create a block larger than the device can handle.
*/
*runp = 0;
maxrun = MAXPHYS / mp->mnt_stat.f_iosize - 1;
}
devvp = ip->i_devvp;
if (runb) {
*runb = 0;
if (devvp && devvp->v_type == VBLK &&
(devvp->v_rdev != NODEV) &&
(major(devvp->v_rdev) < nblkdev)) {
if (bdevsw[major(devvp->v_rdev)]->d_maxio > MAXPHYS) {
maxrun = MAXPHYS;
vp->v_maxio = MAXPHYS;
} else {
maxrun = bdevsw[major(devvp->v_rdev)]->d_maxio;
vp->v_maxio = bdevsw[major(devvp->v_rdev)]->d_maxio;
}
maxrun = maxrun / blksize;
maxrun -= 1;
}
if (maxrun == 0) {
vp->v_maxio = DFLTPHYS;
maxrun = DFLTPHYS / blksize;
maxrun -= 1;
}
}
xap = ap == NULL ? a : ap;

View File

@ -35,7 +35,7 @@
* SUCH DAMAGE.
*
* from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
* $Id: machdep.c,v 1.281 1998/01/12 05:16:03 dyson Exp $
* $Id: machdep.c,v 1.282 1998/01/22 17:29:26 dyson Exp $
*/
#include "apm.h"
@ -302,7 +302,7 @@ again:
if( physmem > 1024)
nbuf += min((physmem - 1024) / 8, 2048);
}
nswbuf = max(min(nbuf/4, 128), 16);
nswbuf = max(min(nbuf/4, 64), 16);
valloc(swbuf, struct buf, nswbuf);
valloc(buf, struct buf, nbuf);

View File

@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* from: @(#)param.h 5.8 (Berkeley) 6/28/91
* $Id: param.h,v 1.16 1997/08/30 01:22:01 smp Exp smp $
* $Id: param.h,v 1.41 1997/08/30 08:07:50 fsmp Exp $
*/
#ifndef _MACHINE_PARAM_H_
@ -76,7 +76,8 @@
#define DEV_BSIZE (1<<DEV_BSHIFT)
#define BLKDEV_IOSIZE 2048
#define MAXPHYS (64 * 1024) /* max raw I/O transfer size */
#define DFLTPHYS (64 * 1024) /* default max raw I/O transfer size */
#define MAXPHYS (128 * 1024) /* max raw I/O transfer size */
#define IOPAGES 2 /* pages of i/o permission bitmap */
#define UPAGES 2 /* pages of u-area */

View File

@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* from: @(#)wd.c 7.2 (Berkeley) 5/9/91
* $Id: wd.c,v 1.146 1997/12/06 14:27:20 bde Exp $
* $Id: wd.c,v 1.147 1998/01/16 22:13:06 pst Exp $
*/
/* TODO:
@ -2429,6 +2429,8 @@ static void wd_drvinit(void *unused)
{
if( ! wd_devsw_installed ) {
if (wd_bdevsw.d_maxio == 0)
wd_bdevsw.d_maxio = 248 * 512;
bdevsw_add_generic(BDEV_MAJOR,CDEV_MAJOR, &wd_bdevsw);
wd_devsw_installed = 1;
}

View File

@ -16,7 +16,7 @@
* 4. Modifications may be freely made to this file if the above conditions
* are met.
*
* $Id: kern_physio.c,v 1.21 1997/08/26 00:15:04 bde Exp $
* $Id: kern_physio.c,v 1.22 1997/09/02 20:05:40 bde Exp $
*/
#include <sys/param.h>
@ -28,6 +28,7 @@
#include <vm/vm_extern.h>
static void physwakeup __P((struct buf *bp));
static struct buf * phygetvpbuf(dev_t dev, int resid);
int
physio(strategy, bp, dev, rw, minp, uio)
@ -52,7 +53,7 @@ physio(strategy, bp, dev, rw, minp, uio)
curproc->p_flag |= P_PHYSIO;
/* create and build a buffer header for a transfer */
bpa = (struct buf *)getpbuf();
bpa = (struct buf *)phygetvpbuf(dev, uio->uio_resid);
if (!bp_alloc) {
spl = splbio();
while (bp->b_flags & B_BUSY) {
@ -70,12 +71,12 @@ physio(strategy, bp, dev, rw, minp, uio)
*/
sa = bpa->b_data;
bp->b_proc = curproc;
bp->b_dev = dev;
error = bp->b_error = 0;
for(i=0;i<uio->uio_iovcnt;i++) {
while( uio->uio_iov[i].iov_len) {
bp->b_dev = dev;
bp->b_bcount = uio->uio_iov[i].iov_len;
bp->b_flags = B_BUSY | B_PHYS | B_CALL | bufflags;
bp->b_iodone = physwakeup;
@ -168,18 +169,46 @@ u_int
minphys(bp)
struct buf *bp;
{
u_int maxphys = MAXPHYS;
u_int maxphys = DFLTPHYS;
struct bdevsw *bdsw;
int offset;
if( ((vm_offset_t) bp->b_data) & PAGE_MASK) {
maxphys = MAXPHYS - PAGE_SIZE;
bdsw = cdevsw[major(bp->b_dev)]->d_bdev;
if (bdsw && bdsw->d_maxio) {
maxphys = bdsw->d_maxio;
}
if (bp->b_kvasize < maxphys)
maxphys = bp->b_kvasize;
if(((vm_offset_t) bp->b_data) & PAGE_MASK) {
maxphys -= PAGE_SIZE;
}
if( bp->b_bcount > maxphys) {
bp->b_bcount = maxphys;
}
return bp->b_bcount;
}
/*
 * Obtain a buffer header for a physical (raw) I/O transfer on `dev'.
 *
 * `resid' is the requested transfer size (physio() passes uio->uio_resid).
 * The block-device switch entry paired with the character device is looked
 * up through cdevsw[major(dev)]->d_bdev; when one exists, the local copy of
 * `resid' is clamped to that driver's d_maxio.
 *
 * Every path returns the result of getpbuf().
 *
 * NOTE(review): the clamped `resid' is not handed to getpbuf() or used
 * anywhere else, so the size currently has no effect on the buffer that is
 * returned -- presumably a placeholder for size-aware allocation; confirm.
 */
struct buf *
phygetvpbuf(dev_t dev, int resid)
{
	struct bdevsw *blksw = cdevsw[major(dev)]->d_bdev;

	if (blksw != NULL) {
		int limit = blksw->d_maxio;

		/* Never ask for more than the driver says it can transfer. */
		if (resid > limit)
			resid = limit;
	}

	return getpbuf();
}
int
rawread(dev, uio, ioflag)
dev_t dev;

View File

@ -11,7 +11,7 @@
* 2. Absolutely no warranty of function or purpose is made by the author
* John S. Dyson.
*
* $Id: vfs_bio.c,v 1.143 1998/01/17 09:16:26 dyson Exp $
* $Id: vfs_bio.c,v 1.144 1998/01/22 17:29:51 dyson Exp $
*/
/*
@ -1132,17 +1132,26 @@ findkvaspace:
if (vm_map_findspace(buffer_map,
vm_map_min(buffer_map), maxsize, &addr)) {
if (kvafreespace > 0) {
int tfree = 0;
for (bp1 = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]);
bp1 != NULL; bp1 = TAILQ_NEXT(bp1, b_freelist))
if (bp1->b_kvasize != 0) {
tfree += bp1->b_kvasize;
bremfree(bp1);
bfreekva(bp1);
brelse(bp1);
if (tfree >= maxsize)
goto findkvaspace;
int totfree = 0, freed;
do {
freed = 0;
for (bp1 = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]);
bp1 != NULL; bp1 = TAILQ_NEXT(bp1, b_freelist)) {
if (bp1->b_kvasize != 0) {
totfree += bp1->b_kvasize;
freed = bp1->b_kvasize;
bremfree(bp1);
bfreekva(bp1);
brelse(bp1);
break;
}
}
} while (freed);
/*
* if we found free space, then retry with the same buffer.
*/
if (totfree)
goto findkvaspace;
}
bp->b_flags |= B_INVAL;
brelse(bp);

View File

@ -33,7 +33,7 @@
* SUCH DAMAGE.
*
* @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94
* $Id: vfs_cluster.c,v 1.49 1997/11/07 08:53:05 phk Exp $
* $Id: vfs_cluster.c,v 1.50 1998/01/06 05:16:01 dyson Exp $
*/
#include <sys/param.h>
@ -92,12 +92,14 @@ cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp)
long origtotread;
error = 0;
if (vp->v_maxio == 0)
vp->v_maxio = DFLTPHYS;
/*
* Try to limit the amount of read-ahead by a few
* ad-hoc parameters. This needs work!!!
*/
racluster = MAXPHYS/size;
racluster = vp->v_maxio/size;
maxra = 2 * racluster + (totread / size);
if (maxra > MAXRA)
maxra = MAXRA;
@ -356,11 +358,13 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
bp->b_bufsize = 0;
bp->b_npages = 0;
if (vp->v_maxio == 0)
vp->v_maxio = DFLTPHYS;
inc = btodb(size);
for (bn = blkno, i = 0; i < run; ++i, bn += inc) {
if (i != 0) {
if ((bp->b_npages * PAGE_SIZE) +
round_page(size) > MAXPHYS)
round_page(size) > vp->v_maxio)
break;
if (incore(vp, lbn + i))
@ -492,6 +496,8 @@ cluster_write(bp, filesize)
int async;
vp = bp->b_vp;
if (vp->v_maxio == 0)
vp->v_maxio = DFLTPHYS;
if (vp->v_type == VREG) {
async = vp->v_mount->mnt_flag & MNT_ASYNC;
lblocksize = vp->v_mount->mnt_stat.f_iosize;
@ -507,7 +513,7 @@ cluster_write(bp, filesize)
if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 ||
(bp->b_blkno != vp->v_lasta + btodb(lblocksize))) {
maxclen = MAXPHYS / lblocksize - 1;
maxclen = vp->v_maxio / lblocksize - 1;
if (vp->v_clen != 0) {
/*
* Next block is not sequential.
@ -703,7 +709,7 @@ cluster_wbuild(vp, size, start_lbn, len)
if ((tbp->b_bcount != size) ||
((bp->b_blkno + dbsize * i) != tbp->b_blkno) ||
((tbp->b_npages + bp->b_npages) > (MAXPHYS / PAGE_SIZE))) {
((tbp->b_npages + bp->b_npages) > (vp->v_maxio / PAGE_SIZE))) {
splx(s);
break;
}

View File

@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
* $Id: vfs_subr.c,v 1.124 1998/01/17 09:16:28 dyson Exp $
* $Id: vfs_subr.c,v 1.125 1998/01/22 17:29:52 dyson Exp $
*/
/*
@ -458,6 +458,7 @@ getnewvnode(tag, mp, vops, vpp)
vp->v_clen = 0;
vp->v_socket = 0;
vp->v_writecount = 0; /* XXX */
vp->v_maxio = 0;
} else {
simple_unlock(&vnode_free_list_slock);
vp = (struct vnode *) zalloc(vnode_zone);

View File

@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
* $Id: vfs_subr.c,v 1.124 1998/01/17 09:16:28 dyson Exp $
* $Id: vfs_subr.c,v 1.125 1998/01/22 17:29:52 dyson Exp $
*/
/*
@ -458,6 +458,7 @@ getnewvnode(tag, mp, vops, vpp)
vp->v_clen = 0;
vp->v_socket = 0;
vp->v_writecount = 0; /* XXX */
vp->v_maxio = 0;
} else {
simple_unlock(&vnode_free_list_slock);
vp = (struct vnode *) zalloc(vnode_zone);

View File

@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)conf.h 8.5 (Berkeley) 1/9/95
* $Id: conf.h,v 1.36 1997/09/27 13:39:46 kato Exp $
* $Id: conf.h,v 1.37 1997/11/22 08:35:42 bde Exp $
*/
#ifndef _SYS_CONF_H_
@ -108,6 +108,7 @@ struct bdevsw {
char *d_name; /* name of the driver e.g. audio */
struct cdevsw *d_cdev; /* cross pointer to the cdev */
int d_maj; /* the major number we were assigned */
int d_maxio;
};
#ifdef KERNEL

View File

@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)conf.h 8.5 (Berkeley) 1/9/95
* $Id: conf.h,v 1.36 1997/09/27 13:39:46 kato Exp $
* $Id: conf.h,v 1.37 1997/11/22 08:35:42 bde Exp $
*/
#ifndef _SYS_CONF_H_
@ -108,6 +108,7 @@ struct bdevsw {
char *d_name; /* name of the driver e.g. audio */
struct cdevsw *d_cdev; /* cross pointer to the cdev */
int d_maj; /* the major number we were assigned */
int d_maxio;
};
#ifdef KERNEL

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)vnode.h 8.7 (Berkeley) 2/4/94
* $Id: vnode.h,v 1.64 1998/01/12 01:44:08 dyson Exp $
* $Id: vnode.h,v 1.65 1998/01/17 09:16:39 dyson Exp $
*/
#ifndef _SYS_VNODE_H_
@ -107,6 +107,7 @@ struct vnode {
daddr_t v_cstart; /* start block of cluster */
daddr_t v_lasta; /* last allocation */
int v_clen; /* length of current cluster */
int v_maxio; /* maximum I/O cluster size */
struct vm_object *v_object; /* Place to store VM object */
struct simplelock v_interlock; /* lock on usecount and flag */
struct lock *v_vnlock; /* used for non-locking fs's */

View File

@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)lfs_segment.c 8.10 (Berkeley) 6/10/95
* $Id: lfs_segment.c,v 1.25 1997/10/16 11:58:30 phk Exp $
* $Id: lfs_segment.c,v 1.26 1997/12/02 21:07:17 phk Exp $
*/
#include <sys/param.h>
@ -931,7 +931,7 @@ lfs_writeseg(fs, sp)
/*
* When we simply write the blocks we lose a rotation for every block
* written. To avoid this problem, we allocate memory in chunks, copy
* the buffers into the chunk and write the chunk. MAXPHYS is the
* the buffers into the chunk and write the chunk. DFLTPHYS is the
* largest size I/O devices can handle.
* When the data is copied to the chunk, turn off the B_LOCKED bit
* and brelse the buffer (which will move them to the LRU list). Add
@ -945,16 +945,16 @@ lfs_writeseg(fs, sp)
*/
for (bpp = sp->bpp, i = nblocks; i;) {
cbp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp,
(*bpp)->b_blkno, MAXPHYS);
(*bpp)->b_blkno, DFLTPHYS);
cbp->b_dev = i_dev;
cbp->b_flags |= B_ASYNC | B_BUSY;
cbp->b_bcount = 0;
s = splbio();
++fs->lfs_iocount;
for (p = cbp->b_data; i && cbp->b_bcount < MAXPHYS; i--) {
for (p = cbp->b_data; i && cbp->b_bcount < DFLTPHYS; i--) {
bp = *bpp;
if (bp->b_bcount > (MAXPHYS - cbp->b_bcount))
if (bp->b_bcount > (DFLTPHYS - cbp->b_bcount))
break;
bpp++;

View File

@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)ufs_bmap.c 8.7 (Berkeley) 3/21/95
* $Id: ufs_bmap.c,v 1.16 1997/09/02 20:06:56 bde Exp $
* $Id: ufs_bmap.c,v 1.17 1997/11/24 16:33:03 bde Exp $
*/
#include <sys/param.h>
@ -46,11 +46,13 @@
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/resourcevar.h>
#include <sys/conf.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
#include <miscfs/specfs/specdev.h>
/*
* Bmap converts the logical block number of a file to its physical block
@ -124,18 +126,48 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
#endif
if (runp) {
*runp = 0;
}
if (runb) {
*runb = 0;
}
maxrun = 0;
if (runp || runb || (vp->v_maxio == 0)) {
struct vnode *devvp;
int blksize;
blksize = mp->mnt_stat.f_iosize;
/*
* XXX
* If MAXPHYS is the largest transfer the disks can handle,
* we probably want maxrun to be 1 block less so that we
* don't create a block larger than the device can handle.
*/
*runp = 0;
maxrun = MAXPHYS / mp->mnt_stat.f_iosize - 1;
}
devvp = ip->i_devvp;
if (runb) {
*runb = 0;
if (devvp && devvp->v_type == VBLK &&
(devvp->v_rdev != NODEV) &&
(major(devvp->v_rdev) < nblkdev)) {
if (bdevsw[major(devvp->v_rdev)]->d_maxio > MAXPHYS) {
maxrun = MAXPHYS;
vp->v_maxio = MAXPHYS;
} else {
maxrun = bdevsw[major(devvp->v_rdev)]->d_maxio;
vp->v_maxio = bdevsw[major(devvp->v_rdev)]->d_maxio;
}
maxrun = maxrun / blksize;
maxrun -= 1;
}
if (maxrun == 0) {
vp->v_maxio = DFLTPHYS;
maxrun = DFLTPHYS / blksize;
maxrun -= 1;
}
}
xap = ap == NULL ? a : ap;

View File

@ -61,7 +61,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
* $Id: vm_pager.c,v 1.30 1997/10/12 20:26:31 phk Exp $
* $Id: vm_pager.c,v 1.31 1997/12/29 00:25:06 dyson Exp $
*/
/*
@ -136,13 +136,11 @@ vm_pager_bufferinit()
/*
* Now set up swap and physical I/O buffer headers.
*/
for (i = 0; i < nswbuf - 1; i++, bp++) {
for (i = 0; i < nswbuf; i++, bp++) {
TAILQ_INSERT_HEAD(&bswlist, bp, b_freelist);
bp->b_rcred = bp->b_wcred = NOCRED;
bp->b_vnbufs.le_next = NOLIST;
}
bp->b_rcred = bp->b_wcred = NOCRED;
bp->b_vnbufs.le_next = NOLIST;
swapbkva = kmem_alloc_pageable(pager_map, nswbuf * MAXPHYS);
if (!swapbkva)