mirror of
https://git.FreeBSD.org/src.git
synced 2024-12-16 10:20:30 +00:00
Rewrite parity maintenance functions.
revive_block: Correct bug introduced in revision 1.25 which caused Add fields to vinum_ioctl_msgexcessive concurrent requests followed by system death.
This commit is contained in:
parent
78f25040af
commit
9b8f5e3c15
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=60357
@ -37,7 +37,7 @@
|
||||
* otherwise) arising in any way out of the use of this software, even if
|
||||
* advised of the possibility of such damage.
|
||||
*
|
||||
* $Id: vinumrevive.c,v 1.10 2000/01/03 03:40:54 grog Exp grog $
|
||||
* $Id: vinumrevive.c,v 1.13 2000/05/10 22:43:01 grog Exp grog $
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
@ -52,6 +52,7 @@
|
||||
* plex (which means a programming error if we get
|
||||
* here at all; FIXME).
|
||||
*/
|
||||
|
||||
int
|
||||
revive_block(int sdno)
|
||||
{
|
||||
@ -156,7 +157,7 @@ revive_block(int sdno)
|
||||
/* Don't need that bp after all, we'll get a new one. */
|
||||
bp->b_flags |= B_INVAL;
|
||||
brelse(bp); /* is this kosher? */
|
||||
bp = parityrebuild(plex, plexblkno, size, 0, &lock); /* do the grunt work */
|
||||
bp = parityrebuild(plex, plexblkno, size, rebuildparity, &lock, NULL); /* do the grunt work */
|
||||
if (bp == NULL) /* no buffer space */
|
||||
return ENOMEM; /* chicken out */
|
||||
} else { /* data block */
|
||||
@ -182,6 +183,7 @@ revive_block(int sdno)
|
||||
/* Now write to the subdisk */
|
||||
{
|
||||
bp->b_dev = VINUM_SD(sdno); /* create the device number */
|
||||
bp->b_flags &= ~B_DONE; /* no longer done */
|
||||
bp->b_ioflags = BIO_ORDERED; /* and make this an ordered write */
|
||||
bp->b_iocmd = BIO_WRITE;
|
||||
BUF_LOCKINIT(bp); /* get a lock for the buffer */
|
||||
@ -253,120 +255,128 @@ revive_block(int sdno)
|
||||
* called repeatedly from userland.
|
||||
*/
|
||||
void
|
||||
parityops(struct vinum_ioctl_msg *data, enum parityop op)
|
||||
parityops(struct vinum_ioctl_msg *data)
|
||||
{
|
||||
int plexno;
|
||||
struct plex *plex;
|
||||
int size; /* I/O transfer size, bytes */
|
||||
int i;
|
||||
int stripe; /* stripe number in plex */
|
||||
int psd; /* parity subdisk number */
|
||||
struct rangelock *lock; /* lock on stripe */
|
||||
struct _ioctl_reply *reply;
|
||||
u_int64_t *pstripep; /* pointer to our stripe counter */
|
||||
off_t pstripe; /* pointer to our stripe counter */
|
||||
struct buf *pbp;
|
||||
off_t errorloc; /* offset of parity error */
|
||||
enum parityop op; /* operation to perform */
|
||||
|
||||
pbp = NULL;
|
||||
plexno = data->index;
|
||||
op = data->op;
|
||||
pbp = NULL;
|
||||
reply = (struct _ioctl_reply *) data;
|
||||
reply->error = EAGAIN; /* expect to repeat this call */
|
||||
reply->msg[0] = '\0';
|
||||
plex = &PLEX[plexno];
|
||||
if (!isparity(plex)) { /* not RAID-4 or RAID-5 */
|
||||
reply->error = EINVAL;
|
||||
return;
|
||||
}
|
||||
if (op == rebuildparity) /* point to our counter */
|
||||
pstripep = &plex->rebuildblock;
|
||||
else
|
||||
pstripep = &plex->checkblock;
|
||||
stripe = *pstripep / plex->stripesize; /* stripe number */
|
||||
pstripe = data->offset;
|
||||
stripe = pstripe / plex->stripesize; /* stripe number */
|
||||
psd = plex->subdisks - 1 - stripe % plex->subdisks; /* parity subdisk for this stripe */
|
||||
size = min(DEFAULT_REVIVE_BLOCKSIZE, /* one block at a time */
|
||||
plex->stripesize << DEV_BSHIFT);
|
||||
|
||||
pbp = parityrebuild(plex, *pstripep, size, op == checkparity, &lock); /* do the grunt work */
|
||||
if (pbp == NULL) /* no buffer space */
|
||||
pbp = parityrebuild(plex, pstripe, size, op, &lock, &errorloc); /* do the grunt work */
|
||||
if (pbp == NULL) { /* no buffer space */
|
||||
reply->error = ENOMEM;
|
||||
return; /* chicken out */
|
||||
|
||||
}
|
||||
/*
|
||||
* Now we have a result in the data buffer of
|
||||
* the parity buffer header, which we have kept.
|
||||
* Decide what to do with it.
|
||||
*/
|
||||
reply->msg[0] = '\0'; /* until shown otherwise */
|
||||
if ((pbp->b_ioflags & BIO_ERROR) == 0) { /* no error */
|
||||
if (op == checkparity) {
|
||||
int *parity_buf;
|
||||
int isize;
|
||||
|
||||
parity_buf = (int *) pbp->b_data;
|
||||
isize = pbp->b_bcount / sizeof(int);
|
||||
for (i = 0; i < isize; i++) {
|
||||
if (parity_buf[i] != 0) {
|
||||
reply->error = EIO;
|
||||
sprintf(reply->msg,
|
||||
"Parity incorrect at offset 0x%lx\n",
|
||||
(u_long) (*pstripep << DEV_BSHIFT) * (plex->subdisks - 1)
|
||||
+ i * sizeof(int));
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else { /* rebuildparity */
|
||||
if ((op == rebuildparity)
|
||||
|| (op == rebuildandcheckparity)) {
|
||||
pbp->b_iocmd = BIO_WRITE;
|
||||
pbp->b_resid = pbp->b_bcount;
|
||||
BUF_LOCKINIT(pbp); /* get a lock for the buffer */
|
||||
BUF_LOCK(pbp, LK_EXCLUSIVE); /* and lock it */
|
||||
sdio(pbp); /* perform the I/O */
|
||||
sdio(pbp); /* write the parity block */
|
||||
bufwait(pbp);
|
||||
}
|
||||
if (((op == checkparity)
|
||||
|| (op == rebuildandcheckparity))
|
||||
&& (errorloc != -1)) {
|
||||
if (op == checkparity)
|
||||
reply->error = EIO;
|
||||
sprintf(reply->msg,
|
||||
"Parity incorrect at offset 0x%llx\n",
|
||||
errorloc);
|
||||
}
|
||||
if (reply->error == EAGAIN) { /* still OK, */
|
||||
*pstripep += (pbp->b_bcount >> DEV_BSHIFT); /* moved this much further down */
|
||||
if (*pstripep >= SD[plex->sdnos[0]].sectors) { /* finished */
|
||||
*pstripep = 0;
|
||||
plex->checkblock = pstripe + (pbp->b_bcount >> DEV_BSHIFT); /* moved this much further down */
|
||||
if (pstripe >= SD[plex->sdnos[0]].sectors) { /* finished */
|
||||
plex->checkblock = 0;
|
||||
reply->error = 0;
|
||||
}
|
||||
}
|
||||
if (pbp->b_ioflags & BIO_ERROR)
|
||||
reply->error = pbp->b_error;
|
||||
pbp->b_flags |= B_INVAL;
|
||||
pbp->b_ioflags &= ~BIO_ERROR;
|
||||
brelse(pbp);
|
||||
|
||||
}
|
||||
if (pbp->b_ioflags & BIO_ERROR)
|
||||
reply->error = pbp->b_error;
|
||||
pbp->b_flags |= B_INVAL;
|
||||
pbp->b_ioflags &= ~BIO_ERROR;
|
||||
brelse(pbp);
|
||||
unlockrange(plexno, lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Rebuild a parity stripe. Return pointer to
|
||||
* parity bp. On return, the band is locked. The
|
||||
* caller must unlock the band and release the
|
||||
* buffer header.
|
||||
* parity bp. On return,
|
||||
*
|
||||
* 1. The band is locked. The caller must unlock
|
||||
* the band and release the buffer header.
|
||||
*
|
||||
* 2. All buffer headers except php have been
|
||||
* released. The caller must release pbp.
|
||||
*
|
||||
* 3. For checkparity and rebuildandcheckparity,
|
||||
* the parity is compared with the current
|
||||
* parity block. If it's different, the
|
||||
* offset of the error is returned to
|
||||
* errorloc. The caller can set the value of
|
||||
* the pointer to NULL if this is called for
|
||||
* rebuilding parity.
|
||||
*/
|
||||
struct buf *
|
||||
parityrebuild(struct plex *plex,
|
||||
u_int64_t pstripe,
|
||||
int size,
|
||||
int check, /* 1 if only checking */
|
||||
struct rangelock **lockp)
|
||||
enum parityop op,
|
||||
struct rangelock **lockp,
|
||||
off_t * errorloc)
|
||||
{
|
||||
int error;
|
||||
int s;
|
||||
int sdno;
|
||||
int stripe; /* stripe number */
|
||||
int *parity_buf; /* the address supplied by geteblk */
|
||||
int *parity_buf; /* buffer address for current parity block */
|
||||
int *newparity_buf; /* and for new parity block */
|
||||
int mysize; /* I/O transfer size for this transfer */
|
||||
int isize; /* mysize in ints */
|
||||
int i;
|
||||
int psd; /* parity subdisk number */
|
||||
int newpsd; /* and "subdisk number" of new parity */
|
||||
struct buf **bpp; /* pointers to our bps */
|
||||
struct buf *pbp; /* buffer header for parity stripe */
|
||||
int *sbuf;
|
||||
int bufcount; /* number of buffers we need */
|
||||
|
||||
stripe = pstripe / plex->stripesize; /* stripe number */
|
||||
psd = plex->subdisks - 1 - stripe % plex->subdisks; /* parity subdisk for this stripe */
|
||||
parity_buf = NULL; /* to keep the compiler happy */
|
||||
error = 0;
|
||||
pbp = NULL;
|
||||
|
||||
/*
|
||||
* It's possible that the default transfer size
|
||||
@ -380,38 +390,45 @@ parityrebuild(struct plex *plex,
|
||||
*/
|
||||
mysize = min(size, (plex->stripesize * (stripe + 1) - pstripe) << DEV_BSHIFT);
|
||||
isize = mysize / (sizeof(int)); /* number of ints in the buffer */
|
||||
bufcount = plex->subdisks + 1; /* sd buffers plus result buffer */
|
||||
newpsd = plex->subdisks;
|
||||
bpp = (struct buf **) Malloc(bufcount * sizeof(struct buf *)); /* array of pointers to bps */
|
||||
|
||||
bpp = (struct buf **) Malloc(plex->subdisks * sizeof(struct buf *)); /* array of pointers to bps */
|
||||
|
||||
/* First, issue requests for all subdisks in parallel */
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */
|
||||
/* Get a buffer header and initialize it. */
|
||||
s = splbio();
|
||||
bpp[sdno] = geteblk(mysize); /* Get a buffer */
|
||||
if (bpp[sdno] == NULL) {
|
||||
while (sdno-- > 0) { /* release the ones we got */
|
||||
bpp[sdno]->b_flags |= B_INVAL;
|
||||
brelse(bpp[sdno]); /* give back our resources */
|
||||
/* First, build requests for all subdisks */
|
||||
for (sdno = 0; sdno < bufcount; sdno++) { /* for each subdisk */
|
||||
if ((sdno != psd) || (op != rebuildparity)) {
|
||||
/* Get a buffer header and initialize it. */
|
||||
s = splbio();
|
||||
bpp[sdno] = geteblk(mysize); /* Get a buffer */
|
||||
if (bpp[sdno] == NULL) {
|
||||
while (sdno-- > 0) { /* release the ones we got */
|
||||
bpp[sdno]->b_flags |= B_INVAL;
|
||||
brelse(bpp[sdno]); /* give back our resources */
|
||||
}
|
||||
splx(s);
|
||||
printf("vinum: can't allocate buffer space for parity op.\n");
|
||||
return NULL; /* no bpps */
|
||||
}
|
||||
splx(s);
|
||||
printf("vinum: can't allocate buffer space\n");
|
||||
return NULL; /* no bpps */
|
||||
if (sdno == psd)
|
||||
parity_buf = (int *) bpp[sdno]->b_data;
|
||||
if (sdno == newpsd) /* the new one? */
|
||||
bpp[sdno]->b_dev = VINUM_SD(psd); /* write back to the parity SD */
|
||||
else
|
||||
bpp[sdno]->b_dev = VINUM_SD(plex->sdnos[sdno]); /* device number */
|
||||
bpp[sdno]->b_iocmd = BIO_READ; /* either way, read it */
|
||||
bpp[sdno]->b_flags = 0;
|
||||
bpp[sdno]->b_bcount = bpp[sdno]->b_bufsize;
|
||||
bpp[sdno]->b_resid = bpp[sdno]->b_bcount;
|
||||
bpp[sdno]->b_blkno = pstripe; /* transfer from here */
|
||||
}
|
||||
splx(s);
|
||||
if (sdno == psd) {
|
||||
pbp = bpp[sdno];
|
||||
parity_buf = (int *) bpp[sdno]->b_data;
|
||||
if (!check)
|
||||
bzero(parity_buf, mysize);
|
||||
}
|
||||
bpp[sdno]->b_dev = VINUM_SD(plex->sdnos[sdno]); /* device number */
|
||||
bpp[sdno]->b_iocmd = BIO_READ; /* either way, read it */
|
||||
bpp[sdno]->b_flags = 0;
|
||||
bpp[sdno]->b_bcount = bpp[sdno]->b_bufsize;
|
||||
bpp[sdno]->b_resid = bpp[sdno]->b_bcount;
|
||||
bpp[sdno]->b_blkno = pstripe; /* read from here */
|
||||
}
|
||||
|
||||
/* Initialize result buffer */
|
||||
pbp = bpp[newpsd];
|
||||
newparity_buf = (int *) bpp[newpsd]->b_data;
|
||||
bzero(newparity_buf, mysize);
|
||||
|
||||
/*
|
||||
* Now lock the stripe with the first non-parity
|
||||
* bp as locking bp.
|
||||
@ -423,11 +440,11 @@ parityrebuild(struct plex *plex,
|
||||
/*
|
||||
* Then issue requests for all subdisks in
|
||||
* parallel. Don't transfer the parity stripe
|
||||
* if we're rebuilding parity. We have already
|
||||
* initialized it to 0.
|
||||
* if we're rebuilding parity, unless we also
|
||||
* want to check it.
|
||||
*/
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */
|
||||
if ((sdno != psd) || check) {
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each real subdisk */
|
||||
if ((sdno != psd) || (op != rebuildparity)) {
|
||||
BUF_LOCKINIT(bpp[sdno]); /* get a lock for the buffer */
|
||||
BUF_LOCK(bpp[sdno], LK_EXCLUSIVE); /* and lock it */
|
||||
sdio(bpp[sdno]);
|
||||
@ -443,35 +460,41 @@ parityrebuild(struct plex *plex,
|
||||
* delay is minimal.
|
||||
*/
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */
|
||||
if ((sdno != psd) || check) {
|
||||
if ((sdno != psd) || (op != rebuildparity)) {
|
||||
bufwait(bpp[sdno]);
|
||||
if (bpp[sdno]->b_ioflags & BIO_ERROR) /* can't read, */
|
||||
error = bpp[sdno]->b_error;
|
||||
else if (sdno != psd) { /* update parity */
|
||||
sbuf = (int *) bpp[sdno]->b_data;
|
||||
for (i = 0; i < isize; i++)
|
||||
((int *) newparity_buf)[i] ^= sbuf[i]; /* xor in the buffer */
|
||||
}
|
||||
}
|
||||
if (sdno != psd) { /* release all bps except parity */
|
||||
bpp[sdno]->b_flags |= B_INVAL;
|
||||
brelse(bpp[sdno]); /* give back our resources */
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Finally, do the xors. To save time, we do
|
||||
* the XOR wordwise. This requires sectors to
|
||||
* be a multiple of the length of an int, which
|
||||
* is currently always the case. We do this in
|
||||
* a separate loop to avoid a race condition
|
||||
* with the parity stripe.
|
||||
* If we're checking, compare the calculated
|
||||
* and the read parity block. If they're
|
||||
* different, return the plex-relative offset;
|
||||
* otherwise return -1.
|
||||
*/
|
||||
for (sdno = 0; sdno < plex->subdisks; sdno++) { /* for each subdisk */
|
||||
if ((sdno != psd) || check) {
|
||||
if (error == 0) { /* still OK, */
|
||||
sbuf = (int *) bpp[sdno]->b_data;
|
||||
for (i = 0; i < isize; i++)
|
||||
((int *) parity_buf)[i] ^= sbuf[i]; /* xor in the buffer */
|
||||
}
|
||||
if (sdno != psd) { /* release all bps except parity */
|
||||
bpp[sdno]->b_flags |= B_INVAL;
|
||||
brelse(bpp[sdno]); /* give back our resources */
|
||||
if ((op == checkparity)
|
||||
|| (op == rebuildandcheckparity)) {
|
||||
*errorloc = -1; /* no error yet */
|
||||
for (i = 0; i < isize; i++) {
|
||||
if (parity_buf[i] != newparity_buf[i]) {
|
||||
*errorloc = (u_long) (pstripe << DEV_BSHIFT) * (plex->subdisks - 1)
|
||||
+ i * sizeof(int);
|
||||
break;
|
||||
}
|
||||
}
|
||||
bpp[psd]->b_flags |= B_INVAL;
|
||||
brelse(bpp[psd]); /* give back our resources */
|
||||
}
|
||||
|
||||
/* release our resources */
|
||||
Free(bpp);
|
||||
if (error) {
|
||||
@ -483,7 +506,7 @@ parityrebuild(struct plex *plex,
|
||||
|
||||
/*
|
||||
* Initialize a subdisk by writing zeroes to the
|
||||
* complete address space. If check is set,
|
||||
* complete address space. If verify is set,
|
||||
* check each transfer for correctness.
|
||||
*
|
||||
* Each call to this function writes (and maybe
|
||||
|
Loading…
Reference in New Issue
Block a user