From 1943d3438c56f72d63530240675a8f8baf1cbb9c Mon Sep 17 00:00:00 2001
From: Greg Lehey
Date: Sat, 7 Aug 1999 08:22:49 +0000
Subject: [PATCH] Import RAID-5 code. Add Cybernet copyright.

OK'd-by: Chuck Jacobus
---
 sys/dev/vinum/vinumraid5.c | 638 +++++++++++++++++++++++++++++++++++++
 1 file changed, 638 insertions(+)
 create mode 100644 sys/dev/vinum/vinumraid5.c

diff --git a/sys/dev/vinum/vinumraid5.c b/sys/dev/vinum/vinumraid5.c
new file mode 100644
index 00000000000..0d3af63a6b7
--- /dev/null
+++ b/sys/dev/vinum/vinumraid5.c
@@ -0,0 +1,638 @@
+/*-
+ * Copyright (c) 1997, 1998
+ *    Cybernet Corporation and Nan Yang Computer Services Limited.
+ *    All rights reserved.
+ *
+ * This software was developed as part of the NetMAX project.
+ *
+ * Written by Greg Lehey
+ *
+ * This software is distributed under the so-called ``Berkeley
+ * License'':
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *    This product includes software developed by Cybernet Corporation
+ *    and Nan Yang Computer Services Limited
+ * 4. Neither the name of the Companies nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * This software is provided ``as is'', and any express or implied
+ * warranties, including, but not limited to, the implied warranties of
+ * merchantability and fitness for a particular purpose are disclaimed.
+ * In no event shall the company or contributors be liable for any
+ * direct, indirect, incidental, special, exemplary, or consequential
+ * damages (including, but not limited to, procurement of substitute
+ * goods or services; loss of use, data, or profits; or business
+ * interruption) however caused and on any theory of liability, whether
+ * in contract, strict liability, or tort (including negligence or
+ * otherwise) arising in any way out of the use of this software, even if
+ * advised of the possibility of such damage.
+ *
+ * $Id: raid5.c,v 1.15 1999/07/07 03:46:01 grog Exp grog $
+ */
+/*
+ * XXX To do:
+ *
+ * lock ranges while calculating parity
+ */
+
+#include
+#include
+#include
+#include
+
+/*
+ * Parameters which describe the current transfer.
+ * These are only used for calculation, but they
+ * need to be passed to other functions, so it's
+ * tidier to put them in a struct
+ */
+struct metrics {
+    daddr_t stripebase; /* base address of stripe (1st subdisk) */
+    int stripeoffset; /* offset in stripe */
+    int stripesectors; /* total sectors to transfer in this stripe */
+    daddr_t sdbase; /* offset in subdisk of stripe base */
+    int sdcount; /* number of disks involved in this transfer */
+    daddr_t diskstart; /* remember where this transfer starts */
+    int psdno; /* number of parity subdisk */
+    int badsdno; /* number of down subdisk, if there is one */
+    int firstsdno; /* first data subdisk number */
+    /* These correspond to the fields in rqelement, sort of */
+    int useroffset;
+    /*
+     * Initial offset and length values for the first
+     * data block
+     */
+    int initoffset; /* start address of block to transfer */
+    short initlen; /* length in sectors of data transfer */
+    /* Define a normal operation */
+    int dataoffset; /* start address of block to transfer */
+    int datalen; /* length in sectors of data transfer */
+    /* Define a group operation */
+    int groupoffset; /* subdisk offset of group operation */
+    int grouplen; /* length in sectors of group operation */
+    /* Define a normal write operation */
+    int writeoffset; /* subdisk offset of normal write */
+    int writelen; /* length in sectors of write operation */
+    enum xferinfo flags; /* to check what we're doing */
+    int rqcount; /* number of elements in request */
+};
+
+enum requeststatus bre5(struct request *rq,
+    int plexno,
+    daddr_t * diskstart,
+    daddr_t diskend);
+void complete_raid5_write(struct rqelement *);
+enum requeststatus build_rq_buffer(struct rqelement *rqe, struct plex *plex);
+void setrqebounds(struct rqelement *rqe, struct metrics *mp);
+
+/*
+ * define the low-level requests needed to perform a
+ * high-level I/O operation for a specific plex 'plexno'.
+ *
+ * Return 0 if all subdisks involved in the request are up, 1 if some
+ * subdisks are not up, and -1 if the request is at least partially
+ * outside the bounds of the subdisks.
+ *
+ * Modify the pointer *diskstart to point to the end address. On
+ * read, return on the first bad subdisk, so that the caller
+ * (build_read_request) can try alternatives.
+ *
+ * On entry to this routine, the prq structures are not assigned. The
+ * assignment is performed by expandrq(). Strictly speaking, the
+ * elements rqe->sdno of all entries should be set to -1, since 0
+ * (from bzero) is a valid subdisk number. We avoid this problem by
+ * initializing the ones we use, and not looking at the others (index
+ * >= prq->requests).
+ */
+enum requeststatus
+bre5(struct request *rq,
+    int plexno,
+    daddr_t * diskaddr,
+    daddr_t diskend)
+{
+    struct metrics m; /* most of the information */
+    struct sd *sd;
+    struct plex *plex;
+    struct buf *bp; /* user's bp */
+    struct rqgroup *rqg; /* the request group that we will create */
+    struct rqelement *rqe; /* point to this request information */
+    int rsectors; /* sectors remaining in this stripe */
+    int mysdno; /* another sd index in loops */
+    int rqno; /* request number */
+
+    m.diskstart = *diskaddr; /* start of transfer */
+    bp = rq->bp; /* buffer pointer */
+    plex = &PLEX[plexno]; /* point to the plex */
+
+
+    while (*diskaddr < diskend) { /* until we get it all sorted out */
+        struct rqelement *prqe = NULL; /* XXX */
+        m.badsdno = -1; /* no bad subdisk yet */
+
+        /* Part A: Define the request */
+        /*
+         * First, calculate some sizes:
+         * The offset of the start address from
+         * the start of the stripe
+         */
+        m.stripeoffset = *diskaddr % (plex->stripesize * (plex->subdisks - 1));
+
+        /*
+         * The plex-relative address of the
+         * start of the stripe
+         */
+        m.stripebase = *diskaddr - m.stripeoffset;
+
+        /* subdisk containing the parity stripe */
+        m.psdno = plex->subdisks - 1 - (*diskaddr / (plex->stripesize * (plex->subdisks - 1))) % plex->subdisks;
+
+        /*
+         * The number of the subdisk in which
+         * the start is located
+         */
+        m.firstsdno = m.stripeoffset / plex->stripesize;
+        if (m.firstsdno >= m.psdno) /* at or past parity sd */
+            m.firstsdno++; /* increment it */
+
+        /*
+         * The offset from the beginning of
+         * the stripe on this subdisk
+         */
+        m.initoffset = m.stripeoffset % plex->stripesize;
+
+        /* The offset of the stripe start relative to this subdisk */
+        m.sdbase = m.stripebase / (plex->subdisks - 1);
+
+        m.useroffset = *diskaddr - m.diskstart; /* The offset of the start in the user buffer */
+
+        /*
+         * The number of sectors to transfer in the
+         * current (first) subdisk
+         */
+        m.initlen = min(diskend - *diskaddr, /* the amount remaining to transfer */
+            plex->stripesize - m.initoffset); /* and the amount left in this block */
+
+        /*
+         * The number of sectors to transfer in this stripe
+         * is the minimum of the amount remaining to transfer
+         * and the amount left in this stripe
+         */
+        m.stripesectors = min(diskend - *diskaddr,
+            plex->stripesize * (plex->subdisks - 1) - m.stripeoffset);
+
+        /* The number of data subdisks involved in this request */
+        m.sdcount = (m.stripesectors + m.initoffset + plex->stripesize - 1) / plex->stripesize;
+
+        /* Part B: decide what kind of transfer this will be */
+        /*
+         * start and end addresses of the transfer in
+         * the current block.
+         *
+         * There are a number of different kinds of transfer, each of which relates to a
+         * specific subdisk:
+         *
+         * 1. Normal read. All participating subdisks are up, and the transfer can be
+         *    made directly to the user buffer. The bounds of the transfer are described
+         *    by m.dataoffset and m.datalen. We have already calculated m.initoffset and
+         *    m.initlen, which define the parameters for the first data block.
+         *
+         * 2. Recovery read. One participating subdisk is down. To recover data, all
+         *    the other subdisks, including the parity subdisk, must be read. The data is
+         *    recovered by exclusive-oring all the other blocks. The bounds of the transfer
+         *    are described by m.groupoffset and m.grouplen.
+         *
+         * 3. A read request may request reading both available data (normal read) and
+         *    non-available data (recovery read).
+         *    This can be a problem if the address ranges
+         *    of the two reads do not coincide: in this case, the normal read needs to be
+         *    extended to cover the address range of the recovery read, and must thus be
+         *    performed out of malloced memory.
+         *
+         * 4. Normal write. All the participating subdisks are up. The bounds of the transfer
+         *    are described by m.dataoffset and m.datalen. Since these values differ for each
+         *    block, we calculate the bounds for the parity block independently as the maximum
+         *    of the individual blocks and store these values in m.writeoffset and m.writelen.
+         *    This write proceeds in four phases:
+         *
+         *    i. Read the old contents of each block and the parity block.
+         *
+         *    ii. ``Remove'' the old contents from the parity block with exclusive or.
+         *
+         *    iii. ``Insert'' the new contents of the block in the parity block, again with
+         *    exclusive or.
+         *
+         *    iv. Write the new contents of the data blocks and the parity block. The data block
+         *    transfers can be made directly from the user buffer.
+         *
+         * 5. Degraded write where the data block is not available. The bounds of the
+         *    transfer are described by m.groupoffset and m.grouplen. This requires the
+         *    following steps:
+         *
+         *    i. Read in all the other data blocks, excluding the parity block.
+         *
+         *    ii. Recreate the parity block from the other data blocks and the data to be written.
+         *
+         *    iii. Write the parity block.
+         *
+         * 6. Parityless write, a write where the parity block is not available. This
+         *    is in fact the simplest: just write the data blocks. This can proceed directly
+         *    from the user buffer. The bounds of the transfer are described
+         *    by m.dataoffset and m.datalen.
+         *
+         * 7. Combination of degraded data block write and normal write. In this case the
+         *    address ranges of the reads may also need to be extended to cover all
+         *    participating blocks.
+         *
+         * All requests in a group transfer transfer the same address range relative
+         * to their subdisk. The individual transfers may vary, but since our group of
+         * requests is all in a single slice, we can define a range in which they all
+         * fall.
+         *
+         * In the following code section, we determine which kind of transfer we will perform.
+         * If there is a group transfer, we also decide its bounds relative to the subdisks.
+         * At the end, we have the following values:
+         *
+         *   m.flags indicates the kinds of transfers we will perform
+         *   m.initoffset indicates the offset of the beginning of any data
+         *     operation relative to the beginning of the stripe base.
+         *   m.initlen specifies the length of any data operation.
+         *   m.dataoffset contains the same value as m.initoffset.
+         *   m.datalen contains the same value as m.initlen. Initially
+         *     dataoffset and datalen describe the parameters for the first
+         *     data block; while building the data block requests, they are
+         *     updated for each block.
+         *   m.groupoffset indicates the offset of any group operation relative
+         *     to the beginning of the stripe base
+         *   m.grouplen specifies the length of any group operation
+         *   m.writeoffset indicates the offset of a normal write relative
+         *     to the beginning of the stripe base. This value differs from
+         *     m.dataoffset in that it applies to the entire operation, and
+         *     not just the first block.
+         *   m.writelen specifies the total span of a normal write operation.
+         *     writeoffset and writelen are used to define the parity block.
+         */
+        m.groupoffset = 0; /* assume no group... */
+        m.grouplen = 0; /* until we know we have one */
+        m.writeoffset = m.initoffset; /* start offset of transfer */
+        m.writelen = 0; /* nothing to write yet */
+        m.flags = 0; /* no flags yet */
+        rsectors = m.stripesectors; /* remaining sectors to examine */
+        m.dataoffset = m.initoffset; /* start at the beginning of the transfer */
+        m.datalen = m.initlen;
+
+        if (m.sdcount > 1) {
+            plex->multiblock++; /* more than one block for the request */
+            /*
+             * If we have two transfers that don't overlap,
+             * (one at the end of the first block, the other
+             * at the beginning of the second block),
+             * it's cheaper to split them
+             */
+            if (rsectors < plex->stripesize) {
+                m.sdcount = 1; /* just one subdisk */
+                m.stripesectors = m.initlen; /* and just this many sectors */
+                rsectors = m.initlen; /* and in the loop counter */
+            }
+        }
+        if (SD[plex->sdnos[m.psdno]].state < sd_reborn) /* is our parity subdisk down? */
+            m.badsdno = m.psdno; /* note that it's down */
+        if (bp->b_flags & B_READ) { /* read operation */
+            for (mysdno = m.firstsdno; rsectors > 0; mysdno++) {
+                if (mysdno == m.psdno) /* ignore parity on read */
+                    mysdno++;
+                if (mysdno == plex->subdisks) /* wraparound */
+                    mysdno = 0;
+                if (mysdno == m.psdno) /* parity, */
+                    mysdno++; /* we've given already */
+
+                if (SD[plex->sdnos[mysdno]].state < sd_reborn) { /* got a bad subdisk, */
+                    if (m.badsdno >= 0) /* we had one already, */
+                        /*
+                         * XXX be cleverer here. We can still
+                         * read what we can read.
+                         */
+                        return REQUEST_DOWN; /* we can't take a second */
+                    m.badsdno = mysdno; /* got the first */
+                    m.groupoffset = m.dataoffset; /* define the bounds */
+                    m.grouplen = m.datalen;
+                    m.flags |= XFR_RECOVERY_READ; /* we need recovery */
+                    plex->recovered_reads++; /* count another one */
+                } else
+                    m.flags |= XFR_NORMAL_READ; /* normal read */
+
+                /* Update the pointers for the next block */
+                m.dataoffset = 0; /* back to the start of the stripe */
+                rsectors -= m.datalen; /* remaining sectors to examine */
+                m.datalen = min(rsectors, plex->stripesize); /* amount that will fit in this block */
+            }
+        } else { /* write operation */
+            for (mysdno = m.firstsdno; rsectors > 0; mysdno++) {
+                if (mysdno == m.psdno) /* parity stripe, we've dealt with that */
+                    mysdno++;
+                if (mysdno == plex->subdisks) /* wraparound */
+                    mysdno = 0;
+                if (mysdno == m.psdno) /* parity, */
+                    mysdno++; /* we've given already */
+
+                sd = &SD[plex->sdnos[mysdno]];
+                if (sd->state != sd_up) {
+                    enum requeststatus s;
+
+                    s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */
+                    if (s && (m.badsdno >= 0)) { /* second bad disk, */
+                        int sdno;
+                        /*
+                         * If the parity disk is down, there's
+                         * no recovery. We make all involved
+                         * subdisks stale. Otherwise, we
+                         * should be able to recover, but it's
+                         * like pulling teeth. Fix it later.
+                         *
+                         * XXX be cleverer here. We should
+                         * still write what we can write.
+                         */
+                        for (sdno = 0; sdno < m.sdcount; sdno++) {
+                            struct sd *sd = &SD[plex->sdnos[sdno]];
+                            if (sd->state >= sd_reborn) /* sort of up, */
+                                set_sd_state(sd->sdno, sd_stale, setstate_force); /* make it stale */
+                        }
+                        return s; /* and crap out */
+                    }
+                    m.badsdno = mysdno; /* note which one is bad */
+                    m.flags |= XFR_DEGRADED_WRITE; /* we need recovery */
+                    plex->degraded_writes++; /* count another one */
+                    m.groupoffset = m.dataoffset; /* define the bounds */
+                    m.grouplen = m.datalen;
+                } else {
+                    m.flags |= XFR_NORMAL_WRITE; /* normal write operation */
+                    if (m.writeoffset > m.dataoffset) { /* move write operation lower */
+                        m.writelen = max(m.writeoffset + m.writelen,
+                            m.dataoffset + m.datalen)
+                            - m.dataoffset;
+                        m.writeoffset = m.dataoffset;
+                    } else
+                        m.writelen = max(m.writeoffset + m.writelen,
+                            m.dataoffset + m.datalen)
+                            - m.writeoffset;
+                }
+
+                /* Update the pointers for the next block */
+                m.dataoffset = 0; /* back to the start of the stripe */
+                rsectors -= m.datalen; /* remaining sectors to examine */
+                m.datalen = min(rsectors, plex->stripesize); /* amount that will fit in this block */
+            }
+            if (m.badsdno == m.psdno) { /* got a bad parity block, */
+                struct sd *psd = &SD[plex->sdnos[m.psdno]];
+
+                if (psd->state == sd_down)
+                    set_sd_state(psd->sdno, sd_obsolete, setstate_force); /* it's obsolete now */
+                else if (psd->state == sd_crashed)
+                    set_sd_state(psd->sdno, sd_stale, setstate_force); /* it's stale now */
+                m.flags &= ~XFR_NORMAL_WRITE; /* this write isn't normal, */
+                m.flags |= XFR_PARITYLESS_WRITE; /* it's parityless */
+                plex->parityless_writes++; /* count another one */
+            }
+        }
+
+        /* reset the initial transfer values */
+        m.dataoffset = m.initoffset; /* start at the beginning of the transfer */
+        m.datalen = m.initlen;
+
+        /*
+         * XXX see if we can satisfy a recovery_read from a
+         * different plex. If so, return from here with no requests WRITEME
+         */
+
+        /* decide how many requests we need */
+        if (m.flags & (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE)) /* doing a recovery read or degraded write, */
+            m.rqcount = plex->subdisks; /* all subdisks */
+        else if (m.flags & XFR_NORMAL_WRITE) /* normal write, */
+            m.rqcount = m.sdcount + 1; /* all data blocks and the parity block */
+        else /* parityless write or normal read */
+            m.rqcount = m.sdcount; /* just the data blocks */
+
+        /* Part C: build the requests */
+        rqg = allocrqg(rq, m.rqcount); /* get a request group */
+        if (rqg == NULL) { /* malloc failed */
+            bp->b_flags |= B_ERROR;
+            bp->b_error = ENOMEM;
+            biodone(bp);
+            return REQUEST_ENOMEM;
+        }
+        rqg->plexno = plexno;
+        rqg->flags = m.flags;
+        rqno = 0; /* index in the request group */
+
+        /* 1: PARITY BLOCK */
+        /*
+         * Are we performing an operation which requires parity? In that case,
+         * work out the parameters and define the parity block.
+         * XFR_PARITYOP is XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE
+         */
+        if (m.flags & XFR_PARITYOP) { /* need parity */
+            rqe = &rqg->rqe[rqno]; /* point to element */
+            sd = &SD[plex->sdnos[m.psdno]]; /* the subdisk in question */
+            rqe->rqg = rqg; /* point back to group */
+            rqe->flags = (m.flags | XFR_PARITY_BLOCK | XFR_MALLOCED) /* always malloc parity block */
+                &~(XFR_NORMAL_READ | XFR_PARITYLESS_WRITE); /* transfer flags without data op stuff */
+            setrqebounds(rqe, &m); /* set up the bounds of the transfer */
+            rqe->sdno = sd->sdno; /* subdisk number */
+            rqe->driveno = sd->driveno;
+            prqe = rqe; /* debug XXX */
+            if (build_rq_buffer(rqe, plex)) /* build the buffer */
+                return REQUEST_ENOMEM; /* can't do it */
+            rqe->b.b_flags |= B_READ; /* we must read first */
+            m.sdcount++; /* adjust the subdisk count */
+            rqno++; /* and point to the next request */
+        }
+        /*
+         * 2: DATA BLOCKS
+         * Now build up requests for the blocks required
+         * for individual transfers
+         */
+        for (mysdno = m.firstsdno; rqno < m.sdcount; mysdno++, rqno++) {
+            if (mysdno == m.psdno) /* parity, */
+                mysdno++; /* we've given already */
+            if (mysdno == plex->subdisks) /* got to the end, */
+                mysdno = 0; /* wrap around */
+            if (mysdno == m.psdno) /* parity, */
+                mysdno++; /* we've given already */
+
+            rqe = &rqg->rqe[rqno]; /* point to element */
+            sd = &SD[plex->sdnos[mysdno]]; /* the subdisk in question */
+            rqe->rqg = rqg; /* point to group */
+            if (m.flags & XFR_NEEDS_MALLOC) /* we need a malloced buffer first */
+                rqe->flags = m.flags | XFR_DATA_BLOCK | XFR_MALLOCED; /* transfer flags */
+            else
+                rqe->flags = m.flags | XFR_DATA_BLOCK; /* transfer flags */
+            if (mysdno == m.badsdno) { /* this is the bad subdisk */
+                rqg->badsdno = rqno; /* note which one */
+                rqe->flags |= XFR_BAD_SUBDISK; /* note that it's dead */
+                /*
+                 * we can't read or write from/to it,
+                 * but we don't need to malloc
+                 */
+                rqe->flags &= ~(XFR_MALLOCED | XFR_NORMAL_READ | XFR_NORMAL_WRITE);
+            }
+            setrqebounds(rqe, &m); /* set up the bounds of the transfer */
+#if VINUMDEBUG
+            if (prqe
+                && (rqe->groupoffset + rqe->sdoffset) < prqe->sdoffset) /* XXX */
+                Debugger("Low data block"); /* XXX */
+#endif
+            rqe->useroffset = m.useroffset; /* offset in user buffer */
+            rqe->sdno = sd->sdno; /* subdisk number */
+            rqe->driveno = sd->driveno;
+            if (build_rq_buffer(rqe, plex)) /* build the buffer */
+                return REQUEST_ENOMEM; /* can't do it */
+            if ((m.flags & XFR_PARITYOP) /* parity operation, */
+                &&((m.flags & XFR_BAD_SUBDISK) == 0)) /* and not the bad subdisk, */
+                rqe->b.b_flags |= B_READ; /* we must read first */
+
+            /* Now update pointers for the next block */
+            *diskaddr += m.datalen; /* skip past what we've done */
+            m.stripesectors -= m.datalen; /* deduct from what's left */
+            m.useroffset += m.datalen; /* and move on in the user buffer */
+            m.datalen = min(m.stripesectors, plex->stripesize); /* and recalculate */
+            m.dataoffset = 0; /* start at the beginning of next block */
+        }
+
+        /*
+         * 3: REMAINING BLOCKS FOR RECOVERY
+         * Finally, if we have a recovery operation, build
+         * up transfers for the other subdisks. Follow the
+         * subdisks around until we get to where we started.
+         * These requests use only the group parameters.
+         */
+        if ((rqno < m.rqcount) /* haven't done them all already */
+            &&(m.flags & (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE))) {
+            for (; rqno < m.rqcount; rqno++, mysdno++) {
+                if (mysdno == m.psdno) /* parity, */
+                    mysdno++; /* we've given already */
+                if (mysdno == plex->subdisks) /* got to the end, */
+                    mysdno = 0; /* wrap around */
+                if (mysdno == m.psdno) /* parity, */
+                    mysdno++; /* we've given already */
+
+                rqe = &rqg->rqe[rqno]; /* point to element */
+                sd = &SD[plex->sdnos[mysdno]]; /* the subdisk in question */
+                rqe->rqg = rqg; /* point to group */
+
+                rqe->sdoffset = m.sdbase + m.groupoffset; /* start of transfer */
+                rqe->dataoffset = 0; /* for tidiness' sake */
+                rqe->groupoffset = 0; /* group starts at the beginning */
+                rqe->datalen = 0;
+                rqe->grouplen = m.grouplen;
+                rqe->buflen = m.grouplen;
+                rqe->flags = (m.flags | XFR_MALLOCED) & ~XFR_DATAOP; /* transfer flags without data op stuff */
+                rqe->sdno = sd->sdno; /* subdisk number */
+                rqe->driveno = sd->driveno;
+                if (build_rq_buffer(rqe, plex)) /* build the buffer */
+                    return REQUEST_ENOMEM; /* can't do it */
+                rqe->b.b_flags |= B_READ; /* we must read first */
+            }
+        }
+        if (*diskaddr < diskend) /* didn't finish the request on this stripe */
+            plex->multistripe++; /* count another one */
+    }
+    return REQUEST_OK;
+}
+
+/*
+ * Helper function for bre5: adjust the bounds of the transfers to minimize
+ * the buffer allocation.
+ *
+ * Each request can handle two of three different data ranges:
+ *
+ * 1. The range described by the parameters dataoffset and datalen,
+ *    for normal read or parityless write.
+ * 2. The range described by the parameters groupoffset and grouplen,
+ *    for recovery read and degraded write.
+ * 3. For normal write, the range depends on the kind of block. For
+ *    data blocks, the range is defined by dataoffset and datalen. For
+ *    parity blocks, it is defined by writeoffset and writelen.
+ *
+ * In order not to allocate more memory than necessary, this function
+ * adjusts the bounds parameter for each request to cover just the minimum
+ * necessary for the function it performs. This will normally vary from one
+ * request to the next.
+ *
+ * Things are slightly different for the parity block. In this case, the bounds
+ * defined by mp->writeoffset and mp->writelen also play a rôle. This case
+ * applies when both XFR_NORMAL_WRITE and XFR_PARITY_BLOCK are set in
+ * rqe->flags.
+ */
+void
+setrqebounds(struct rqelement *rqe, struct metrics *mp)
+{
+    /* parity block of a normal write */
+    if ((rqe->flags & (XFR_NORMAL_WRITE | XFR_PARITY_BLOCK)) == (XFR_NORMAL_WRITE | XFR_PARITY_BLOCK)) { /* case 3 */
+        if (rqe->flags & XFR_DEGRADED_WRITE) { /* also degraded write */
+            /*
+             * With a combined normal and degraded write, we
+             * will zero out the area of the degraded write
+             * in the second phase, so we don't need to read
+             * it in. Unfortunately, we need a way to tell
+             * build_rq_buffer the size of the buffer,
+             * and currently that's the length of the read.
+             * As a result, we read everything, even the stuff
+             * that we're going to nuke.
+             * FIXME XXX
+             */
+            if (mp->groupoffset < mp->writeoffset) { /* group operation starts lower */
+                rqe->sdoffset = mp->sdbase + mp->groupoffset; /* start of transfer */
+                rqe->dataoffset = mp->writeoffset - mp->groupoffset; /* data starts here */
+                rqe->groupoffset = 0; /* and the group at the beginning */
+            } else { /* individual data starts first */
+                rqe->sdoffset = mp->sdbase + mp->writeoffset; /* start of transfer */
+                rqe->dataoffset = 0; /* individual data starts at the beginning */
+                rqe->groupoffset = mp->groupoffset - mp->writeoffset; /* group starts here */
+            }
+            rqe->datalen = mp->writelen;
+            rqe->grouplen = mp->grouplen;
+        } else { /* just normal write (case 3) */
+            rqe->sdoffset = mp->sdbase + mp->writeoffset; /* start of transfer */
+            rqe->dataoffset = 0; /* degradation starts at the beginning */
+            rqe->groupoffset = 0; /* for tidiness' sake */
+            rqe->datalen = mp->writelen;
+            rqe->grouplen = 0;
+        }
+    } else if (rqe->flags & XFR_DATAOP) { /* data operation (case 1 or 3) */
+        if (rqe->flags & XFR_GROUPOP) { /* also a group operation (case 2) */
+            if (mp->groupoffset < mp->dataoffset) { /* group operation starts lower */
+                rqe->sdoffset = mp->sdbase + mp->groupoffset; /* start of transfer */
+                rqe->dataoffset = mp->dataoffset - mp->groupoffset; /* data starts here */
+                rqe->groupoffset = 0; /* and the group at the beginning */
+            } else { /* individual data starts first */
+                rqe->sdoffset = mp->sdbase + mp->dataoffset; /* start of transfer */
+                rqe->dataoffset = 0; /* individual data starts at the beginning */
+                rqe->groupoffset = mp->groupoffset - mp->dataoffset; /* group starts here */
+            }
+            rqe->datalen = mp->datalen;
+            rqe->grouplen = mp->grouplen;
+        } else { /* just data operation (case 1) */
+            rqe->sdoffset = mp->sdbase + mp->dataoffset; /* start of transfer */
+            rqe->dataoffset = 0; /* degradation starts at the beginning */
+            rqe->groupoffset = 0; /* for tidiness' sake */
+            rqe->datalen = mp->datalen;
+            rqe->grouplen = 0;
+        }
+    } else { /* just group operations (case 2) */
+        rqe->sdoffset = mp->sdbase + mp->groupoffset; /* start of transfer */
+        rqe->dataoffset = 0; /* for tidiness' sake */
+        rqe->groupoffset = 0; /* group starts at the beginning */
+        rqe->datalen = 0;
+        rqe->grouplen = mp->grouplen;
+    }
+    rqe->buflen = max(rqe->dataoffset + rqe->datalen, /* total buffer length */
+        rqe->groupoffset + rqe->grouplen);
+}
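
The Part A arithmetic above (stripeoffset, stripebase, psdno, firstsdno, initoffset and sdbase) is easier to check in isolation. The following is a minimal standalone sketch of the same formulas; the struct and function names (geo, raid5_map) are invented for illustration and are not part of Vinum, and plain longs stand in for daddr_t.

#include <stdio.h>

/* Illustrative restatement of the Part A address arithmetic in bre5(). */
struct geo {
    long stripebase;   /* plex-relative address of the start of the stripe */
    long stripeoffset; /* offset of the address within the stripe */
    long sdbase;       /* subdisk offset of the stripe base */
    long initoffset;   /* offset within the first data block */
    int psdno;         /* parity subdisk for this stripe */
    int firstsdno;     /* data subdisk holding the start address */
};

void
raid5_map(long diskaddr, long stripesize, int subdisks, struct geo *g)
{
    long datastripe = stripesize * (subdisks - 1);  /* data sectors per stripe */

    g->stripeoffset = diskaddr % datastripe;
    g->stripebase = diskaddr - g->stripeoffset;
    g->psdno = subdisks - 1 - (diskaddr / datastripe) % subdisks;
    g->firstsdno = g->stripeoffset / stripesize;
    if (g->firstsdno >= g->psdno)                   /* skip over the parity subdisk */
        g->firstsdno++;
    g->initoffset = g->stripeoffset % stripesize;
    g->sdbase = g->stripebase / (subdisks - 1);
}

int
main(void)
{
    struct geo g;

    raid5_map(1000, 128, 5, &g);                    /* 5 subdisks, 128-sector stripes */
    printf("stripe base %ld, parity sd %d, first data sd %d, offset %ld\n",
        g.stripebase, g.psdno, g.firstsdno, g.initoffset);
    return 0;
}

With 5 subdisks and a 128-sector stripe, plex address 1000 maps to stripe base 512, parity subdisk 3, first data subdisk 4 and block offset 104, which matches working the formulas by hand.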
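The comment on normal writes (phases i through iv) and on recovery reads describes the standard RAID-5 parity identity: the new parity is the old parity XOR the old data XOR the new data, and a missing block is the XOR of the parity block with every surviving data block. A minimal sketch over plain byte buffers, purely illustrative and outside the driver:

#include <stddef.h>

/* Phases ii and iii of a normal RAID-5 write: "remove" the old data from
 * the parity with exclusive or, then "insert" the new data the same way. */
void
update_parity(unsigned char *parity, const unsigned char *olddata,
    const unsigned char *newdata, size_t len)
{
    size_t i;

    for (i = 0; i < len; i++)
        parity[i] ^= olddata[i] ^ newdata[i];
}

/* A recovery read: rebuild the block on the dead subdisk by XORing the
 * parity block with every surviving data block in the stripe. */
void
recover_block(unsigned char *result, const unsigned char *parity,
    const unsigned char **survivors, int nsurvivors, size_t len)
{
    size_t i;
    int sd;

    for (i = 0; i < len; i++) {
        result[i] = parity[i];
        for (sd = 0; sd < nsurvivors; sd++)
            result[i] ^= survivors[sd][i];
    }
}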
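Most of setrqebounds() above is choosing one of three offset/length pairs and, when a data operation and a group operation share one request, merging them into a single buffer. The merge itself is the interval arithmetic sketched here; merge_bounds and struct bounds are invented names for illustration, and the real function writes the equivalent results into struct rqelement.

/* Merge a data range and a group range, both relative to the stripe base on
 * one subdisk, the way case 2 of setrqebounds() does: the transfer starts at
 * the lower of the two offsets, both ranges are re-expressed relative to that
 * start, and the buffer must reach the end of whichever range finishes later. */
struct bounds {
    long sdoffset;     /* subdisk offset where the transfer starts */
    long dataoffset;   /* data range start, relative to sdoffset */
    long groupoffset;  /* group range start, relative to sdoffset */
    long buflen;       /* sectors of buffer needed */
};

void
merge_bounds(long sdbase, long dataoffset, long datalen,
    long groupoffset, long grouplen, struct bounds *b)
{
    if (groupoffset < dataoffset) {          /* group operation starts lower */
        b->sdoffset = sdbase + groupoffset;
        b->dataoffset = dataoffset - groupoffset;
        b->groupoffset = 0;
    } else {                                 /* data starts first */
        b->sdoffset = sdbase + dataoffset;
        b->dataoffset = 0;
        b->groupoffset = groupoffset - dataoffset;
    }
    b->buflen = (b->dataoffset + datalen > b->groupoffset + grouplen) ?
        b->dataoffset + datalen : b->groupoffset + grouplen;
}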