mirror of https://git.FreeBSD.org/src.git
synced 2024-12-23 11:18:54 +00:00

Re-vamp how I/O is handled in volumes and plexes.

Analogous to the drive level, give each volume and plex a worker thread that picks up and processes incoming and completed BIOs. This should fix the data corruption issues that came up a few weeks ago and improve performance, especially of RAID5 plexes. The volume level still needs a little work, though.

This commit is contained in:
parent 54516c29e8
commit 67e3ab6ee5

Notes:
svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=135426
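Both new worker threads follow the same producer/consumer pattern as the existing drive-level thread: the GEOM start and done callbacks only queue the BIO on a mutex-protected tail queue and wake the worker, which dequeues and processes it in thread context. The sketch below is a condensed illustration of that pattern, assuming the gv_plex and gv_bioq definitions from the diff; it is not the committed code (see gv_plex_start() and gv_plex_worker() below for the real versions).

/*
 * Condensed sketch of the enqueue/worker pattern this commit adds at
 * the plex and volume levels (illustration only, not the committed code).
 */
static void
gv_enqueue_bio(struct gv_plex *p, struct bio *bp)
{
	struct gv_bioq *bq;

	bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
	bq->bp = bp;
	mtx_lock(&p->bqueue_mtx);
	TAILQ_INSERT_TAIL(&p->bqueue, bq, queue);
	wakeup(p);				/* Kick the worker. */
	mtx_unlock(&p->bqueue_mtx);
}

static void
gv_worker(void *arg)
{
	struct gv_plex *p = arg;
	struct gv_bioq *bq;

	mtx_lock(&p->bqueue_mtx);
	for (;;) {
		if (p->flags & GV_PLEX_THREAD_DIE)	/* Shutdown request. */
			break;
		bq = TAILQ_FIRST(&p->bqueue);
		if (bq == NULL) {
			/* Nothing queued; sleep until woken (or poll). */
			msleep(p, &p->bqueue_mtx, PRIBIO, "-", hz/10);
			continue;
		}
		TAILQ_REMOVE(&p->bqueue, bq, queue);
		mtx_unlock(&p->bqueue_mtx);
		/* Process bq->bp in thread context, outside the lock. */
		g_free(bq);
		mtx_lock(&p->bqueue_mtx);
	}
	mtx_unlock(&p->bqueue_mtx);
	p->flags |= GV_PLEX_THREAD_DEAD;	/* Ack the shutdown... */
	wakeup(p);				/* ...and wake the killer. */
	kthread_exit(ENXIO);
}

Completed BIOs re-enter the same queue tagged GV_BIO_DONE, so a single thread per object serializes new and completed requests.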
@@ -70,6 +70,7 @@ int gv_is_striped(struct gv_plex *);
int gv_is_open(struct g_geom *);
void gv_kill_drive_thread(struct gv_drive *);
void gv_kill_plex_thread(struct gv_plex *);
void gv_kill_vol_thread(struct gv_volume *);
int gv_object_type(struct gv_softc *, char *);
void gv_parse_config(struct gv_softc *, u_char *, int);
const char *gv_roughlength(off_t, int);
@@ -293,7 +293,7 @@ gv_sync_td(void *arg)
* This hack declares this bio as part of an initialization
* process, so that the lower levels allow it to get through.
*/
bp->bio_caller1 = p;
bp->bio_cflags |= GV_BIO_SYNCREQ;

/* Schedule it down ... */
g_io_request(bp, to);
@ -43,6 +43,10 @@ __FBSDID("$FreeBSD$");
|
||||
#include <geom/vinum/geom_vinum_raid5.h>
|
||||
#include <geom/vinum/geom_vinum.h>
|
||||
|
||||
static void gv_plex_completed_request(struct gv_plex *, struct bio *);
|
||||
static void gv_plex_normal_request(struct gv_plex *, struct bio *);
|
||||
static void gv_plex_worker(void *);
|
||||
|
||||
/* XXX: is this the place to catch dying subdisks? */
|
||||
static void
|
||||
gv_plex_orphan(struct g_consumer *cp)
|
||||
@@ -76,48 +80,39 @@ gv_plex_orphan(struct g_consumer *cp)
g_wither_geom(gp, error);
}

static void
void
gv_plex_done(struct bio *bp)
{
struct g_geom *gp;
struct gv_sd *s;

gp = bp->bio_to->geom;
struct gv_plex *p;
struct gv_bioq *bq;

s = bp->bio_caller1;
KASSERT(s != NULL, ("gv_plex_done: NULL s"));

if (bp->bio_error == 0)
s->initialized += bp->bio_length;

if (s->initialized >= s->size) {
gv_set_sd_state(s, GV_SD_UP, 0);
s->initialized = 0;
}

g_std_done(bp);
p = bp->bio_from->geom->softc;
bp->bio_cflags |= GV_BIO_DONE;
bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
bq->bp = bp;
mtx_lock(&p->bqueue_mtx);
TAILQ_INSERT_TAIL(&p->bqueue, bq, queue);
wakeup(p);
mtx_unlock(&p->bqueue_mtx);
}

/* Find the correct subdisk to send the bio to and build a bio to send. */
static int
gv_plexbuffer(struct bio *bp, struct bio **bp2, struct g_consumer **cp,
caddr_t addr, long bcount, off_t boff)
gv_plexbuffer(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff, off_t bcount)
{
struct g_geom *gp;
struct gv_plex *p;
struct gv_sd *s;
struct bio *cbp;
struct bio *cbp, *pbp;
int i, sdno;
off_t len_left, real_len, real_off, stripeend, stripeno, stripestart;

s = NULL;

gp = bp->bio_to->geom;
p = gp->softc;
off_t len_left, real_len, real_off;
off_t stripeend, stripeno, stripestart;

if (p == NULL || LIST_EMPTY(&p->subdisks))
return (ENXIO);

s = NULL;
gp = bp->bio_to->geom;

/*
* We only handle concatenated and striped plexes here. RAID5 plexes
* are handled in build_raid5_request().
@@ -190,10 +185,10 @@ gv_plexbuffer(struct bio *bp, struct bio **bp2, struct g_consumer **cp,
break;

case GV_SD_STALE:
if (bp->bio_caller1 != p)
if (!(bp->bio_cflags & GV_BIO_SYNCREQ))
return (ENXIO);

printf("FOO: setting sd %s to GV_SD_INITIALIZING\n", s->name);
printf("GEOM_VINUM: sd %s is initializing\n", s->name);
gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
break;

@@ -214,104 +209,366 @@ gv_plexbuffer(struct bio *bp, struct bio **bp2, struct g_consumer **cp,
cbp->bio_offset = real_off;
cbp->bio_length = real_len;
cbp->bio_data = addr;
if (bp->bio_caller1 == p) {
cbp->bio_caller1 = s;
cbp->bio_done = g_std_done;
cbp->bio_caller2 = s->consumer;
if ((bp->bio_cflags & GV_BIO_SYNCREQ)) {
cbp->bio_cflags |= GV_BIO_SYNCREQ;
cbp->bio_done = gv_plex_done;
} else
cbp->bio_done = g_std_done;
*bp2 = cbp;
*cp = s->consumer;
}

if (bp->bio_driver1 == NULL) {
bp->bio_driver1 = cbp;
} else {
pbp = bp->bio_driver1;
while (pbp->bio_caller1 != NULL)
pbp = pbp->bio_caller1;
pbp->bio_caller1 = cbp;
}

return (0);
}

static void
gv_plex_start(struct bio *bp)
{
struct g_geom *gp;
struct g_consumer *cp;
struct gv_plex *p;
struct gv_raid5_packet *wp;
struct bio *bp2;
caddr_t addr;
off_t boff;
long bcount, rcount;
int err;

gp = bp->bio_to->geom;
p = gp->softc;

/*
* We cannot handle this request if too many of our subdisks are
* inaccessible.
*/
if ((p->state < GV_PLEX_DEGRADED) && (bp->bio_caller1 != p)) {
g_io_deliver(bp, ENXIO); /* XXX: correct way? */
return;
}
struct gv_bioq *bq;

switch(bp->bio_cmd) {
case BIO_READ:
case BIO_WRITE:
case BIO_DELETE:
/*
* We split up the request in smaller packets and hand them
* down to our subdisks.
*/
wp = NULL;
addr = bp->bio_data;
boff = bp->bio_offset;
for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) {
/*
* RAID5 requests usually need to be split up in
* several subrequests.
*/
if (p->org == GV_PLEX_RAID5) {
wp = gv_new_raid5_packet();
wp->bio = bp;
err = gv_build_raid5_req(wp, bp, addr, bcount,
boff);
} else
err = gv_plexbuffer(bp, &bp2, &cp, addr, bcount,
boff);

if (err) {
if (p->org == GV_PLEX_RAID5)
gv_free_raid5_packet(wp);
bp->bio_completed += bcount;
if (bp->bio_error == 0)
bp->bio_error = err;
if (bp->bio_completed == bp->bio_length)
g_io_deliver(bp, bp->bio_error);
return;
}

if (p->org != GV_PLEX_RAID5) {
rcount = bp2->bio_length;
g_io_request(bp2, cp);

/*
* RAID5 subrequests are queued on a worklist
* and picked up from the worker thread. This
* ensures correct order.
*/
} else {
mtx_lock(&p->worklist_mtx);
TAILQ_INSERT_TAIL(&p->worklist, wp,
list);
mtx_unlock(&p->worklist_mtx);
wakeup(&p);
rcount = wp->length;
}

boff += rcount;
addr += rcount;
}
return;

break;
case BIO_GETATTR:
default:
g_io_deliver(bp, EOPNOTSUPP);
return;
}

/*
* We cannot handle this request if too many of our subdisks are
* inaccessible.
*/
p = bp->bio_to->geom->softc;
if ((p->state < GV_PLEX_DEGRADED) &&
!(bp->bio_cflags & GV_BIO_SYNCREQ)) {
g_io_deliver(bp, ENXIO);
return;
}

bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
bq->bp = bp;
mtx_lock(&p->bqueue_mtx);
TAILQ_INSERT_TAIL(&p->bqueue, bq, queue);
wakeup(p);
mtx_unlock(&p->bqueue_mtx);
}

static void
gv_plex_worker(void *arg)
{
struct bio *bp;
struct gv_plex *p;
struct gv_sd *s;
struct gv_bioq *bq;

p = arg;
KASSERT(p != NULL, ("NULL p"));

mtx_lock(&p->bqueue_mtx);
for (;;) {
/* We were signaled to exit. */
if (p->flags & GV_PLEX_THREAD_DIE)
break;

/* Take the first BIO from our queue. */
bq = TAILQ_FIRST(&p->bqueue);
if (bq == NULL) {
msleep(p, &p->bqueue_mtx, PRIBIO, "-", hz/10);
continue;
}
TAILQ_REMOVE(&p->bqueue, bq, queue);
mtx_unlock(&p->bqueue_mtx);

bp = bq->bp;

/* A completed request. */
if (bp->bio_cflags & GV_BIO_DONE) {
g_free(bq);
if (bp->bio_cflags & GV_BIO_SYNCREQ) {
s = bp->bio_to->private;
if (bp->bio_error == 0)
s->initialized += bp->bio_length;
if (s->initialized >= s->size) {
g_topology_lock();
gv_set_sd_state(s, GV_SD_UP,
GV_SETSTATE_CONFIG);
g_topology_unlock();
s->initialized = 0;
}
g_std_done(bp);
} else
gv_plex_completed_request(p, bp);
/*
* A sub-request that was held back because it interfered with
* another sub-request.
*/
} else if (bp->bio_cflags & GV_BIO_ONHOLD) {
/* Is it still locked out? */
if (gv_stripe_active(p, bp)) {
mtx_lock(&p->bqueue_mtx);
TAILQ_INSERT_TAIL(&p->bqueue, bq, queue);
mtx_unlock(&p->bqueue_mtx);
} else {
g_free(bq);
bp->bio_cflags &= ~GV_BIO_ONHOLD;
g_io_request(bp, bp->bio_caller2);
}

/* A normal request to this plex. */
} else {
g_free(bq);
gv_plex_normal_request(p, bp);
}

mtx_lock(&p->bqueue_mtx);
}
mtx_unlock(&p->bqueue_mtx);
p->flags |= GV_PLEX_THREAD_DEAD;
wakeup(p);

kthread_exit(ENXIO);
}

void
gv_plex_completed_request(struct gv_plex *p, struct bio *bp)
{
struct bio *cbp, *pbp;
struct gv_bioq *bq, *bq2;
struct gv_raid5_packet *wp;
int i;

wp = bp->bio_driver1;

switch (bp->bio_parent->bio_cmd) {
case BIO_READ:
if (wp == NULL)
break;

TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
if (bq->bp == bp) {
TAILQ_REMOVE(&wp->bits, bq, queue);
g_free(bq);
for (i = 0; i < wp->length; i++)
wp->data[i] ^= bp->bio_data[i];
break;
}
}
if (TAILQ_EMPTY(&wp->bits)) {
bp->bio_parent->bio_completed += wp->length;
if (wp->lockbase != -1)
TAILQ_REMOVE(&p->packets, wp, list);
g_free(wp);
}

break;

case BIO_WRITE:
if (wp == NULL)
break;

/* Check if we need to handle parity data. */
TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
if (bq->bp == bp) {
TAILQ_REMOVE(&wp->bits, bq, queue);
g_free(bq);
cbp = wp->parity;
if (cbp != NULL) {
for (i = 0; i < wp->length; i++)
cbp->bio_data[i] ^=
bp->bio_data[i];
}
break;
}
}

/* Handle parity data. */
if (TAILQ_EMPTY(&wp->bits)) {
if (wp->waiting != NULL) {
pbp = wp->waiting;
wp->waiting = NULL;
cbp = wp->parity;
for (i = 0; i < wp->length; i++)
cbp->bio_data[i] ^= pbp->bio_data[i];
g_io_request(pbp, pbp->bio_caller2);
} else if (wp->parity != NULL) {
cbp = wp->parity;
wp->parity = NULL;
g_io_request(cbp, cbp->bio_caller2);
} else {
bp->bio_parent->bio_completed += wp->length;
TAILQ_REMOVE(&p->packets, wp, list);
g_free(wp);
}
}

break;
}

pbp = bp->bio_parent;
if (pbp->bio_error == 0)
pbp->bio_error = bp->bio_error;

/* When the original request is finished, we deliver it. */
pbp->bio_inbed++;
if (pbp->bio_inbed == pbp->bio_children)
g_io_deliver(pbp, pbp->bio_error);

/* Clean up what we allocated. */
if (bp->bio_cflags & GV_BIO_MALLOC)
g_free(bp->bio_data);
g_destroy_bio(bp);
}

void
gv_plex_normal_request(struct gv_plex *p, struct bio *bp)
{
struct bio *cbp, *pbp;
struct gv_bioq *bq, *bq2;
struct gv_raid5_packet *wp, *wp2;
caddr_t addr;
off_t bcount, boff;
int err;

bcount = bp->bio_length;
addr = bp->bio_data;
boff = bp->bio_offset;

/* Walk over the whole length of the request, we might split it up. */
while (bcount > 0) {
wp = NULL;

/*
* RAID5 plexes need special treatment, as a single write
* request involves several read/write sub-requests.
*/
if (p->org == GV_PLEX_RAID5) {
wp = g_malloc(sizeof(*wp), M_WAITOK | M_ZERO);
wp->bio = bp;
TAILQ_INIT(&wp->bits);

err = gv_build_raid5_req(p, wp, bp, addr, boff, bcount);

/*
* Building the sub-request failed, we probably need to
* clean up a lot.
*/
if (err) {
printf("GEOM_VINUM: plex request failed for ");
g_print_bio(bp);
printf("\n");
TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
TAILQ_REMOVE(&wp->bits, bq, queue);
g_free(bq);
}
if (wp->waiting != NULL) {
if (wp->waiting->bio_cflags &
GV_BIO_MALLOC)
g_free(wp->waiting->bio_data);
g_destroy_bio(wp->waiting);
}
if (wp->parity != NULL) {
if (wp->parity->bio_cflags &
GV_BIO_MALLOC)
g_free(wp->parity->bio_data);
g_destroy_bio(wp->parity);
}
g_free(wp);

TAILQ_FOREACH_SAFE(wp, &p->packets, list, wp2) {
if (wp->bio == bp) {
TAILQ_REMOVE(&p->packets, wp,
list);
TAILQ_FOREACH_SAFE(bq,
&wp->bits, queue, bq2) {
TAILQ_REMOVE(&wp->bits,
bq, queue);
g_free(bq);
}
g_free(wp);
}
}

cbp = bp->bio_driver1;
while (cbp != NULL) {
pbp = cbp->bio_caller1;
if (cbp->bio_cflags & GV_BIO_MALLOC)
g_free(cbp->bio_data);
g_destroy_bio(cbp);
cbp = pbp;
}

g_io_deliver(bp, err);
return;
}

if (TAILQ_EMPTY(&wp->bits))
g_free(wp);
else if (wp->lockbase != -1)
TAILQ_INSERT_TAIL(&p->packets, wp, list);

/*
* Requests to concatenated and striped plexes go straight
* through.
*/
} else {
err = gv_plexbuffer(p, bp, addr, boff, bcount);

/* Building the sub-request failed. */
if (err) {
printf("GEOM_VINUM: plex request failed for ");
g_print_bio(bp);
printf("\n");
cbp = bp->bio_driver1;
while (cbp != NULL) {
pbp = cbp->bio_caller1;
g_destroy_bio(cbp);
cbp = pbp;
}
g_io_deliver(bp, err);
return;
}
}

/* Abuse bio_caller1 as linked list. */
pbp = bp->bio_driver1;
while (pbp->bio_caller1 != NULL)
pbp = pbp->bio_caller1;
bcount -= pbp->bio_length;
addr += pbp->bio_length;
boff += pbp->bio_length;
}

/* Fire off all sub-requests. */
pbp = bp->bio_driver1;
while (pbp != NULL) {
/*
* RAID5 sub-requests need to come in correct order, otherwise
* we trip over the parity, as it might be overwritten by
* another sub-request.
*/
if (pbp->bio_driver1 != NULL &&
gv_stripe_active(p, pbp)) {
pbp->bio_cflags |= GV_BIO_ONHOLD;
bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
bq->bp = pbp;
mtx_lock(&p->bqueue_mtx);
TAILQ_INSERT_TAIL(&p->bqueue, bq, queue);
mtx_unlock(&p->bqueue_mtx);
} else
g_io_request(pbp, pbp->bio_caller2);
pbp = pbp->bio_caller1;
}
}

static int
@@ -425,16 +682,12 @@ gv_plex_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
gp->softc = p;
p->geom = gp;

/* RAID5 plexes need a 'worker' thread, where IO is handled. */
if (p->org == GV_PLEX_RAID5) {
TAILQ_INIT(&p->worklist);
mtx_init(&p->worklist_mtx, "gvinum_worklist", NULL,
MTX_DEF);
p->flags &= ~GV_PLEX_THREAD_DIE;
kthread_create(gv_raid5_worker, gp, NULL, 0, 0,
"gv_raid5");
p->flags |= GV_PLEX_THREAD_ACTIVE;
}
TAILQ_INIT(&p->packets);
TAILQ_INIT(&p->bqueue);
mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
kthread_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s",
p->name);
p->flags |= GV_PLEX_THREAD_ACTIVE;

/* Attach a consumer to this provider. */
cp = g_new_consumer(gp);
@ -44,243 +44,62 @@ __FBSDID("$FreeBSD$");
|
||||
#include <geom/vinum/geom_vinum_raid5.h>
|
||||
#include <geom/vinum/geom_vinum.h>
|
||||
|
||||
int gv_raid5_parity(struct gv_raid5_packet *);
|
||||
int gv_stripe_active(struct gv_raid5_packet *, struct gv_plex *);
|
||||
|
||||
struct gv_raid5_bit *
|
||||
gv_new_raid5_bit(void)
|
||||
{
|
||||
struct gv_raid5_bit *r;
|
||||
r = g_malloc(sizeof(*r), M_NOWAIT | M_ZERO);
|
||||
KASSERT(r != NULL, ("gv_new_raid5_bit: NULL r"));
|
||||
return (r);
|
||||
}
|
||||
|
||||
struct gv_raid5_packet *
|
||||
gv_new_raid5_packet(void)
|
||||
{
|
||||
struct gv_raid5_packet *wp;
|
||||
|
||||
wp = g_malloc(sizeof(*wp), M_NOWAIT | M_ZERO);
|
||||
KASSERT(wp != NULL, ("gv_new_raid5_packet: NULL wp"));
|
||||
wp->state = SETUP;
|
||||
wp->type = JUNK;
|
||||
TAILQ_INIT(&wp->bits);
|
||||
|
||||
return (wp);
|
||||
}
|
||||
|
||||
void
|
||||
gv_free_raid5_packet(struct gv_raid5_packet *wp)
|
||||
{
|
||||
struct gv_raid5_bit *r, *r2;
|
||||
|
||||
/* Remove all the bits from this work packet. */
|
||||
TAILQ_FOREACH_SAFE(r, &wp->bits, list, r2) {
|
||||
TAILQ_REMOVE(&wp->bits, r, list);
|
||||
if (r->malloc)
|
||||
g_free(r->buf);
|
||||
if (r->bio != NULL)
|
||||
g_destroy_bio(r->bio);
|
||||
g_free(r);
|
||||
}
|
||||
|
||||
if (wp->bufmalloc == 1)
|
||||
g_free(wp->buf);
|
||||
g_free(wp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if the stripe that the work packet wants is already being used by
|
||||
* some other work packet.
|
||||
*/
|
||||
int
|
||||
gv_stripe_active(struct gv_raid5_packet *wp, struct gv_plex *sc)
|
||||
gv_stripe_active(struct gv_plex *p, struct bio *bp)
|
||||
{
|
||||
struct gv_raid5_packet *wpa;
|
||||
struct gv_raid5_packet *wp, *owp;
|
||||
int overlap;
|
||||
|
||||
TAILQ_FOREACH(wpa, &sc->worklist, list) {
|
||||
if (wpa->lockbase == wp->lockbase) {
|
||||
if (wpa == wp)
|
||||
return (0);
|
||||
return (1);
|
||||
wp = bp->bio_driver1;
|
||||
if (wp->lockbase == -1)
|
||||
return (0);
|
||||
|
||||
overlap = 0;
|
||||
TAILQ_FOREACH(owp, &p->packets, list) {
|
||||
if (owp == wp)
|
||||
break;
|
||||
if ((wp->lockbase >= owp->lockbase) &&
|
||||
(wp->lockbase <= owp->lockbase + owp->length)) {
|
||||
overlap++;
|
||||
break;
|
||||
}
|
||||
if ((wp->lockbase <= owp->lockbase) &&
|
||||
(wp->lockbase + wp->length >= owp->lockbase)) {
|
||||
overlap++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
* The "worker" thread that runs through the worklist and fires off the
* "subrequests" needed to fulfill a RAID5 read or write request.
*/
void
gv_raid5_worker(void *arg)
{
struct bio *bp;
struct g_geom *gp;
struct gv_plex *p;
struct gv_raid5_packet *wp, *wpt;
struct gv_raid5_bit *rbp, *rbpt;
int error, restart;

gp = arg;
p = gp->softc;

mtx_lock(&p->worklist_mtx);
for (;;) {
restart = 0;
TAILQ_FOREACH_SAFE(wp, &p->worklist, list, wpt) {
/* This request packet is already being processed. */
if (wp->state == IO)
continue;
/* This request packet is ready for processing. */
if (wp->state == VALID) {
/* Couldn't get the lock, try again. */
if ((wp->lockbase != -1) &&
gv_stripe_active(wp, p))
continue;

wp->state = IO;
mtx_unlock(&p->worklist_mtx);
TAILQ_FOREACH_SAFE(rbp, &wp->bits, list, rbpt)
g_io_request(rbp->bio, rbp->consumer);
mtx_lock(&p->worklist_mtx);
continue;
}
if (wp->state == FINISH) {
bp = wp->bio;
bp->bio_completed += wp->length;
/*
* Deliver the original request if we have
* finished.
*/
if (bp->bio_completed == bp->bio_length) {
mtx_unlock(&p->worklist_mtx);
g_io_deliver(bp, 0);
mtx_lock(&p->worklist_mtx);
}
TAILQ_REMOVE(&p->worklist, wp, list);
gv_free_raid5_packet(wp);
restart++;
/*break;*/
}
}
if (!restart) {
/* Self-destruct. */
if (p->flags & GV_PLEX_THREAD_DIE)
break;
error = msleep(p, &p->worklist_mtx, PRIBIO, "-",
hz/100);
}
}
mtx_unlock(&p->worklist_mtx);

g_trace(G_T_TOPOLOGY, "gv_raid5_worker die");

/* Signal our plex that we are dead. */
p->flags |= GV_PLEX_THREAD_DEAD;
wakeup(p);
kthread_exit(0);
}

/* Final bio transaction to write out the parity data. */
int
gv_raid5_parity(struct gv_raid5_packet *wp)
{
struct bio *bp;

bp = g_new_bio();
if (bp == NULL)
return (ENOMEM);

wp->type = ISPARITY;
bp->bio_cmd = BIO_WRITE;
bp->bio_data = wp->buf;
bp->bio_offset = wp->offset;
bp->bio_length = wp->length;
bp->bio_done = gv_raid5_done;
bp->bio_caller1 = wp;
bp->bio_caller2 = NULL;
g_io_request(bp, wp->parity);

return (0);
}

/* We end up here after each subrequest. */
void
gv_raid5_done(struct bio *bp)
{
struct bio *obp;
struct g_geom *gp;
struct gv_plex *p;
struct gv_raid5_packet *wp;
struct gv_raid5_bit *rbp;
off_t i;
int error;

wp = bp->bio_caller1;
rbp = bp->bio_caller2;
obp = wp->bio;
gp = bp->bio_from->geom;
p = gp->softc;

/* One less active subrequest. */
wp->active--;

switch (obp->bio_cmd) {
case BIO_READ:
/* Degraded reads need to handle parity data. */
if (wp->type == DEGRADED) {
for (i = 0; i < wp->length; i++)
wp->buf[i] ^= bp->bio_data[i];

/* When we're finished copy back the data we want. */
if (wp->active == 0)
bcopy(wp->buf, wp->data, wp->length);
}

break;

case BIO_WRITE:
/* Handle the parity data, if needed. */
if ((wp->type != NOPARITY) && (wp->type != ISPARITY)) {
for (i = 0; i < wp->length; i++)
wp->buf[i] ^= bp->bio_data[i];

/* Write out the parity data we calculated. */
if (wp->active == 0) {
wp->active++;
error = gv_raid5_parity(wp);
}
}
break;
}

/* This request group is done. */
if (wp->active == 0)
wp->state = FINISH;
return (overlap);
}

/* Build a request group to perform (part of) a RAID5 request. */
int
gv_build_raid5_req(struct gv_raid5_packet *wp, struct bio *bp, caddr_t addr,
long bcount, off_t boff)
gv_build_raid5_req(struct gv_plex *p, struct gv_raid5_packet *wp,
struct bio *bp, caddr_t addr, off_t boff, off_t bcount)
{
struct g_geom *gp;
struct gv_plex *p;
struct gv_raid5_bit *rbp;
struct gv_sd *broken, *original, *parity, *s;
int i, psdno, sdno;
off_t len_left, real_off, stripeend, stripeoff, stripestart;
struct gv_bioq *bq;
struct bio *cbp, *pbp;
int i, psdno, sdno, type;
off_t len_left, real_len, real_off, stripeend, stripeoff, stripestart;

gp = bp->bio_to->geom;
p = gp->softc;

if (p == NULL || LIST_EMPTY(&p->subdisks))
return (ENXIO);

/* We are optimistic and assume that this request will be OK. */
wp->type = NORMAL;
#define REQ_TYPE_NORMAL 0
#define REQ_TYPE_DEGRADED 1
#define REQ_TYPE_NOPARITY 2

type = REQ_TYPE_NORMAL;
original = parity = broken = NULL;

/* The number of the subdisk containing the parity stripe. */
@@ -330,29 +149,20 @@ gv_build_raid5_req(struct gv_raid5_packet *wp, struct bio *bp, caddr_t addr,

/* Our data stripe is missing. */
if (original->state != GV_SD_UP)
wp->type = DEGRADED;
type = REQ_TYPE_DEGRADED;
/* Our parity stripe is missing. */
if (parity->state != GV_SD_UP) {
/* We cannot take another failure if we're already degraded. */
if (wp->type != NORMAL)
if (type != REQ_TYPE_NORMAL)
return (ENXIO);
else
wp->type = NOPARITY;
type = REQ_TYPE_NOPARITY;
}

/*
* A combined write is necessary when the original data subdisk and the
* parity subdisk are both up, but one of the other subdisks isn't.
*/
if ((broken != NULL) && (broken != parity) && (broken != original))
wp->type = COMBINED;

wp->offset = real_off;
wp->length = (bcount <= len_left) ? bcount : len_left;
real_len = (bcount <= len_left) ? bcount : len_left;
wp->length = real_len;
wp->data = addr;
wp->original = original->consumer;
wp->parity = parity->consumer;
wp->lockbase = stripestart;
wp->lockbase = real_off;

KASSERT(wp->length >= 0, ("gv_build_raid5_request: wp->length < 0"));

@@ -363,58 +173,45 @@ gv_build_raid5_req(struct gv_raid5_packet *wp, struct bio *bp, caddr_t addr,
* the broken one plus the parity stripe and then recalculate
* the desired data.
*/
if (wp->type == DEGRADED) {
wp->buf = g_malloc(wp->length, M_NOWAIT | M_ZERO);
if (wp->buf == NULL)
return (ENOMEM);
wp->bufmalloc = 1;
if (type == REQ_TYPE_DEGRADED) {
bzero(wp->data, wp->length);
LIST_FOREACH(s, &p->subdisks, in_plex) {
/* Skip the broken subdisk. */
if (s == broken)
continue;
rbp = gv_new_raid5_bit();
rbp->consumer = s->consumer;
rbp->bio = g_new_bio();
if (rbp->bio == NULL)
cbp = g_clone_bio(bp);
if (cbp == NULL)
return (ENOMEM);
rbp->buf = g_malloc(wp->length,
M_NOWAIT | M_ZERO);
if (rbp->buf == NULL)
return (ENOMEM);
rbp->malloc = 1;
rbp->bio->bio_cmd = BIO_READ;
rbp->bio->bio_offset = wp->offset;
rbp->bio->bio_length = wp->length;
rbp->bio->bio_data = rbp->buf;
rbp->bio->bio_done = gv_raid5_done;
rbp->bio->bio_caller1 = wp;
rbp->bio->bio_caller2 = rbp;
TAILQ_INSERT_HEAD(&wp->bits, rbp, list);
wp->active++;
wp->rqcount++;
cbp->bio_data = g_malloc(real_len, M_WAITOK);
cbp->bio_cflags |= GV_BIO_MALLOC;
cbp->bio_offset = real_off;
cbp->bio_length = real_len;
cbp->bio_done = gv_plex_done;
cbp->bio_caller2 = s->consumer;
cbp->bio_driver1 = wp;

GV_ENQUEUE(bp, cbp, pbp);

bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
bq->bp = cbp;
TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
}

/* A normal read can be fulfilled with the original subdisk. */
} else {
rbp = gv_new_raid5_bit();
rbp->consumer = wp->original;
rbp->bio = g_new_bio();
if (rbp->bio == NULL)
cbp = g_clone_bio(bp);
if (cbp == NULL)
return (ENOMEM);
rbp->bio->bio_cmd = BIO_READ;
rbp->bio->bio_offset = wp->offset;
rbp->bio->bio_length = wp->length;
rbp->buf = addr;
rbp->bio->bio_data = rbp->buf;
rbp->bio->bio_done = gv_raid5_done;
rbp->bio->bio_caller1 = wp;
rbp->bio->bio_caller2 = rbp;
TAILQ_INSERT_HEAD(&wp->bits, rbp, list);
wp->active++;
wp->rqcount++;
cbp->bio_offset = real_off;
cbp->bio_length = real_len;
cbp->bio_data = addr;
cbp->bio_done = g_std_done;
cbp->bio_caller2 = original->consumer;

GV_ENQUEUE(bp, cbp, pbp);
}
if (wp->type != COMBINED)
wp->lockbase = -1;
wp->lockbase = -1;

break;

case BIO_WRITE:
@@ -424,164 +221,65 @@ gv_build_raid5_req(struct gv_raid5_packet *wp, struct bio *bp, caddr_t addr,
* recalculate the parity from the original data, and then
* write the parity stripe back out.
*/
if (wp->type == DEGRADED) {
wp->buf = g_malloc(wp->length, M_NOWAIT | M_ZERO);
if (wp->buf == NULL)
return (ENOMEM);
wp->bufmalloc = 1;

/* Copy the original data. */
bcopy(wp->data, wp->buf, wp->length);

if (type == REQ_TYPE_DEGRADED) {
/* Read all subdisks. */
LIST_FOREACH(s, &p->subdisks, in_plex) {
/* Skip the broken and the parity subdisk. */
if ((s == broken) ||
(s->consumer == wp->parity))
if ((s == broken) || (s == parity))
continue;

rbp = gv_new_raid5_bit();
rbp->consumer = s->consumer;
rbp->bio = g_new_bio();
if (rbp->bio == NULL)
cbp = g_clone_bio(bp);
if (cbp == NULL)
return (ENOMEM);
rbp->buf = g_malloc(wp->length,
M_NOWAIT | M_ZERO);
if (rbp->buf == NULL)
return (ENOMEM);
rbp->malloc = 1;
rbp->bio->bio_cmd = BIO_READ;
rbp->bio->bio_data = rbp->buf;
rbp->bio->bio_offset = wp->offset;
rbp->bio->bio_length = wp->length;
rbp->bio->bio_done = gv_raid5_done;
rbp->bio->bio_caller1 = wp;
rbp->bio->bio_caller2 = rbp;
TAILQ_INSERT_HEAD(&wp->bits, rbp, list);
wp->active++;
wp->rqcount++;
cbp->bio_cmd = BIO_READ;
cbp->bio_data = g_malloc(real_len, M_WAITOK);
cbp->bio_cflags |= GV_BIO_MALLOC;
cbp->bio_offset = real_off;
cbp->bio_length = real_len;
cbp->bio_done = gv_plex_done;
cbp->bio_caller2 = s->consumer;
cbp->bio_driver1 = wp;

GV_ENQUEUE(bp, cbp, pbp);

bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
bq->bp = cbp;
TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
}

/* Write the parity data. */
cbp = g_clone_bio(bp);
if (cbp == NULL)
return (ENOMEM);
cbp->bio_data = g_malloc(real_len, M_WAITOK);
cbp->bio_cflags |= GV_BIO_MALLOC;
bcopy(addr, cbp->bio_data, real_len);
cbp->bio_offset = real_off;
cbp->bio_length = real_len;
cbp->bio_done = gv_plex_done;
cbp->bio_caller2 = parity->consumer;
cbp->bio_driver1 = wp;
wp->parity = cbp;

/*
* When we don't have the parity stripe we just write out the
* data.
* When the parity stripe is missing we just write out the data.
*/
} else if (wp->type == NOPARITY) {
rbp = gv_new_raid5_bit();
rbp->consumer = wp->original;
rbp->bio = g_new_bio();
if (rbp->bio == NULL)
} else if (type == REQ_TYPE_NOPARITY) {
cbp = g_clone_bio(bp);
if (cbp == NULL)
return (ENOMEM);
rbp->bio->bio_cmd = BIO_WRITE;
rbp->bio->bio_offset = wp->offset;
rbp->bio->bio_length = wp->length;
rbp->bio->bio_data = addr;
rbp->bio->bio_done = gv_raid5_done;
rbp->bio->bio_caller1 = wp;
rbp->bio->bio_caller2 = rbp;
TAILQ_INSERT_HEAD(&wp->bits, rbp, list);
wp->active++;
wp->rqcount++;
cbp->bio_offset = real_off;
cbp->bio_length = real_len;
cbp->bio_data = addr;
cbp->bio_done = gv_plex_done;
cbp->bio_caller2 = original->consumer;
cbp->bio_driver1 = wp;

/*
* A combined write means that our data subdisk and the parity
* subdisks are both up, but another subdisk isn't. We need to
* read all valid stripes including the parity to recalculate
* the data of the stripe that is missing. Then we write our
* original data, and together with the other data stripes
* recalculate the parity again.
*/
} else if (wp->type == COMBINED) {
wp->buf = g_malloc(wp->length, M_NOWAIT | M_ZERO);
if (wp->buf == NULL)
return (ENOMEM);
wp->bufmalloc = 1;
GV_ENQUEUE(bp, cbp, pbp);

/* Get the data from all subdisks. */
LIST_FOREACH(s, &p->subdisks, in_plex) {
/* Skip the broken subdisk. */
if (s == broken)
continue;

rbp = gv_new_raid5_bit();
rbp->consumer = s->consumer;
rbp->bio = g_new_bio();
if (rbp->bio == NULL)
return (ENOMEM);
rbp->bio->bio_cmd = BIO_READ;
rbp->buf = g_malloc(wp->length,
M_NOWAIT | M_ZERO);
if (rbp->buf == NULL)
return (ENOMEM);
rbp->malloc = 1;
rbp->bio->bio_data = rbp->buf;
rbp->bio->bio_offset = wp->offset;
rbp->bio->bio_length = wp->length;
rbp->bio->bio_done = gv_raid5_done;
rbp->bio->bio_caller1 = wp;
rbp->bio->bio_caller2 = rbp;
TAILQ_INSERT_HEAD(&wp->bits, rbp, list);
wp->active++;
wp->rqcount++;
}

/* Write the original data. */
rbp = gv_new_raid5_bit();
rbp->consumer = wp->original;
rbp->buf = addr;
rbp->bio = g_new_bio();
if (rbp->bio == NULL)
return (ENOMEM);
rbp->bio->bio_cmd = BIO_WRITE;
rbp->bio->bio_data = rbp->buf;
rbp->bio->bio_offset = wp->offset;
rbp->bio->bio_length = wp->length;
rbp->bio->bio_done = gv_raid5_done;
rbp->bio->bio_caller1 = wp;
rbp->bio->bio_caller2 = rbp;
/*
* Insert at the tail, because we want to read the old
* data first.
*/
TAILQ_INSERT_TAIL(&wp->bits, rbp, list);
wp->active++;
wp->rqcount++;

/* Get the rest of the data again. */
LIST_FOREACH(s, &p->subdisks, in_plex) {
/*
* Skip the broken subdisk, the parity, and the
* one we just wrote.
*/
if ((s == broken) ||
(s->consumer == wp->parity) ||
(s->consumer == wp->original))
continue;
rbp = gv_new_raid5_bit();
rbp->consumer = s->consumer;
rbp->bio = g_new_bio();
if (rbp->bio == NULL)
return (ENOMEM);
rbp->bio->bio_cmd = BIO_READ;
rbp->buf = g_malloc(wp->length,
M_NOWAIT | M_ZERO);
if (rbp->buf == NULL)
return (ENOMEM);
rbp->malloc = 1;
rbp->bio->bio_data = rbp->buf;
rbp->bio->bio_offset = wp->offset;
rbp->bio->bio_length = wp->length;
rbp->bio->bio_done = gv_raid5_done;
rbp->bio->bio_caller1 = wp;
rbp->bio->bio_caller2 = rbp;
/*
* Again, insert at the tail to keep correct
* order.
*/
TAILQ_INSERT_TAIL(&wp->bits, rbp, list);
wp->active++;
wp->rqcount++;
}

bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
bq->bp = cbp;
TAILQ_INSERT_TAIL(&wp->bits, bq, queue);

/*
* A normal write request goes to the original subdisk, then we
@@ -589,52 +287,83 @@ gv_build_raid5_req(struct gv_raid5_packet *wp, struct bio *bp, caddr_t addr,
* out the parity again.
*/
} else {
wp->buf = g_malloc(wp->length, M_NOWAIT | M_ZERO);
if (wp->buf == NULL)
/* Read old parity. */
cbp = g_clone_bio(bp);
if (cbp == NULL)
return (ENOMEM);
wp->bufmalloc = 1;
LIST_FOREACH(s, &p->subdisks, in_plex) {
/* Skip the parity stripe. */
if (s->consumer == wp->parity)
continue;
cbp->bio_cmd = BIO_READ;
cbp->bio_data = g_malloc(real_len, M_WAITOK);
cbp->bio_cflags |= GV_BIO_MALLOC;
cbp->bio_offset = real_off;
cbp->bio_length = real_len;
cbp->bio_done = gv_plex_done;
cbp->bio_caller2 = parity->consumer;
cbp->bio_driver1 = wp;

rbp = gv_new_raid5_bit();
rbp->consumer = s->consumer;
rbp->bio = g_new_bio();
if (rbp->bio == NULL)
return (ENOMEM);
/*
* The data for the original stripe is written,
* the others need to be read in for the parity
* calculation.
*/
if (s->consumer == wp->original) {
rbp->bio->bio_cmd = BIO_WRITE;
rbp->buf = addr;
} else {
rbp->bio->bio_cmd = BIO_READ;
rbp->buf = g_malloc(wp->length,
M_NOWAIT | M_ZERO);
if (rbp->buf == NULL)
return (ENOMEM);
rbp->malloc = 1;
}
rbp->bio->bio_data = rbp->buf;
rbp->bio->bio_offset = wp->offset;
rbp->bio->bio_length = wp->length;
rbp->bio->bio_done = gv_raid5_done;
rbp->bio->bio_caller1 = wp;
rbp->bio->bio_caller2 = rbp;
TAILQ_INSERT_HEAD(&wp->bits, rbp, list);
wp->active++;
wp->rqcount++;
}
GV_ENQUEUE(bp, cbp, pbp);

bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
bq->bp = cbp;
TAILQ_INSERT_TAIL(&wp->bits, bq, queue);

/* Read old data. */
cbp = g_clone_bio(bp);
if (cbp == NULL)
return (ENOMEM);
cbp->bio_cmd = BIO_READ;
cbp->bio_data = g_malloc(real_len, M_WAITOK);
cbp->bio_cflags |= GV_BIO_MALLOC;
cbp->bio_offset = real_off;
cbp->bio_length = real_len;
cbp->bio_done = gv_plex_done;
cbp->bio_caller2 = original->consumer;
cbp->bio_driver1 = wp;

GV_ENQUEUE(bp, cbp, pbp);

bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
bq->bp = cbp;
TAILQ_INSERT_TAIL(&wp->bits, bq, queue);

/* Write new data. */
cbp = g_clone_bio(bp);
if (cbp == NULL)
return (ENOMEM);
cbp->bio_data = addr;
cbp->bio_offset = real_off;
cbp->bio_length = real_len;
cbp->bio_done = gv_plex_done;
cbp->bio_caller2 = original->consumer;

cbp->bio_driver1 = wp;

/*
* We must not write the new data until the old data
* was read, so hold this BIO back until we're ready
* for it.
*/
wp->waiting = cbp;

/* The final bio for the parity. */
cbp = g_clone_bio(bp);
if (cbp == NULL)
return (ENOMEM);
cbp->bio_data = g_malloc(real_len, M_WAITOK | M_ZERO);
cbp->bio_cflags |= GV_BIO_MALLOC;
cbp->bio_offset = real_off;
cbp->bio_length = real_len;
cbp->bio_done = gv_plex_done;
cbp->bio_caller2 = parity->consumer;
cbp->bio_driver1 = wp;

/* Remember that this is the BIO for the parity data. */
wp->parity = cbp;
}
break;

default:
return (EINVAL);
}

wp->state = VALID;
return (0);
}

@@ -32,22 +32,23 @@
/*
* A single RAID5 request usually needs more than one I/O transaction,
* depending on the state of the associated subdisks and the direction of the
* transaction (read or write). Every subrequest of a RAID5 request,
* represented by a gv_raid_packet, is defined by a gv_raid5_bit.
* transaction (read or write).
*/

/* A subrequest of a RAID5 read/write operation. */
struct gv_raid5_bit {
struct bio *bio; /* BIO of this subrequest. */
caddr_t buf; /* Data buffer of this subrequest. */
int malloc; /* Flag if data buffer was malloced. */
struct g_consumer *consumer; /* Consumer to send the BIO to. */
TAILQ_ENTRY(gv_raid5_bit) list; /* Entry in the list of this request. */
};
#define GV_ENQUEUE(bp, cbp, pbp) \
do { \
if (bp->bio_driver1 == NULL) { \
bp->bio_driver1 = cbp; \
} else { \
pbp = bp->bio_driver1; \
while (pbp->bio_caller1 != NULL) \
pbp = pbp->bio_caller1; \
pbp->bio_caller1 = cbp; \
} \
} while (0);

/* Container for one or more gv_raid5_bits; represents a RAID5 I/O request. */
struct gv_raid5_packet {
caddr_t buf; /* Data buffer of this RAID5 request. */
caddr_t data; /* Data buffer of this sub-request. */
off_t length; /* Size of data buffer. */
off_t lockbase; /* Deny access to our plex offset. */
off_t offset; /* The drive offset of the subdisk. */
@@ -56,39 +57,17 @@ struct gv_raid5_packet {
int rqcount; /* Count of subrequests. */

struct bio *bio; /* Pointer to the original bio. */
caddr_t data; /* Pointer to the original data. */
struct bio *parity; /* The bio containing the parity data. */
struct bio *waiting; /* A bio that needs to wait for other bios. */

struct g_consumer *original; /* Consumer to the data stripe. */
struct g_consumer *parity; /* Consumer to the parity stripe. */

/* State of this RAID5 packet. */
enum {
SETUP, /* Newly created. */
VALID, /* Ready for processing. */
IO, /* Currently doing I/O. */
FINISH /* Packet has finished. */
} state;

/* Type of this RAID5 transaction. */
enum {
JUNK, /* Newly created, not valid. */
NORMAL, /* Normal read or write. */
ISPARITY, /* Containing only parity data. */
NOPARITY, /* Parity stripe not available. */
DEGRADED, /* Data stripe not available. */
COMBINED /* Data and parity stripes ok, others not. */
} type;

TAILQ_HEAD(,gv_raid5_bit) bits; /* List of subrequests. */
TAILQ_ENTRY(gv_raid5_packet) list; /* Entry in plex's packet list. */
TAILQ_HEAD(,gv_bioq) bits; /* List of subrequests. */
TAILQ_ENTRY(gv_raid5_packet) list; /* Entry in plex's packet list. */
};

int gv_build_raid5_req(struct gv_raid5_packet *, struct bio *, caddr_t,
long, off_t);
void gv_free_raid5_packet(struct gv_raid5_packet *);
void gv_raid5_done(struct bio *);
int gv_stripe_active(struct gv_plex *, struct bio *);
int gv_build_raid5_req(struct gv_plex *, struct gv_raid5_packet *,
struct bio *, caddr_t, off_t, off_t);
void gv_raid5_worker(void *);
struct gv_raid5_packet *gv_new_raid5_packet(void);
struct gv_raid5_bit *gv_new_raid5_bit(void);
void gv_plex_done(struct bio *);

#endif /* !_GEOM_VINUM_RAID5_H_ */
@@ -166,6 +166,7 @@ gv_rm_vol(struct gv_softc *sc, struct gctl_req *req, struct gv_volume *v, int fl

/* Clean up and let our geom fade away. */
LIST_REMOVE(v, volume);
gv_kill_vol_thread(v);
g_free(v);
if (gp != NULL) {
gp->softc = NULL;
@@ -832,12 +832,25 @@ gv_kill_drive_thread(struct gv_drive *d)
void
gv_kill_plex_thread(struct gv_plex *p)
{
if ((p->org == GV_PLEX_RAID5) && (p->flags & GV_PLEX_THREAD_ACTIVE)) {
if (p->flags & GV_PLEX_THREAD_ACTIVE) {
p->flags |= GV_PLEX_THREAD_DIE;
wakeup(p);
while (!(p->flags & GV_PLEX_THREAD_DEAD))
tsleep(p, PRIBIO, "gv_die", hz);
p->flags &= ~GV_PLEX_THREAD_ACTIVE;
mtx_destroy(&p->worklist_mtx);
mtx_destroy(&p->bqueue_mtx);
}
}

void
gv_kill_vol_thread(struct gv_volume *v)
{
if (v->flags & GV_VOL_THREAD_ACTIVE) {
v->flags |= GV_VOL_THREAD_DIE;
wakeup(v);
while (!(v->flags & GV_VOL_THREAD_DEAD))
tsleep(v, PRIBIO, "gv_die", hz);
v->flags &= ~GV_VOL_THREAD_ACTIVE;
mtx_destroy(&v->bqueue_mtx);
}
}
@@ -111,6 +111,8 @@
#define GV_BIO_DONE 0x01
#define GV_BIO_MALLOC 0x02
#define GV_BIO_ONHOLD 0x04
#define GV_BIO_SYNCREQ 0x08
#define GV_BIO_SUCCEED 0x10

/*
* hostname is 256 bytes long, but we don't need to shlep multiple copies in
@@ -269,8 +271,9 @@ struct gv_plex {

off_t synced; /* Count of synced bytes. */

struct mtx worklist_mtx; /* Mutex for RAID5 worklist. */
TAILQ_HEAD(,gv_raid5_packet) worklist; /* List of RAID5 work packets. */
struct mtx bqueue_mtx; /* Lock for the BIO queue. */
TAILQ_HEAD(,gv_bioq) bqueue; /* BIO queue. */
TAILQ_HEAD(,gv_raid5_packet) packets; /* RAID5 sub-requests. */

LIST_HEAD(,gv_sd) subdisks; /* List of attached subdisks. */
LIST_ENTRY(gv_plex) in_volume; /* Plex list of associated volume. */
@@ -292,6 +295,14 @@ struct gv_volume {
#define GV_VOL_DOWN 0
#define GV_VOL_UP 1

int flags;
#define GV_VOL_THREAD_ACTIVE 0x01 /* Volume has an active thread. */
#define GV_VOL_THREAD_DIE 0x02 /* Signal the thread to die. */
#define GV_VOL_THREAD_DEAD 0x04 /* The thread has died. */

struct mtx bqueue_mtx; /* Lock for the BIO queue. */
TAILQ_HEAD(,gv_bioq) bqueue; /* BIO queue. */

LIST_HEAD(,gv_plex) plexes; /* List of attached plexes. */
LIST_ENTRY(gv_volume) volume; /* Entry in vinum config. */

@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/bio.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/kthread.h>
|
||||
#include <sys/libkern.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/malloc.h>
|
||||
@ -42,6 +43,9 @@ __FBSDID("$FreeBSD$");
|
||||
#include <geom/vinum/geom_vinum_var.h>
|
||||
#include <geom/vinum/geom_vinum.h>
|
||||
|
||||
static void gv_vol_completed_request(struct gv_volume *, struct bio *);
|
||||
static void gv_vol_normal_request(struct gv_volume *, struct bio *);
|
||||
|
||||
static void
|
||||
gv_volume_orphan(struct g_consumer *cp)
|
||||
{
|
||||
@ -62,8 +66,10 @@ gv_volume_orphan(struct g_consumer *cp)
|
||||
if (!LIST_EMPTY(&gp->consumer))
|
||||
return;
|
||||
v = gp->softc;
|
||||
if (v != NULL)
|
||||
if (v != NULL) {
|
||||
gv_kill_vol_thread(v);
|
||||
v->geom = NULL;
|
||||
}
|
||||
gp->softc = NULL;
|
||||
g_wither_geom(gp, error);
|
||||
}
|
||||
@@ -72,79 +78,186 @@ gv_volume_orphan(struct g_consumer *cp)
static void
gv_volume_done(struct bio *bp)
{
struct g_consumer *cp;

/* The next plex in this volume. */
cp = LIST_NEXT(bp->bio_from, consumer);
struct gv_volume *v;
struct gv_bioq *bq;

switch (bp->bio_cmd) {
case BIO_READ:
/*
* If no error occurred on this request, or if we have no plex
* left, finish here...
*/
if ((bp->bio_error == 0) || (cp == NULL)) {
g_std_done(bp);
return;
}

/* ... or try to read from the next plex. */
g_io_request(bp, cp);
return;

case BIO_WRITE:
case BIO_DELETE:
/* No more plexes left. */
if (cp == NULL) {
/*
* Clear any errors if one of the previous writes
* succeeded.
*/
if (bp->bio_caller1 == (int *)1)
bp->bio_error = 0;
g_std_done(bp);
return;
}

/* If this write request had no errors, remember that fact... */
if (bp->bio_error == 0)
bp->bio_caller1 = (int *)1;

/* ... and write to the next plex. */
g_io_request(bp, cp);
return;
}
v = bp->bio_from->geom->softc;
bp->bio_cflags |= GV_BIO_DONE;
bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
bq->bp = bp;
mtx_lock(&v->bqueue_mtx);
TAILQ_INSERT_TAIL(&v->bqueue, bq, queue);
wakeup(v);
mtx_unlock(&v->bqueue_mtx);
}

static void
gv_volume_start(struct bio *bp)
{
struct g_geom *gp;
struct bio *bp2;
struct gv_volume *v;
struct gv_bioq *bq;

gp = bp->bio_to->geom;
v = gp->softc;
if (v->state != GV_VOL_UP) {
g_io_deliver(bp, ENXIO);
return;
}
switch(bp->bio_cmd) {
case BIO_READ:
case BIO_WRITE:
case BIO_DELETE:
bp2 = g_clone_bio(bp);
if (bp2 == NULL) {
g_io_deliver(bp, ENOMEM);
return;
}
bp2->bio_done = gv_volume_done;
g_io_request(bp2, LIST_FIRST(&gp->consumer));
return;
break;
case BIO_GETATTR:
default:
g_io_deliver(bp, EOPNOTSUPP);
return;
}

v = bp->bio_to->geom->softc;
if (v->state != GV_VOL_UP) {
g_io_deliver(bp, ENXIO);
return;
}

bq = g_malloc(sizeof(*bq), M_NOWAIT | M_ZERO);
bq->bp = bp;
mtx_lock(&v->bqueue_mtx);
TAILQ_INSERT_TAIL(&v->bqueue, bq, queue);
wakeup(v);
mtx_unlock(&v->bqueue_mtx);
}

static void
gv_vol_worker(void *arg)
{
struct bio *bp;
struct gv_volume *v;
struct gv_bioq *bq;

v = arg;
KASSERT(v != NULL, ("NULL v"));
mtx_lock(&v->bqueue_mtx);
for (;;) {
/* We were signaled to exit. */
if (v->flags & GV_VOL_THREAD_DIE)
break;

/* Take the first BIO from our queue. */
bq = TAILQ_FIRST(&v->bqueue);
if (bq == NULL) {
msleep(v, &v->bqueue_mtx, PRIBIO, "-", hz/10);
continue;
}
TAILQ_REMOVE(&v->bqueue, bq, queue);
mtx_unlock(&v->bqueue_mtx);

bp = bq->bp;
g_free(bq);

if (bp->bio_cflags & GV_BIO_DONE)
gv_vol_completed_request(v, bp);
else
gv_vol_normal_request(v, bp);

mtx_lock(&v->bqueue_mtx);
}
mtx_unlock(&v->bqueue_mtx);
v->flags |= GV_VOL_THREAD_DEAD;
wakeup(v);

kthread_exit(ENXIO);
}

static void
gv_vol_completed_request(struct gv_volume *v, struct bio *bp)
{
struct bio *pbp;
struct gv_bioq *bq;

pbp = bp->bio_parent;

if (pbp->bio_error == 0)
pbp->bio_error = bp->bio_error;

switch (pbp->bio_cmd) {
case BIO_READ:
if (bp->bio_error) {
g_destroy_bio(bp);
pbp->bio_children--;
bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
bq->bp = pbp;
mtx_lock(&v->bqueue_mtx);
TAILQ_INSERT_TAIL(&v->bqueue, bq, queue);
mtx_unlock(&v->bqueue_mtx);
return;
}
break;
case BIO_WRITE:
case BIO_DELETE:
break;
}

/* When the original request is finished, we deliver it. */
pbp->bio_inbed++;
if (pbp->bio_inbed == pbp->bio_children) {
pbp->bio_completed = bp->bio_length;
g_io_deliver(pbp, pbp->bio_error);
}

g_destroy_bio(bp);
}

static void
gv_vol_normal_request(struct gv_volume *v, struct bio *bp)
{
struct g_geom *gp;
struct gv_plex *p;
struct bio *cbp, *pbp;

gp = v->geom;

switch (bp->bio_cmd) {
case BIO_READ:
cbp = g_clone_bio(bp);
if (cbp == NULL) {
g_io_deliver(bp, ENOMEM);
return;
}
cbp->bio_done = gv_volume_done;
LIST_FOREACH(p, &v->plexes, in_volume) {
if (p->state >= GV_PLEX_DEGRADED)
break;
}
g_io_request(cbp, p->consumer);

break;

case BIO_WRITE:
case BIO_DELETE:
LIST_FOREACH(p, &v->plexes, in_volume) {
if (p->state < GV_PLEX_DEGRADED)
continue;

cbp = g_clone_bio(bp);
if (cbp == NULL) /* XXX */
g_io_deliver(bp, ENOMEM);
cbp->bio_done = gv_volume_done;
cbp->bio_caller2 = p->consumer;

if (bp->bio_driver1 == NULL) {
bp->bio_driver1 = cbp;
} else {
pbp = bp->bio_driver1;
while (pbp->bio_caller1 != NULL)
pbp = pbp->bio_caller1;
pbp->bio_caller1 = cbp;
}
}

/* Fire off all sub-requests. */
pbp = bp->bio_driver1;
while (pbp != NULL) {
g_io_request(pbp, pbp->bio_caller2);
pbp = pbp->bio_caller1;
}

break;
}
}

static int
@@ -211,6 +324,11 @@ gv_volume_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
gp->access = gv_volume_access;
gp->softc = v;
first++;
TAILQ_INIT(&v->bqueue);
mtx_init(&v->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
kthread_create(gv_vol_worker, v, NULL, 0, 0, "gv_v %s",
v->name);
v->flags |= GV_VOL_THREAD_ACTIVE;
} else
gp = v->geom;

@@ -261,9 +379,13 @@ static int
gv_volume_destroy_geom(struct gctl_req *req, struct g_class *mp,
struct g_geom *gp)
{
struct gv_volume *v;

g_trace(G_T_TOPOLOGY, "gv_volume_destroy_geom: %s", gp->name);
g_topology_assert();

v = gp->softc;
gv_kill_vol_thread(v);
g_wither_geom(gp, ENXIO);
return (0);
}