
Implement a new reading algorithm which uses the parity component for reading
as well, even when the device is in the complete state.

I observe a 40% speed-up with this option for random read operations,
but a slowdown for sequential reads.
Without this option, reading from a RAID3 device built from 5 components
(c0-c4, where c4 is the parity component) looks like this:

	Request no.	Used components
	1		c0+c1+c2+c3
	2		c0+c1+c2+c3
	3		c0+c1+c2+c3

With the new feature:

	Request no.	Used components
	1		c0+c1+c2+c3
	2		(c1^c2^c3^c4)+c1+c2+c3
	3		c0+(c0^c2^c3^c4)+c2+c3
	4		c0+c1+(c0^c1^c3^c4)+c3
	5		c0+c1+c2+(c0^c1^c2^c4)
	6		c0+c1+c2+c3
	[...]
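
For illustration only, here is a minimal C sketch (a hypothetical helper, not
the driver's own code) of the reconstruction the table implies: whichever data
component a request skips is recovered by XOR-ing the sectors read from the
remaining data components and the parity component.

	#include <stddef.h>
	#include <stdint.h>

	/*
	 * Sketch: rebuild the skipped component from the other reads.
	 * bufs[] holds the sectors read from the components that were
	 * actually used (the other data components plus the parity
	 * component c4), so the missing one is simply their XOR.
	 */
	static void
	reconstruct_skipped(uint8_t *dst, uint8_t **bufs, size_t nbufs,
	    size_t len)
	{
		size_t i, n;

		for (i = 0; i < len; i++) {
			uint8_t x = 0;

			for (n = 0; n < nbufs; n++)
				x ^= bufs[n][i];
			dst[i] = x;	/* e.g. c0 = c1 ^ c2 ^ c3 ^ c4 */
		}
	}

In the driver this corresponds to the g_raid3_xor() calls performed in
g_raid3_gather(), as shown in the g_raid3.c diff below.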
Pawel Jakub Dawidek 2004-08-21 18:11:46 +00:00
parent 9295c6c555
commit f5a2f7feac
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=134124
5 changed files with 176 additions and 114 deletions

View File

@ -58,6 +58,8 @@ struct g_command class_commands[] = {
{ 'd', "dynamic", NULL, G_TYPE_NONE },
{ 'h', "hardcode", NULL, G_TYPE_NONE },
{ 'n', "noautosync", NULL, G_TYPE_NONE },
{ 'r', "round_robin", NULL, G_TYPE_NONE },
{ 'R', "noround_robin", NULL, G_TYPE_NONE },
G_OPT_SENTINEL
}
},
@ -73,6 +75,7 @@ struct g_command class_commands[] = {
{
{ 'h', "hardcode", NULL, G_TYPE_NONE },
{ 'n', "noautosync", NULL, G_TYPE_NONE },
{ 'r', "round_robin", NULL, G_TYPE_NONE },
G_OPT_SENTINEL
}
},
@ -99,10 +102,10 @@ void
usage(const char *comm)
{
fprintf(stderr,
"usage: %s label [-hnv] name prov prov prov [prov [...]]\n"
"usage: %s label [-hnrv] name prov prov prov [prov [...]]\n"
" %s clear [-v] prov [prov [...]]\n"
" %s dump prov [prov [...]]\n"
" %s configure [-adhnv] name\n"
" %s configure [-adhnrRv] name\n"
" %s rebuild [-v] name prov\n"
" %s insert [-hv] <-n number> name prov\n"
" %s remove [-v] <-n number> name\n"
@ -141,7 +144,8 @@ raid3_label(struct gctl_req *req)
u_char sector[512];
const char *str;
char param[16];
int *hardcode, *nargs, *noautosync, error, i;
int *hardcode, *nargs, *noautosync, *round_robin;
int error, i;
unsigned sectorsize;
off_t mediasize;
@ -184,6 +188,13 @@ raid3_label(struct gctl_req *req)
}
if (*noautosync)
md.md_mflags |= G_RAID3_DEVICE_FLAG_NOAUTOSYNC;
round_robin = gctl_get_paraml(req, "round_robin", sizeof(*round_robin));
if (round_robin == NULL) {
gctl_error(req, "No '%s' argument.", "round_robin");
return;
}
if (*round_robin)
md.md_mflags |= G_RAID3_DEVICE_FLAG_ROUND_ROBIN;
hardcode = gctl_get_paraml(req, "hardcode", sizeof(*hardcode));
if (hardcode == NULL) {
gctl_error(req, "No '%s' argument.", "hardcode");

View File

@ -24,7 +24,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd Aug 18, 2004
.Dd Aug 20, 2004
.Dt GRAID3 8
.Os
.Sh NAME
@ -33,7 +33,7 @@
.Sh SYNOPSIS
.Nm
.Cm label
.Op Fl hnv
.Op Fl hnrv
.Ar name
.Ar prov
.Ar prov
@ -46,7 +46,7 @@
.Op Ar prov Op Ar ...
.Nm
.Cm configure
.Op Fl adhnv
.Op Fl adhnrRv
.Ar name
.Nm
.Cm rebuild
@ -106,6 +106,12 @@ Additional options include:
Hardcode providers' names in metadata.
.It Fl n
Turn off autosynchronization of stale components.
.It Fl r
Use the parity component for reading in a round-robin fashion.
Without this option the parity component is not used at all for read operations
when the device is in the complete state.
With this option specified, random read operations can be about 40% faster,
but sequential reads are slower.
.El
.It Cm clear
Clear metadata on the given providers.
@ -122,6 +128,10 @@ Do not hardcode providers' names in metadata.
Hardcode providers' names in metadata.
.It Fl n
Turn off autosynchronization of stale components.
.It Fl r
Turn on round-robin reading.
.It Fl R
Turn off round-robin reading.
.El
.It Cm rebuild
Rebuild the given component forcibly.
@ -167,10 +177,10 @@ Additional options include:
Be more verbose.
.El
.Sh EXAMPLES
Use 3 disks to set up a RAID3 array. Create a file system, mount it, then
unmount it and stop the device:
Use 3 disks to set up a RAID3 array (using the round-robin reading feature).
Create a file system, mount it, then unmount it and stop the device:
.Bd -literal -offset indent
graid3 label -v data da0 da1 da2
graid3 label -v -r data da0 da1 da2
newfs /dev/raid3/data
mount /dev/raid3/data /mnt
[...]
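
As a rough sketch of the round-robin selection the new -r flag enables
(hypothetical names, not the driver's code): each read issued while the array
is complete replaces one data component with the parity component, and the
skipped index advances and wraps on every request, which yields the rotation
shown in the commit message.

	/*
	 * Sketch only: decide which data component to replace with the
	 * parity component for this read.  "rr" stands in for a per-device
	 * round-robin counter and ndisks counts all components, parity
	 * included.  When the returned index equals ndisks - 1, no
	 * substitution is made and all data components are read
	 * (requests 1 and 6 in the commit message table).
	 */
	static unsigned
	round_robin_skip(unsigned *rr, unsigned ndisks)
	{
		unsigned skip;

		skip = *rr;
		*rr = (*rr + 1) % ndisks;	/* advance and wrap */
		return (skip);
	}

The driver itself keeps this counter in sc_round_robin and resets it after a
request that used no parity, as can be seen in the g_raid3_register_request()
changes below.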

View File

@ -872,122 +872,104 @@ g_raid3_gather(struct bio *pbp)
{
struct g_raid3_softc *sc;
struct g_raid3_disk *disk;
struct bio *bp, *cbp;
struct bio *xbp, *fbp, *cbp;
off_t atom, cadd, padd, left;
sc = pbp->bio_to->geom->softc;
if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_DEGRADED) != 0) {
/*
* Find bio for which we have to calculate data.
* While going through this path, check if all requests
* succeeded, if not, deny whole request.
* If we're in COMPLETE mode, we allow one request to fail,
* so if we find one, we're sending it to the parity consumer.
* If there are more failed requests, we deny whole request.
*/
xbp = fbp = NULL;
G_RAID3_FOREACH_BIO(pbp, cbp) {
if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) {
KASSERT(xbp == NULL, ("More than one parity bio."));
xbp = cbp;
}
if (cbp->bio_error == 0)
continue;
/*
* Find bio for which we should calculate data.
* While going through this path, check if all requests
* succeeded, if not, deny whole request.
* Found failed request.
*/
bp = NULL;
G_RAID3_FOREACH_BIO(pbp, cbp) {
if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) {
KASSERT(bp == NULL,
("More than one parity bio."));
bp = cbp;
}
if (cbp->bio_error == 0)
continue;
G_RAID3_LOGREQ(0, cbp, "Request failed.");
disk = cbp->bio_caller2;
if (disk != NULL) {
/*
* Found failed request.
* Actually this is pointless to bump syncid,
* because whole device is fucked up.
*/
sc->sc_bump_syncid = G_RAID3_BUMP_IMMEDIATELY;
g_raid3_event_send(disk,
G_RAID3_DISK_STATE_DISCONNECTED,
G_RAID3_EVENT_DONTWAIT);
}
if (fbp == NULL) {
if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_DEGRADED) != 0) {
/*
* We are already in degraded mode, so we can't
* accept any failures.
*/
if (pbp->bio_error == 0)
pbp->bio_error = fbp->bio_error;
} else {
fbp = cbp;
}
} else {
/*
* Next failed request, that's too many.
*/
if (pbp->bio_error == 0)
pbp->bio_error = cbp->bio_error;
disk = cbp->bio_caller2;
if (disk != NULL) {
/*
* Actually this is pointless to bump syncid,
* because whole device is fucked up.
*/
sc->sc_bump_syncid = G_RAID3_BUMP_IMMEDIATELY;
g_raid3_event_send(disk,
G_RAID3_DISK_STATE_DISCONNECTED,
G_RAID3_EVENT_DONTWAIT);
}
pbp->bio_error = fbp->bio_error;
}
KASSERT(bp != NULL, ("NULL parity bio."));
if (pbp->bio_error != 0) {
/*
* Deny whole request.
*/
}
if (pbp->bio_error != 0)
goto finish;
if (fbp != NULL) {
struct g_consumer *cp;
/*
* One request failed, so send the same request to
* the parity consumer.
*/
disk = pbp->bio_driver2;
if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) {
pbp->bio_error = fbp->bio_error;
goto finish;
}
pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED;
pbp->bio_inbed--;
fbp->bio_flags &= ~(BIO_DONE | BIO_ERROR);
if (disk->d_no == sc->sc_ndisks - 1)
fbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
fbp->bio_error = 0;
fbp->bio_completed = 0;
fbp->bio_children = 0;
fbp->bio_inbed = 0;
cp = disk->d_consumer;
fbp->bio_caller2 = disk;
fbp->bio_to = cp->provider;
G_RAID3_LOGREQ(3, fbp, "Sending request (recover).");
KASSERT(cp->acr > 0 && cp->ace > 0,
("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
cp->acr, cp->acw, cp->ace));
g_io_request(fbp, cp);
return;
}
if (xbp != NULL) {
/*
* Calculate parity.
*/
G_RAID3_FOREACH_BIO(pbp, cbp) {
if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0)
continue;
g_raid3_xor(cbp->bio_data, bp->bio_data, bp->bio_data,
bp->bio_length);
}
bp->bio_cflags &= ~G_RAID3_BIO_CFLAG_PARITY;
} else {
/*
* If we're in COMPLETE mode, we allow one request to fail,
* so if we find one, we're sending it to the parity consumer.
* If there are more failed requests, we deny whole request.
*/
bp = NULL;
G_RAID3_FOREACH_BIO(pbp, cbp) {
if (cbp->bio_error == 0)
continue;
/*
* Found failed request.
*/
G_RAID3_LOGREQ(0, cbp, "Request failed.");
disk = cbp->bio_caller2;
if (disk != NULL) {
sc->sc_bump_syncid = G_RAID3_BUMP_IMMEDIATELY;
g_raid3_event_send(disk,
G_RAID3_DISK_STATE_DISCONNECTED,
G_RAID3_EVENT_DONTWAIT);
}
if (bp == NULL)
bp = cbp;
else {
/*
* Next failed request, that's too many.
*/
if (pbp->bio_error == 0)
pbp->bio_error = bp->bio_error;
}
}
if (pbp->bio_error != 0)
goto finish;
if (bp != NULL) {
struct g_consumer *cp;
/*
* One request failed, so send the same request to
* the parity consumer.
*/
disk = &sc->sc_disks[sc->sc_ndisks - 1];
if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) {
pbp->bio_error = bp->bio_error;
goto finish;
}
pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED;
pbp->bio_inbed--;
bp->bio_flags &= ~(BIO_DONE | BIO_ERROR);
bp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
bp->bio_error = 0;
bp->bio_completed = 0;
bp->bio_children = 0;
bp->bio_inbed = 0;
cp = disk->d_consumer;
bp->bio_caller2 = disk;
bp->bio_to = cp->provider;
G_RAID3_LOGREQ(3, bp, "Sending request (parity).");
KASSERT(cp->acr > 0 && cp->ace > 0,
("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
cp->acr, cp->acw, cp->ace));
g_io_request(bp, cp);
return;
g_raid3_xor(cbp->bio_data, xbp->bio_data, xbp->bio_data,
xbp->bio_length);
}
xbp->bio_cflags &= ~G_RAID3_BIO_CFLAG_PARITY;
}
atom = sc->sc_sectorsize / (sc->sc_ndisks - 1);
cadd = padd = 0;
@ -1304,6 +1286,7 @@ g_raid3_register_request(struct bio *pbp)
struct bio *cbp;
off_t offset, length;
u_int n, ndisks;
int round_robin;
ndisks = 0;
sc = pbp->bio_to->geom->softc;
@ -1318,12 +1301,19 @@ g_raid3_register_request(struct bio *pbp)
switch (pbp->bio_cmd) {
case BIO_READ:
ndisks = sc->sc_ndisks - 1;
pbp->bio_driver2 = &sc->sc_disks[sc->sc_ndisks - 1];
break;
case BIO_WRITE:
case BIO_DELETE:
ndisks = sc->sc_ndisks;
break;
}
if (sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE &&
(sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) {
round_robin = 1;
} else {
round_robin = 0;
}
for (n = 0; n < ndisks; n++) {
disk = &sc->sc_disks[n];
cbp = g_raid3_clone_bio(sc, pbp);
@ -1345,6 +1335,17 @@ g_raid3_register_request(struct bio *pbp)
disk = &sc->sc_disks[sc->sc_ndisks - 1];
cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED;
} else if (round_robin &&
disk->d_no == sc->sc_round_robin) {
/*
* In round-robin mode skip one data component
* and use parity component when reading.
*/
pbp->bio_driver2 = disk;
disk = &sc->sc_disks[sc->sc_ndisks - 1];
cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
sc->sc_round_robin++;
round_robin = 0;
}
break;
case BIO_WRITE:
@ -1382,6 +1383,14 @@ g_raid3_register_request(struct bio *pbp)
}
switch (pbp->bio_cmd) {
case BIO_READ:
if (round_robin) {
/*
* If we are in round-robin mode and 'round_robin' is
* still 1, it means, that we skipped parity component
* for this read and must reset sc_round_robin field.
*/
sc->sc_round_robin = 0;
}
G_RAID3_FOREACH_BIO(pbp, cbp) {
disk = cbp->bio_caller2;
cp = disk->d_consumer;
@ -2474,6 +2483,7 @@ g_raid3_create(struct g_class *mp, const struct g_raid3_metadata *md)
sc->sc_mediasize = md->md_mediasize;
sc->sc_sectorsize = md->md_sectorsize;
sc->sc_ndisks = md->md_all;
sc->sc_round_robin = 0;
sc->sc_flags = md->md_mflags;
sc->sc_bump_syncid = 0;
for (n = 0; n < sc->sc_ndisks; n++)
@ -2752,6 +2762,8 @@ g_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
} \
} while (0)
ADD_FLAG(G_RAID3_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
ADD_FLAG(G_RAID3_DEVICE_FLAG_ROUND_ROBIN,
"ROUND-ROBIN");
#undef ADD_FLAG
}
sbuf_printf(sb, "</Flags>\n");

View File

@ -35,7 +35,7 @@
#define G_RAID3_CLASS_NAME "RAID3"
#define G_RAID3_MAGIC "GEOM::RAID3"
#define G_RAID3_VERSION 0
#define G_RAID3_VERSION 1
#define G_RAID3_DISK_FLAG_DIRTY 0x0000000000000001ULL
#define G_RAID3_DISK_FLAG_SYNCHRONIZING 0x0000000000000002ULL
@ -46,7 +46,9 @@
G_RAID3_DISK_FLAG_FORCE_SYNC)
#define G_RAID3_DEVICE_FLAG_NOAUTOSYNC 0x0000000000000001ULL
#define G_RAID3_DEVICE_FLAG_MASK (G_RAID3_DEVICE_FLAG_NOAUTOSYNC)
#define G_RAID3_DEVICE_FLAG_ROUND_ROBIN 0x0000000000000002ULL
#define G_RAID3_DEVICE_FLAG_MASK (G_RAID3_DEVICE_FLAG_NOAUTOSYNC | \
G_RAID3_DEVICE_FLAG_ROUND_ROBIN)
#ifdef _KERNEL
extern u_int g_raid3_debug;
@ -162,6 +164,7 @@ struct g_raid3_softc {
struct g_raid3_disk *sc_disks;
u_int sc_ndisks; /* Number of disks. */
u_int sc_round_robin;
struct g_raid3_disk *sc_syncdisk;
uma_zone_t sc_zone_64k;
@ -281,6 +284,8 @@ raid3_metadata_dump(const struct g_raid3_metadata *md)
else {
if ((md->md_mflags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0)
printf(" NOAUTOSYNC");
if ((md->md_mflags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0)
printf(" ROUND-ROBIN");
}
printf("\n");
printf(" dflags:");

View File

@ -93,7 +93,8 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp)
struct g_raid3_softc *sc;
struct g_raid3_disk *disk;
const char *name;
int *nargs, *autosync, *noautosync, do_sync = 0;
int *nargs, do_sync = 0;
int *autosync, *noautosync, *round_robin, *noround_robin;
u_int n;
g_topology_assert();
@ -122,15 +123,31 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp)
gctl_error(req, "No '%s' argument.", "noautosync");
return;
}
if (!*autosync && !*noautosync) {
gctl_error(req, "Nothing has changed.");
return;
}
if (*autosync && *noautosync) {
gctl_error(req, "'%s' and '%s' specified.", "autosync",
"noautosync");
return;
}
round_robin = gctl_get_paraml(req, "round_robin", sizeof(*round_robin));
if (round_robin == NULL) {
gctl_error(req, "No '%s' argument.", "round_robin");
return;
}
noround_robin = gctl_get_paraml(req, "noround_robin",
sizeof(*noround_robin));
if (noround_robin == NULL) {
gctl_error(req, "No '%s' argument.", "noround_robin");
return;
}
if (*round_robin && *noround_robin) {
gctl_error(req, "'%s' and '%s' specified.", "round_robin",
"noround_robin");
return;
}
if (!*autosync && !*noautosync && !*round_robin && !*noround_robin) {
gctl_error(req, "Nothing has changed.");
return;
}
if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0) {
if (*autosync) {
sc->sc_flags &= ~G_RAID3_DEVICE_FLAG_NOAUTOSYNC;
@ -140,6 +157,13 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp)
if (*noautosync)
sc->sc_flags |= G_RAID3_DEVICE_FLAG_NOAUTOSYNC;
}
if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) {
if (*noround_robin)
sc->sc_flags &= ~G_RAID3_DEVICE_FLAG_ROUND_ROBIN;
} else {
if (*round_robin)
sc->sc_flags |= G_RAID3_DEVICE_FLAG_ROUND_ROBIN;
}
for (n = 0; n < sc->sc_ndisks; n++) {
disk = &sc->sc_disks[n];
if (do_sync) {