1
0
mirror of https://git.FreeBSD.org/src.git synced 2024-12-14 10:09:48 +00:00

- Reimplement I/O data allocation to prevent deadlocks.

Submitted by:	green

- Speed up synchronization process by using configurable number of I/O
  requests in parallel.
  + Add kern.geom.raid3.sync_requests tunable which defines how many parallel
    I/O requests should be used.
  + Retire kern.geom.raid3.reqs_per_sync and kern.geom.raid3.syncs_per_sec
    sysctls.
- Fix race between regular and synchronization requests.
- Reimplement raid3's data synchronization - do not use the topology lock
  for this purpose, as it may cause deadlocks.
- Stop synchronization from pre-sync hook.
- Fix some other minor issues.

Tested by:	Mike Tancsa <mike@sentex.net>
MFC after:	3 days
This commit is contained in:
Pawel Jakub Dawidek 2006-03-13 01:03:18 +00:00
parent e62165c8b0
commit 3650be51e2
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=156612
3 changed files with 674 additions and 354 deletions

File diff suppressed because it is too large Load Diff

View File

@ -110,12 +110,13 @@ extern u_int g_raid3_debug;
*/
struct g_raid3_disk_sync {
struct g_consumer *ds_consumer; /* Consumer connected to our device. */
off_t ds_offset; /* Offset of next request to send. */
off_t ds_offset_done; /* Offset of already synchronized
off_t ds_offset; /* Offset of next request to send. */
off_t ds_offset_done; /* Offset of already synchronized
region. */
off_t ds_resync; /* Resynchronize from this offset. */
u_int ds_syncid; /* Disk's synchronization ID. */
u_char *ds_data;
off_t ds_resync; /* Resynchronize from this offset. */
u_int ds_syncid; /* Disk's synchronization ID. */
u_int ds_inflight; /* Number of in-flight sync requests. */
struct bio **ds_bios; /* BIOs for synchronization I/O. */
};
/*
@ -169,6 +170,23 @@ struct g_raid3_event {
/* Bump genid immediately. */
#define G_RAID3_BUMP_GENID 0x2
/*
 * Identifiers of the per-device allocation zones.  The values are dense and
 * start at zero, so G_RAID3_NUM_ZONES is the number of zones and can be used
 * to size an array indexed by zone.
 */
enum g_raid3_zones {
	G_RAID3_ZONE_64K,
	G_RAID3_ZONE_16K,
	G_RAID3_ZONE_4K,
	G_RAID3_NUM_ZONES
};

/*
 * Select the zone for a request of 'nbytes' bytes:
 *   nbytes <= 4096          -> G_RAID3_ZONE_4K
 *   4096 < nbytes <= 16384  -> G_RAID3_ZONE_16K
 *   nbytes > 16384          -> G_RAID3_ZONE_64K
 *
 * No upper bound is enforced here; anything larger than 16384 bytes maps to
 * G_RAID3_ZONE_64K, so callers are presumably expected to keep requests
 * within that zone's item size (confirm against the zone setup code).
 */
static __inline enum g_raid3_zones
g_raid3_zone(size_t nbytes)
{

	if (nbytes > 16384)
		return (G_RAID3_ZONE_64K);
	if (nbytes > 4096)
		return (G_RAID3_ZONE_16K);
	return (G_RAID3_ZONE_4K);
}
struct g_raid3_softc {
u_int sc_state; /* Device state. */
uint64_t sc_mediasize; /* Device size. */
@ -180,18 +198,31 @@ struct g_raid3_softc {
uint32_t sc_id; /* Device unique ID. */
struct sx sc_lock;
struct bio_queue_head sc_queue;
struct mtx sc_queue_mtx;
struct proc *sc_worker;
struct bio_queue_head sc_regular_delayed; /* Delayed I/O requests due
collision with sync
requests. */
struct bio_queue_head sc_inflight; /* In-flight regular write
requests. */
struct bio_queue_head sc_sync_delayed; /* Delayed sync requests due
collision with regular
requests. */
struct g_raid3_disk *sc_disks;
u_int sc_ndisks; /* Number of disks. */
u_int sc_round_robin;
struct g_raid3_disk *sc_syncdisk;
uma_zone_t sc_zone_64k;
uma_zone_t sc_zone_16k;
uma_zone_t sc_zone_4k;
struct g_raid3_zone {
uma_zone_t sz_zone;
size_t sz_inuse;
size_t sz_max;
u_int sz_requested;
u_int sz_failed;
} sc_zones[G_RAID3_NUM_ZONES];
u_int sc_genid; /* Generation ID. */
u_int sc_syncid; /* Synchronization ID. */

View File

@ -51,7 +51,7 @@ g_raid3_find_device(struct g_class *mp, const char *name)
struct g_raid3_softc *sc;
struct g_geom *gp;
g_topology_assert();
g_topology_lock();
LIST_FOREACH(gp, &mp->geom, geom) {
sc = gp->softc;
if (sc == NULL)
@ -60,9 +60,12 @@ g_raid3_find_device(struct g_class *mp, const char *name)
continue;
if (strcmp(gp->name, name) == 0 ||
strcmp(sc->sc_name, name) == 0) {
g_topology_unlock();
sx_xlock(&sc->sc_lock);
return (sc);
}
}
g_topology_unlock();
return (NULL);
}
@ -72,7 +75,7 @@ g_raid3_find_disk(struct g_raid3_softc *sc, const char *name)
struct g_raid3_disk *disk;
u_int n;
g_topology_assert();
sx_assert(&sc->sc_lock, SX_XLOCKED);
for (n = 0; n < sc->sc_ndisks; n++) {
disk = &sc->sc_disks[n];
if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
@ -109,20 +112,6 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp)
gctl_error(req, "Invalid number of arguments.");
return;
}
name = gctl_get_asciiparam(req, "arg0");
if (name == NULL) {
gctl_error(req, "No 'arg%u' argument.", 0);
return;
}
sc = g_raid3_find_device(mp, name);
if (sc == NULL) {
gctl_error(req, "No such device: %s.", name);
return;
}
if (g_raid3_ndisks(sc, -1) < sc->sc_ndisks) {
gctl_error(req, "Not all disks connected.");
return;
}
autosync = gctl_get_paraml(req, "autosync", sizeof(*autosync));
if (autosync == NULL) {
gctl_error(req, "No '%s' argument.", "autosync");
@ -174,6 +163,21 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp)
gctl_error(req, "Nothing has changed.");
return;
}
name = gctl_get_asciiparam(req, "arg0");
if (name == NULL) {
gctl_error(req, "No 'arg%u' argument.", 0);
return;
}
sc = g_raid3_find_device(mp, name);
if (sc == NULL) {
gctl_error(req, "No such device: %s.", name);
return;
}
if (g_raid3_ndisks(sc, -1) < sc->sc_ndisks) {
gctl_error(req, "Not all disks connected.");
sx_xunlock(&sc->sc_lock);
return;
}
if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0) {
if (*autosync) {
sc->sc_flags &= ~G_RAID3_DEVICE_FLAG_NOAUTOSYNC;
@ -223,6 +227,7 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp)
}
}
}
sx_xunlock(&sc->sc_lock);
}
static void
@ -235,7 +240,6 @@ g_raid3_ctl_rebuild(struct gctl_req *req, struct g_class *mp)
const char *name;
int error, *nargs;
g_topology_assert();
nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
if (nargs == NULL) {
gctl_error(req, "No '%s' argument.", "nargs");
@ -258,16 +262,19 @@ g_raid3_ctl_rebuild(struct gctl_req *req, struct g_class *mp)
name = gctl_get_asciiparam(req, "arg1");
if (name == NULL) {
gctl_error(req, "No 'arg%u' argument.", 1);
sx_xunlock(&sc->sc_lock);
return;
}
disk = g_raid3_find_disk(sc, name);
if (disk == NULL) {
gctl_error(req, "No such provider: %s.", name);
sx_xunlock(&sc->sc_lock);
return;
}
if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE &&
g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < sc->sc_ndisks) {
gctl_error(req, "There is one stale disk already.", name);
sx_xunlock(&sc->sc_lock);
return;
}
/*
@ -279,18 +286,20 @@ g_raid3_ctl_rebuild(struct gctl_req *req, struct g_class *mp)
disk->d_flags |= G_RAID3_DISK_FLAG_FORCE_SYNC;
g_raid3_update_metadata(disk);
pp = disk->d_consumer->provider;
g_topology_lock();
error = g_raid3_read_metadata(disk->d_consumer, &md);
g_topology_unlock();
g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED,
G_RAID3_EVENT_WAIT);
if (error != 0) {
gctl_error(req, "Cannot read metadata from %s.", pp->name);
sx_xunlock(&sc->sc_lock);
return;
}
error = g_raid3_add_disk(sc, pp, &md);
if (error != 0) {
if (error != 0)
gctl_error(req, "Cannot reconnect component %s.", pp->name);
return;
}
sx_xunlock(&sc->sc_lock);
}
static void
@ -302,8 +311,6 @@ g_raid3_ctl_stop(struct gctl_req *req, struct g_class *mp)
char param[16];
u_int i;
g_topology_assert();
nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
if (nargs == NULL) {
gctl_error(req, "No '%s' argument.", "nargs");
@ -335,8 +342,10 @@ g_raid3_ctl_stop(struct gctl_req *req, struct g_class *mp)
if (error != 0) {
gctl_error(req, "Cannot destroy device %s (error=%d).",
sc->sc_geom->name, error);
sx_xunlock(&sc->sc_lock);
return;
}
/* No need to unlock, because lock is already dead. */
}
}
@ -363,7 +372,6 @@ g_raid3_ctl_insert(struct gctl_req *req, struct g_class *mp)
intmax_t *no;
int *hardcode, *nargs, error;
g_topology_assert();
nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
if (nargs == NULL) {
gctl_error(req, "No '%s' argument.", "nargs");
@ -373,55 +381,77 @@ g_raid3_ctl_insert(struct gctl_req *req, struct g_class *mp)
gctl_error(req, "Invalid number of arguments.");
return;
}
name = gctl_get_asciiparam(req, "arg0");
if (name == NULL) {
gctl_error(req, "No 'arg%u' argument.", 0);
return;
}
sc = g_raid3_find_device(mp, name);
if (sc == NULL) {
gctl_error(req, "No such device: %s.", name);
return;
}
no = gctl_get_paraml(req, "number", sizeof(*no));
if (no == NULL) {
gctl_error(req, "No '%s' argument.", "no");
return;
}
if (*no >= sc->sc_ndisks) {
gctl_error(req, "Invalid component number.");
return;
}
hardcode = gctl_get_paraml(req, "hardcode", sizeof(*hardcode));
if (hardcode == NULL) {
gctl_error(req, "No '%s' argument.", "hardcode");
return;
}
disk = &sc->sc_disks[*no];
if (disk->d_state != G_RAID3_DISK_STATE_NODISK) {
gctl_error(req, "Component %u is already connected.", *no);
return;
}
name = gctl_get_asciiparam(req, "arg1");
if (name == NULL) {
gctl_error(req, "No 'arg%u' argument.", 1);
return;
}
no = gctl_get_paraml(req, "number", sizeof(*no));
if (no == NULL) {
gctl_error(req, "No '%s' argument.", "no");
return;
}
g_topology_lock();
pp = g_provider_by_name(name);
if (pp == NULL) {
g_topology_unlock();
gctl_error(req, "Invalid provider.");
return;
}
gp = g_new_geomf(mp, "raid3:insert");
gp->orphan = g_raid3_ctl_insert_orphan;
cp = g_new_consumer(gp);
error = g_attach(cp, pp);
if (error != 0) {
g_topology_unlock();
gctl_error(req, "Cannot attach to %s.", pp->name);
goto end;
}
error = g_access(cp, 0, 1, 1);
if (error != 0) {
g_topology_unlock();
gctl_error(req, "Cannot access %s.", pp->name);
goto end;
}
g_topology_unlock();
name = gctl_get_asciiparam(req, "arg0");
if (name == NULL) {
gctl_error(req, "No 'arg%u' argument.", 0);
goto end;
}
sc = g_raid3_find_device(mp, name);
if (sc == NULL) {
gctl_error(req, "No such device: %s.", name);
goto end;
}
if (*no >= sc->sc_ndisks) {
sx_xunlock(&sc->sc_lock);
gctl_error(req, "Invalid component number.");
goto end;
}
disk = &sc->sc_disks[*no];
if (disk->d_state != G_RAID3_DISK_STATE_NODISK) {
sx_xunlock(&sc->sc_lock);
gctl_error(req, "Component %u is already connected.", *no);
goto end;
}
if (((sc->sc_sectorsize / (sc->sc_ndisks - 1)) % pp->sectorsize) != 0) {
sx_xunlock(&sc->sc_lock);
gctl_error(req,
"Cannot insert provider %s, because of its sector size.",
pp->name);
return;
goto end;
}
compsize = sc->sc_mediasize / (sc->sc_ndisks - 1);
if (compsize > pp->mediasize - pp->sectorsize) {
sx_xunlock(&sc->sc_lock);
gctl_error(req, "Provider %s too small.", pp->name);
return;
goto end;
}
if (compsize < pp->mediasize - pp->sectorsize) {
gctl_error(req,
@ -429,20 +459,8 @@ g_raid3_ctl_insert(struct gctl_req *req, struct g_class *mp)
pp->name, (intmax_t)compsize,
(intmax_t)(pp->mediasize - pp->sectorsize));
}
gp = g_new_geomf(mp, "raid3:insert");
gp->orphan = g_raid3_ctl_insert_orphan;
cp = g_new_consumer(gp);
error = g_attach(cp, pp);
if (error != 0) {
gctl_error(req, "Cannot attach to %s.", pp->name);
goto end;
}
error = g_access(cp, 0, 1, 1);
if (error != 0) {
gctl_error(req, "Cannot access %s.", pp->name);
goto end;
}
g_raid3_fill_metadata(disk, &md);
sx_xunlock(&sc->sc_lock);
md.md_syncid = 0;
md.md_dflags = 0;
if (*hardcode)
@ -452,20 +470,20 @@ g_raid3_ctl_insert(struct gctl_req *req, struct g_class *mp)
md.md_provsize = pp->mediasize;
sector = g_malloc(pp->sectorsize, M_WAITOK);
raid3_metadata_encode(&md, sector);
g_topology_unlock();
error = g_write_data(cp, pp->mediasize - pp->sectorsize, sector,
pp->sectorsize);
g_topology_lock();
g_free(sector);
if (error != 0)
gctl_error(req, "Cannot store metadata on %s.", pp->name);
end:
g_topology_lock();
if (cp->acw > 0)
g_access(cp, 0, -1, -1);
if (cp->provider != NULL)
g_detach(cp);
g_destroy_consumer(cp);
g_destroy_geom(gp);
g_topology_unlock();
}
static void
@ -477,7 +495,6 @@ g_raid3_ctl_remove(struct gctl_req *req, struct g_class *mp)
intmax_t *no;
int *nargs;
g_topology_assert();
nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
if (nargs == NULL) {
gctl_error(req, "No '%s' argument.", "nargs");
@ -487,6 +504,11 @@ g_raid3_ctl_remove(struct gctl_req *req, struct g_class *mp)
gctl_error(req, "Invalid number of arguments.");
return;
}
no = gctl_get_paraml(req, "number", sizeof(*no));
if (no == NULL) {
gctl_error(req, "No '%s' argument.", "no");
return;
}
name = gctl_get_asciiparam(req, "arg0");
if (name == NULL) {
gctl_error(req, "No 'arg%u' argument.", 0);
@ -497,12 +519,8 @@ g_raid3_ctl_remove(struct gctl_req *req, struct g_class *mp)
gctl_error(req, "No such device: %s.", name);
return;
}
no = gctl_get_paraml(req, "number", sizeof(*no));
if (no == NULL) {
gctl_error(req, "No '%s' argument.", "no");
return;
}
if (*no >= sc->sc_ndisks) {
sx_xunlock(&sc->sc_lock);
gctl_error(req, "Invalid component number.");
return;
}
@ -517,7 +535,7 @@ g_raid3_ctl_remove(struct gctl_req *req, struct g_class *mp)
sc->sc_ndisks) {
gctl_error(req, "Cannot replace component number %u.",
*no);
return;
break;
}
/* FALLTHROUGH */
case G_RAID3_DISK_STATE_STALE:
@ -528,15 +546,16 @@ g_raid3_ctl_remove(struct gctl_req *req, struct g_class *mp)
} else {
g_raid3_event_send(disk,
G_RAID3_DISK_STATE_DISCONNECTED,
G_RAID3_EVENT_WAIT);
G_RAID3_EVENT_DONTWAIT);
}
break;
case G_RAID3_DISK_STATE_NODISK:
break;
default:
gctl_error(req, "Cannot replace component number %u.", *no);
return;
break;
}
sx_xunlock(&sc->sc_lock);
}
void
@ -556,6 +575,7 @@ g_raid3_config(struct gctl_req *req, struct g_class *mp, const char *verb)
return;
}
g_topology_unlock();
if (strcmp(verb, "configure") == 0)
g_raid3_ctl_configure(req, mp);
else if (strcmp(verb, "insert") == 0)
@ -568,4 +588,5 @@ g_raid3_config(struct gctl_req *req, struct g_class *mp, const char *verb)
g_raid3_ctl_stop(req, mp);
else
gctl_error(req, "Unknown verb.");
g_topology_lock();
}