1
0
mirror of https://git.FreeBSD.org/src.git synced 2025-01-13 14:40:22 +00:00

Remove unused vinum files.

This commit is contained in:
Poul-Henning Kamp 2004-11-04 09:57:21 +00:00
parent 6e67e2a710
commit 56f7479530
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=137198
26 changed files with 0 additions and 12139 deletions

View File

@ -1,37 +0,0 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $FreeBSD$
*/

View File

@ -1,78 +0,0 @@
#!/bin/sh
# Make statetexts.h from vinumstate.h
# $FreeBSD$
# $Id: makestatetext,v 1.7 1999/12/29 07:24:54 grog Exp grog $
#
# For each of the drive_, sd_, plex_ and volume_ prefixes, every line of
# $infile that contains "<prefix><name>," and no "=" contributes one quoted
# string "<name>" to the corresponding text array written to $ofile.
#
# The name pattern is spelled [A-Za-z0-9_] rather than [A-z0-9]: the range
# A-z depends on the collating order, and in ASCII it also matches the
# characters [ \ ] ^ and `, none of which can appear in an identifier.
infile=vinumstate.h
ofile=statetexts.h
echo >$ofile "/* Created by $0 on" `date`. "Do not edit */"
echo >>$ofile
cat >> $ofile <<FOO
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called \`\`Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided \`\`as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*/
FOO
# Drive states
echo >>$ofile "/* Drive state texts */"
echo >>$ofile "char *drivestatetext [] =
{ "
egrep -e 'drive_[A-Za-z0-9_]*,' <$infile | grep -v = | sed 's: *drive_\([^,]*\).*: \"\1\",:' >>$ofile
# Subdisk states
cat <<FOO >> $ofile
};
/* Subdisk state texts */
char *sdstatetext [] =
{
FOO
egrep -e 'sd_[A-Za-z0-9_]*,' $infile | grep -v = | sed 's: *sd_\([^,]*\).*: \"\1\",:' >>$ofile
# Plex states
cat <<FOO >> $ofile
};
/* Plex state texts */
char *plexstatetext [] =
{
FOO
egrep -e 'plex_[A-Za-z0-9_]*,' $infile | grep -v = | sed 's: *plex_\([^,]*\).*: \"\1\",:' >>$ofile
# Volume states
cat <<FOO >> $ofile
};
/* Volume state texts */
char *volstatetext [] =
{
FOO
egrep -e 'volume_[A-Za-z0-9_]*,' $infile | grep -v = | sed 's: *volume_\([^,]*\).*: \"\1\",:' >>$ofile
cat <<FOO >> $ofile
};
FOO

View File

@ -1,273 +0,0 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: request.h,v 1.22 2003/04/24 04:37:08 grog Exp $
* $FreeBSD$
*/
/*
 * Information needed to set up a transfer.
 *
 * The first group of enumerators are single-bit flags that may be OR-ed
 * together; the enumerators at the end are masks composed of those flags.
 */
enum xferinfo {
    XFR_NORMAL_READ = 1 << 0,
    XFR_NORMAL_WRITE = 1 << 1,			    /* write request in normal mode */
    XFR_RECOVERY_READ = 1 << 2,
    XFR_DEGRADED_WRITE = 1 << 3,
    XFR_PARITYLESS_WRITE = 1 << 4,
    XFR_NO_PARITY_STRIPE = 1 << 5,		    /* parity stripe is not available */
    XFR_DATA_BLOCK = 1 << 6,			    /* data block in request */
    XFR_PARITY_BLOCK = 1 << 7,			    /* parity block in request */
    XFR_BAD_SUBDISK = 1 << 8,			    /* this subdisk is dead */
    XFR_MALLOCED = 1 << 9,			    /* this buffer is malloced */
    /* bit 10 (0x400) is not assigned here */
#ifdef VINUMDEBUG
    XFR_PHASE2 = 1 << 11,			    /* documentation only: 2nd phase write */
#endif
    XFR_REVIVECONFLICT = 1 << 12,		    /* possible conflict with a revive operation */
    XFR_BUFLOCKED = 1 << 13,			    /* BUF_LOCK performed on this buffer */
    XFR_COPYBUF = 1 << 14,			    /* data buffer was copied */

    /* Masks: operations that need a parity block */
    XFR_PARITYOP = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE),
    /* operations that use the group parameters */
    XFR_GROUPOP = (XFR_DEGRADED_WRITE | XFR_RECOVERY_READ),
    /* operations that use the data parameters */
    XFR_DATAOP = (XFR_NORMAL_READ | XFR_NORMAL_WRITE | XFR_PARITYLESS_WRITE),
    /* operations requiring read before write */
    XFR_RBW = (XFR_NORMAL_WRITE | XFR_DEGRADED_WRITE),
    /* operations that need a malloced buffer */
    XFR_NEEDS_MALLOC = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE)
};
/*
 * Describe one low-level request, part of a
 * high-level request. This is an extended
 * struct buf buffer, and the first element
 * *must* be a struct buf. We pass this
 * structure to the I/O routines instead of a
 * struct buf in order to be able to locate the
 * high-level request when it completes.
 *
 * All offsets and lengths are in sectors.
 */
struct rqelement {
struct buf b; /* buf structure; must remain the first member (see above) */
struct rqgroup *rqg; /* pointer to our group */
/* Information about the transfer */
daddr_t sdoffset; /* offset in subdisk */
int useroffset; /* offset in user buffer of normal data */
/*
 * dataoffset and datalen refer to "individual" data
 * transfers which involve only this drive (normal read,
 * parityless write) and also degraded write.
 *
 * groupoffset and grouplen refer to the other "group"
 * operations (normal write, recovery read) which involve
 * more than one drive. Both the offsets are relative to
 * the start of the local buffer.
 *
 * NOTE(review): enum xferinfo classifies the writes the
 * other way round (XFR_DATAOP contains XFR_NORMAL_WRITE,
 * XFR_GROUPOP contains XFR_DEGRADED_WRITE). Confirm which
 * of the two descriptions is the intended one.
 */
int dataoffset; /* offset in buffer of the normal data */
int groupoffset; /* offset in buffer of group data */
short datalen; /* length of normal data (sectors) */
short grouplen; /* length of group data (sectors) */
short buflen; /* total buffer length to allocate */
short flags; /* really enum xferinfo (see above); stored as a short */
/* Ways to find other components */
short sdno; /* subdisk number */
short driveno; /* drive number */
struct timeval launchtime; /* time of launch, for info function */
};
/*
 * A group of requests built to satisfy an I/O
 * transfer on a single plex.
 *
 * The structure is variable-length: the request elements follow the
 * fixed part directly, so it must be allocated with room for `count'
 * elements and can only be handled through a pointer.
 */
struct rqgroup {
    struct rqgroup *next;			    /* pointer to next group */
    struct request *rq;				    /* pointer to the request */
    short count;				    /* number of requests in this group */
    short active;				    /* and number active */
    short plexno;				    /* index of plex */
    int badsdno;				    /* index of bad subdisk or -1 */
    enum xferinfo flags;			    /* description of transfer */
    struct rangelock *lock;			    /* lock for this transfer */
    daddr_t lockbase;				    /* and lock address */
    /*
     * C99 flexible array member; replaces the non-standard
     * zero-length array rqe[0] without changing the layout.
     */
    struct rqelement rqe[];			    /* and the elements of this request */
};
/*
 * Describe one high-level request and the
 * work we have to do to satisfy it.
 *
 * The low-level work hangs off rqg/lrqg as a
 * singly linked list of struct rqgroup.
 */
struct request {
struct buf *bp; /* pointer to the high-level request */
caddr_t save_data; /* for copied write buffers */
enum xferinfo flags;
union {
int volno; /* volume index */
int plexno; /* or plex index */
} volplex; /* presumably selected by isplex below; confirm against users */
int error; /* current error indication */
int sdno; /* reviving subdisk (XFR_REVIVECONFLICT) */
short isplex; /* set if this is a plex request */
short active; /* number of subrequests still active */
struct rqgroup *rqg; /* pointer to the first group of requests */
struct rqgroup *lrqg; /* and to the last group of requests */
struct request *next; /* link of waiting requests */
};
/*
 * Extended buffer header for subdisk I/O. Includes
 * a pointer to the user I/O request.
 *
 * The struct buf is the first member, as in struct rqelement.
 */
struct sdbuf {
struct buf b; /* our buffer */
struct buf *bp; /* and pointer to parent */
short driveno; /* drive index */
short sdno; /* and subdisk index */
};
/*
 * Values returned by rqe and friends.
 *
 * The numeric order matters: the values are arranged by increasing
 * seriousness, and some routines test for "> REQUEST_RECOVERED" to
 * detect a failed request (XXX).  The values are therefore spelled out
 * so that the ordering is not disturbed by accident.
 */
enum requeststatus {
    REQUEST_OK = 0,				    /* request built OK */
    REQUEST_RECOVERED = 1,			    /* request OK, but involves RAID5 recovery */
    REQUEST_DEGRADED = 2,			    /* parts of request failed */
    REQUEST_EOF = 3,				    /* parts of request failed: outside plex */
    REQUEST_DOWN = 4,				    /* all of request failed: subdisk(s) down */
    REQUEST_ENOMEM = 5				    /* all of request failed: ran out of memory */
};
#ifdef VINUMDEBUG
/*
 * Kind of event recorded in a request trace entry (DEBUG_LASTREQS).
 * Values are consecutive, starting at 0 for a slot that was never used.
 */
enum rqinfo_type {
    loginfo_unused = 0,				    /* never been used */
    loginfo_user_bp = 1,			    /* this is the bp when strategy is called */
    loginfo_user_bpl = 2,			    /* and this is the bp at launch time */
    loginfo_rqe = 3,				    /* user RQE */
    loginfo_iodone = 4,				    /* iodone */
    loginfo_raid5_data = 5,			    /* write RAID-5 data block */
    loginfo_raid5_parity = 6,			    /* write RAID-5 parity block */
    loginfo_sdio = 7,				    /* subdisk I/O */
    loginfo_sdiol = 8,				    /* subdisk I/O launch */
    loginfo_sdiodone = 9,			    /* subdisk iodone */
    loginfo_lockwait = 10,			    /* wait for range lock */
    loginfo_lock = 11,				    /* lock range */
    loginfo_unlock = 12,			    /* unlock range */
};
/*
 * This is the rangelock structure with an added
 * buffer pointer and plex number. We don't need
 * the plex number for the locking protocol, but
 * it does help a lot when logging.
 *
 * It is handed to logrq() through union rqinfou.
 */
struct rangelockinfo {
daddr_t stripe; /* address + 1 of the range being locked */
struct buf *bp; /* user's buffer pointer */
int plexno; /* plex number, used for logging only (see above) */
};
/*
 * Argument passed by value to logrq(). Which member is meaningful
 * presumably depends on the enum rqinfo_type given alongside it;
 * confirm against the logrq() implementation.
 */
union rqinfou { /* info to pass to logrq */
struct buf *bp;
struct rqelement *rqe; /* address of request, for correlation */
struct rangelockinfo *lockinfo;
};
/*
 * One entry of the request trace. The entry holds full copies of the
 * logged objects rather than pointers to them.
 */
struct rqinfo {
enum rqinfo_type type; /* kind of event */
struct timeval timestamp; /* time it happened */
struct buf *bp; /* point to user buffer */
int devmajor; /* major and minor device info */
int devminor;
union {
struct buf b; /* yup, the *whole* buffer header */
struct rqelement rqe; /* and the whole rqe */
/*
 * NOTE(review): union rqinfou passes a struct rangelockinfo
 * pointer, but the copy kept here is a struct rangelock.
 * Confirm that logrq() copies the matching type and size.
 */
struct rangelock lockinfo;
} info;
};
#define RQINFO_SIZE 128 /* number of info slots in buffer */
/*
 * Record one trace event of the given type. info carries the object to
 * log and ubp is a buffer pointer (presumably the user's buffer, going
 * by the name; confirm against the implementation).
 */
void logrq(enum rqinfo_type type, union rqinfou info, struct buf *ubp);
#endif
/* Structures for the daemon */

/*
 * Types of request to the daemon.  Value 0 is deliberately a dummy so
 * that a zeroed request does not look like a valid one.
 */
enum daemonrq {
    daemonrq_none = 0,				    /* dummy to catch bugs */
    daemonrq_ioerror = 1,			    /* error occurred on I/O */
    daemonrq_saveconfig = 2,			    /* save configuration */
    daemonrq_return = 3,			    /* return to userland */
    daemonrq_ping = 4,				    /* show sign of life */
    daemonrq_init = 5,				    /* initialize a plex */
    daemonrq_revive = 6,			    /* revive a subdisk */
    daemonrq_closedrive = 7,			    /* close a drive */
};
/*
 * Info field for daemon requests. The member to use depends on the
 * request type, as noted on each member.
 */
union daemoninfo { /* argument accompanying a daemon request */
struct request *rq; /* for daemonrq_ioerror */
struct sd *sd; /* for daemonrq_revive */
struct plex *plex; /* for daemonrq_init */
struct drive *drive; /* for daemonrq_closedrive */
int nothing; /* for passing NULL */
};
/* One element of the daemon's request queue (singly linked via next). */
struct daemonq {
struct daemonq *next; /* pointer to next element in queue */
enum daemonrq type; /* type of request */
int privateinuse; /* private element, being used */
union daemoninfo info; /* and the request information */
};
/* Hand a request of the given type, with its accompanying info, to the daemon. */
void queue_daemon_request(enum daemonrq type, union daemoninfo info);
/* Daemon option flags; presumably a combination of enum daemon_option bits (confirm). */
extern int daemon_options;
/* Option bits for the daemon; each option occupies its own bit. */
enum daemon_option {
    daemon_verbose = 1 << 0,			    /* talk about what we're doing */
    daemon_stopped = 1 << 1,
    daemon_noupdate = 1 << 2,			    /* don't update the disk config, for recovery */
};
/* Release a high-level request. */
void freerq(struct request *rq);
/* Release a range lock belonging to plex number plexno. */
void unlockrange(int plexno, struct rangelock *lock);
/* Local Variables: */
/* fill-column: 50 */
/* End: */

View File

@ -1,91 +0,0 @@
/* Created by ./makestatetext on Wed Jan 5 10:05:30 CST 2000. Do not edit */
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $FreeBSD$
*/
/*
 * Drive state texts.  Generated from the drive_* enumerators of
 * vinumstate.h by makestatetext; entry N is the Nth such state.
 */
char *drivestatetext[] = {
    [0] = "unallocated",
    [1] = "referenced",
    [2] = "down",
    [3] = "up",
};
/*
 * Subdisk state texts.  Generated from the sd_* enumerators of
 * vinumstate.h by makestatetext; entry N is the Nth such state.
 */
char *sdstatetext[] = {
    [0] = "unallocated",
    [1] = "uninit",
    [2] = "referenced",
    [3] = "init",
    [4] = "empty",
    [5] = "initializing",
    [6] = "initialized",
    [7] = "obsolete",
    [8] = "stale",
    [9] = "crashed",
    [10] = "down",
    [11] = "reviving",
    [12] = "reborn",
    [13] = "up",
};
/*
 * Plex state texts.  Generated from the plex_* enumerators of
 * vinumstate.h by makestatetext; entry N is the Nth such state.
 */
char *plexstatetext[] = {
    [0] = "unallocated",
    [1] = "referenced",
    [2] = "init",
    [3] = "faulty",
    [4] = "down",
    [5] = "initializing",
    [6] = "corrupt",
    [7] = "degraded",
    [8] = "flaky",
    [9] = "up",
};
/*
 * Volume state texts.  Generated from the volume_* enumerators of
 * vinumstate.h by makestatetext; entry N is the Nth such state.
 */
char *volstatetext[] = {
    [0] = "unallocated",
    [1] = "uninit",
    [2] = "down",
    [3] = "up",
};

View File

@ -1,542 +0,0 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* Written by Greg Lehey
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinum.c,v 1.44 2003/05/23 00:50:55 grog Exp grog $
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#define STATIC static /* define STATIC as nothing instead while we're testing */
#include <dev/vinum/vinumhdr.h>
#include <sys/sysproto.h> /* for sync(2) */
#ifdef VINUMDEBUG
#include <sys/reboot.h>
int debug = 0; /* debug flags */
/*
 * Allocation bookkeeping defined elsewhere. vinum_modevent() uses it at
 * unload time to report and free anything still allocated.
 */
extern int total_malloced; /* nonzero at unload triggers the leak report */
extern int malloccount; /* number of entries of malloced[] walked at unload */
extern struct mc malloced[]; /* per-allocation records (size, file, line, address) */
#endif
#include <dev/vinum/request.h>
/*
 * Character device switch shared by every vinum device node;
 * vinumattach() passes it to make_dev() for the control devices.
 */
struct cdevsw vinum_cdevsw = {
    /* identification */
    .d_version = D_VERSION,
    .d_name = "vinum",
    .d_flags = D_DISK | D_NEEDGIANT,

    /* entry points implemented by vinum */
    .d_open = vinumopen,
    .d_close = vinumclose,
    .d_ioctl = vinumioctl,
    .d_strategy = vinumstrategy,

    /* read and write use the generic physread/physwrite routines */
    .d_read = physread,
    .d_write = physwrite
};
/* Attach routine; in this file it is called from vinum_modevent() on MOD_LOAD. */
void vinumattach(void *);
STATIC int vinum_modevent(module_t mod, modeventtype_t type, void *unused);
STATIC void vinum_clone(void *arg, char *name, int namelen, struct cdev ** dev);
struct _vinum_conf vinum_conf; /* configuration information */
struct cdev *vinum_daemon_dev; /* "vinum/controld", created in vinumattach() */
struct cdev *vinum_super_dev; /* "vinum/control", created in vinumattach() */
static eventhandler_tag dev_clone_tag; /* dev_clone registration, needed to deregister at unload */
/*
 * Mutexes for plex synchronization. Ideally each plex
 * should have its own mutex, but the fact that the plex
 * struct can move makes that very complicated. Instead,
 * plexes share these mutexes, selected by plex number
 * modulo the number of mutexes.
 */
struct mtx plexmutex[PLEXMUTEXES];
/*
* Called by main() during pseudo-device attachment. All we need
* to do is allocate enough space for devices to be configured later, and
* add devsw entries.
*/
void
vinumattach(void *dummy)
{
char *envp;
int i;
#define MUTEXNAMELEN 16
char mutexname[MUTEXNAMELEN];
#if PLEXMUTEXES > 10000
#error Increase size of MUTEXNAMELEN
#endif
/* modload should prevent multiple loads, so this is worth a panic */
if ((vinum_conf.flags & VF_LOADED) != 0)
panic("vinum: already loaded");
log(LOG_INFO, "vinum: loaded\n");
#ifdef VINUMDEBUG
vinum_conf.flags |= VF_LOADED | VF_HASDEBUG; /* we're loaded now, and we support debug */
#else
vinum_conf.flags |= VF_LOADED; /* we're loaded now */
#endif
daemonq = NULL; /* initialize daemon's work queue */
dqend = NULL;
vinum_daemon_dev = make_dev(&vinum_cdevsw,
VINUM_DAEMON_MINOR,
UID_ROOT,
GID_WHEEL,
S_IRUSR | S_IWUSR,
"vinum/controld");
vinum_super_dev = make_dev(&vinum_cdevsw,
VINUM_SUPERDEV_MINOR,
UID_ROOT,
GID_WHEEL,
S_IRUSR | S_IWUSR,
"vinum/control");
vinum_conf.version = VINUMVERSION; /* note what version we are */
/* allocate space: drives... */
DRIVE = (struct drive *) Malloc(sizeof(struct drive) * INITIAL_DRIVES);
CHECKALLOC(DRIVE, "vinum: no memory\n");
bzero(DRIVE, sizeof(struct drive) * INITIAL_DRIVES);
vinum_conf.drives_allocated = INITIAL_DRIVES; /* number of drive slots allocated */
vinum_conf.drives_used = 0; /* and number in use */
/* volumes, ... */
VOL = (struct volume *) Malloc(sizeof(struct volume) * INITIAL_VOLUMES);
CHECKALLOC(VOL, "vinum: no memory\n");
bzero(VOL, sizeof(struct volume) * INITIAL_VOLUMES);
vinum_conf.volumes_allocated = INITIAL_VOLUMES; /* number of volume slots allocated */
vinum_conf.volumes_used = 0; /* and number in use */
/* plexes, ... */
PLEX = (struct plex *) Malloc(sizeof(struct plex) * INITIAL_PLEXES);
CHECKALLOC(PLEX, "vinum: no memory\n");
bzero(PLEX, sizeof(struct plex) * INITIAL_PLEXES);
vinum_conf.plexes_allocated = INITIAL_PLEXES; /* number of plex slots allocated */
vinum_conf.plexes_used = 0; /* and number in use */
for (i = 0; i < PLEXMUTEXES; i++) {
snprintf(mutexname, MUTEXNAMELEN, "vinumplex%d", i);
mtx_init(&plexmutex[i], mutexname, "plex", MTX_DEF);
}
/* and subdisks */
SD = (struct sd *) Malloc(sizeof(struct sd) * INITIAL_SUBDISKS);
CHECKALLOC(SD, "vinum: no memory\n");
bzero(SD, sizeof(struct sd) * INITIAL_SUBDISKS);
vinum_conf.subdisks_allocated = INITIAL_SUBDISKS; /* number of sd slots allocated */
vinum_conf.subdisks_used = 0; /* and number in use */
dev_clone_tag = EVENTHANDLER_REGISTER(dev_clone, vinum_clone, 0, 1000);
/*
* See if the loader has passed us any of the autostart
* options.
*/
envp = NULL;
if ((envp = getenv("vinum.autostart")) != NULL) { /* start all drives now */
vinum_scandisk(NULL);
freeenv(envp);
} else if ((envp = getenv("vinum.drives")) != NULL) {
vinum_scandisk(envp);
freeenv(envp);
}
}
/*
 * Report whether vinum is idle, i.e. nothing is open.
 *
 * With confopen != 0 an open super device counts as activity as
 * well; with confopen == 0 only open volumes are considered.
 *
 * Returns 1 if inactive, 0 if something is still open.
 */
int
vinum_inactive(int confopen)
{
    int volno;
    int inactive = 1;				    /* until we find an open volume */

    /* Checked before taking the config lock, as nothing else is needed. */
    if (confopen != 0 && (vinum_conf.flags & VF_OPEN) != 0)
	return 0;				    /* open by vinum(8) */

    lock_config();
    for (volno = 0; inactive && volno < vinum_conf.volumes_allocated; volno++) {
	/* only volumes in a state above volume_down are considered */
	if (VOL[volno].state > volume_down
	    && (VOL[volno].flags & VF_OPEN) != 0)
	    inactive = 0;			    /* found one: stop looking */
    }
    unlock_config();

    return inactive;
}
/*
 * Release every vinum structure and reset the configuration.
 *
 * cleardrive == 0: keep the on-disk configuration and only release
 * the drives; otherwise remove the configuration from each drive.
 *
 * The caller must make sure that no volumes are open.
 */
void
free_vinum(int cleardrive)
{
    int slot;
    int ndrives = vinum_conf.drives_allocated;	    /* taken before anything is torn down */

    /*
     * As long as we are stopping and a daemon still has its device
     * open, ask the daemon to return and sleep on vinumclose (which
     * issues the wakeup) with a timeout of 1.
     */
    for (;;) {
	if ((vinum_conf.flags & (VF_STOPPING | VF_DAEMONOPEN))
	    != (VF_STOPPING | VF_DAEMONOPEN))
	    break;
	queue_daemon_request(daemonrq_return, (union daemoninfo) 0);
	tsleep(&vinumclose, PUSER, "vstop", 1);
    }

    /* drives */
    if (DRIVE != NULL) {
	for (slot = 0; slot < ndrives; slot++) {
	    if (cleardrive)
		remove_drive(slot);		    /* remove the vinum config */
	    else
		free_drive(&DRIVE[slot]);	    /* keep the config: close files and things */
	}
	Free(DRIVE);
    }

    /* subdisks */
    if (SD != NULL) {
	for (slot = 0; slot < vinum_conf.subdisks_allocated; slot++) {
	    if (SD[slot].state != sd_unallocated)
		free_sd(slot);
	}
	Free(SD);
    }

    /* plexes */
    if (PLEX != NULL) {
	for (slot = 0; slot < vinum_conf.plexes_allocated; slot++) {
	    if (PLEX[slot].state != plex_unallocated)	    /* we have real data there */
		free_plex(slot);
	}
	Free(PLEX);
    }

    /* volumes */
    if (VOL != NULL) {
	for (slot = 0; slot < vinum_conf.volumes_allocated; slot++) {
	    if (VOL[slot].state != volume_unallocated)
		free_volume(slot);
	}
	Free(VOL);
    }

    /* Start from a clean configuration, but keep the version number. */
    bzero(&vinum_conf, sizeof(vinum_conf));
    vinum_conf.version = VINUMVERSION;
}
/*
 * Module event handler.
 *
 * MOD_LOAD:   attach the driver; always returns 0.
 * MOD_UNLOAD: returns EBUSY if anything (including the super device)
 *             is still open; otherwise syncs, frees all structures
 *             while keeping the on-disk configuration, destroys the
 *             control devices and the plex mutexes, deregisters the
 *             dev_clone handler, and returns 0.
 * Any other event returns EOPNOTSUPP.
 */
STATIC int
vinum_modevent(module_t mod, modeventtype_t type, void *unused)
{
    struct sync_args dummyarg =
    {0};
    int i;

    switch (type) {
    case MOD_LOAD:
	vinumattach(NULL);
	return 0;					    /* OK */

    case MOD_UNLOAD:
	if (!vinum_inactive(1))				    /* is anything open? */
	    return EBUSY;				    /* yes, we can't do it */
	vinum_conf.flags |= VF_STOPPING;		    /* note that we want to stop */
	sync(curthread, &dummyarg);			    /* write out buffers */
	free_vinum(0);					    /* clean up */
#ifdef VINUMDEBUG
	/* Report, and release, whatever the debug allocator still tracks. */
	if (total_malloced) {
#ifdef INVARIANTS
	    int *poke;
#endif

	    for (i = 0; i < malloccount; i++) {
		if (debug & DEBUG_WARNINGS)		    /* want to hear about them */
		    log(LOG_WARNING,
			"vinum: exiting with %d bytes malloced from %s:%d\n",
			malloced[i].size,
			malloced[i].file,
			malloced[i].line);
#ifdef INVARIANTS
		poke = &((int *) malloced[i].address)
		    [malloced[i].size / (2 * sizeof(int))]; /* middle of the area */
		if (*poke == 0xdeadc0de)		    /* already freed */
		    log(LOG_ERR,
			"vinum: exiting with malloc table inconsistency at %p from %s:%d\n",
			malloced[i].address,
			malloced[i].file,
			malloced[i].line);
#endif
		Free(malloced[i].address);
	    }
	}
#endif
	destroy_dev(vinum_daemon_dev);			    /* daemon device */
	destroy_dev(vinum_super_dev);
	for (i = 0; i < PLEXMUTEXES; i++)
	    mtx_destroy(&plexmutex[i]);
	log(LOG_INFO, "vinum: unloaded\n");		    /* tell the world */
	EVENTHANDLER_DEREGISTER(dev_clone, dev_clone_tag);
	return 0;

    default:
	return EOPNOTSUPP;
    }
}
/*
 * Module description handed to DECLARE_MODULE below. The initializer is
 * positional: the module name, the event handler, and a final value
 * left at 0.
 */
static moduledata_t vinum_mod =
{
"vinum",
(modeventhand_t) vinum_modevent,
0
};
DECLARE_MODULE(vinum, vinum_mod, SI_SUB_RAID, SI_ORDER_MIDDLE);
/* ARGSUSED */
/*
 * Open a vinum object: the daemon or super device, a volume, a plex
 * or a subdisk, depending on the device type encoded in dev.
 *
 * On success the object (or vinum_conf for the control devices) is
 * marked VF_OPEN/VF_DAEMONOPEN and 0 is returned; otherwise an errno
 * value is returned.  flags and fmt are not used.
 */
int
vinumopen(struct cdev *dev,
    int flags,
    int fmt,
    struct thread *td)
{
    unsigned int objno;				    /* index into the object table */
    int unit = minor(dev);			    /* minor number */

    /* First, decide what we're looking at */
    switch (DEVTYPE(dev)) {
    case VINUM_VOLUME_TYPE: {
	    struct volume *v;

	    /*
	     * The super device and daemon device are the last two
	     * volume numbers, so check for them first.
	     */
	    if (unit == VINUM_DAEMON_MINOR || unit == VINUM_SUPERDEV_MINOR) {
		int rc = suser(td);		    /* are we root? */

		if (rc != 0)
		    return rc;			    /* no */
		if (unit == VINUM_DAEMON_MINOR)
		    vinum_conf.flags |= VF_DAEMONOPEN;	/* daemon device is open */
		else
		    vinum_conf.flags |= VF_OPEN;    /* super device is open */
		return 0;
	    }

	    /* Must be a real volume.  Check. */
	    objno = Volno(dev);
	    if (objno >= vinum_conf.volumes_allocated)
		return ENXIO;			    /* no such device */
	    v = &VOL[objno];

	    if (v->state == volume_up) {
		v->flags |= VF_OPEN;		    /* note we're open */
		return 0;
	    }
	    if (v->state == volume_down)
		return EIO;
	    if (v->state == volume_unallocated || v->state == volume_uninit)
		return ENXIO;
	    return EINVAL;
	}

    case VINUM_PLEX_TYPE: {
	    struct plex *p;

	    objno = Plexno(dev);		    /* get plex index in vinum_conf */
	    if (objno >= vinum_conf.plexes_allocated)
		return ENXIO;			    /* no such device */
	    p = &PLEX[objno];

	    if (p->state == plex_unallocated)
		return ENXIO;
	    if (p->state == plex_referenced)
		return EINVAL;
	    p->flags |= VF_OPEN;		    /* any other state: note we're open */
	    return 0;
	}

    case VINUM_SD_TYPE:
    case VINUM_SD2_TYPE: {
	    struct sd *s;

	    objno = Sdno(dev);			    /* get the subdisk number */
	    if (objno >= vinum_conf.subdisks_allocated)	/* not a valid SD entry */
		return ENXIO;			    /* no such device */
	    s = &SD[objno];

	    /*
	     * Opening a subdisk is always a special operation, so
	     * we ignore the state as long as it represents a real
	     * subdisk.
	     */
	    if (s->state == sd_unallocated)
		return ENXIO;
	    if (s->state == sd_uninit || s->state == sd_referenced)
		return EINVAL;
	    s->flags |= VF_OPEN;		    /* note we're open */
	    return 0;
	}
    }

    /*
     * Only reached for a device type not listed above; such an open
     * reports success.  Also keeps the compiler happy.
     */
    return 0;
}
/* ARGSUSED */
/*
 * Close a vinum object: clear the open flag set by vinumopen() on the
 * daemon or super device, a volume, a plex or a subdisk.
 *
 * Returns 0 on success or an errno value.  flags, fmt and td are not
 * used.
 *
 * NOTE(review): vinumopen() accepts VINUM_SD2_TYPE devices, but there
 * is no such case here, so closing one returns ENODEV and leaves
 * VF_OPEN set on the subdisk.  Confirm whether that is intended.
 */
int
vinumclose(struct cdev *dev,
    int flags,
    int fmt,
    struct thread *td)
{
    unsigned int objno;				    /* index into the object table */
    int unit = minor(dev);

    /* First, decide what we're looking at */
    switch (DEVTYPE(dev)) {
    case VINUM_VOLUME_TYPE: {
	    struct volume *v;

	    /*
	     * The super device and daemon device are the last two
	     * volume numbers, so check for them first.  Don't worry
	     * about whether we're root: nobody else would get this far.
	     */
	    if (unit == VINUM_SUPERDEV_MINOR) {	    /* normal superdev */
		vinum_conf.flags &= ~VF_OPEN;	    /* no longer open */
		return 0;
	    }
	    if (unit == VINUM_DAEMON_MINOR) {	    /* the daemon device */
		vinum_conf.flags &= ~VF_DAEMONOPEN; /* no longer open */
		if (vinum_conf.flags & VF_STOPPING) /* we're trying to stop, */
		    wakeup(&vinumclose);	    /* we can continue now */
		return 0;
	    }

	    /* Real volume */
	    objno = Volno(dev);
	    if (objno >= vinum_conf.volumes_allocated)
		return ENXIO;			    /* no such device */
	    v = &VOL[objno];

	    if (v->state == volume_up) {
		v->flags &= ~VF_OPEN;		    /* reset our flags */
		return 0;
	    }
	    if (v->state == volume_down)
		return EIO;
	    if (v->state == volume_unallocated || v->state == volume_uninit)
		return ENXIO;
	    return EINVAL;
	}

    case VINUM_PLEX_TYPE:
	if (Volno(dev) >= vinum_conf.volumes_allocated)
	    return ENXIO;
	objno = Plexno(dev);
	if (objno >= vinum_conf.plexes_allocated)   /* no such plex */
	    return ENXIO;
	PLEX[objno].flags &= ~VF_OPEN;		    /* no longer open */
	return 0;

    case VINUM_SD_TYPE:
	if (Volno(dev) >= vinum_conf.volumes_allocated)	/* no such volume */
	    return ENXIO;
	if (Plexno(dev) >= vinum_conf.plexes_allocated)	/* or no such plex */
	    return ENXIO;
	objno = Sdno(dev);
	if (objno >= vinum_conf.subdisks_allocated) /* no such sd */
	    return ENXIO;
	SD[objno].flags &= ~VF_OPEN;		    /* no longer open */
	return 0;

    default:
	return ENODEV;				    /* don't know what to do with these */
    }
}
/*
 * dev_clone event handler (registered in vinumattach()).
 *
 * If no device has been supplied yet and name has the form
 * "vinum/<volume>" for a volume that find_volume() knows, store that
 * volume's device in *dev.  In every other case *dev is left alone.
 * arg and namelen are not used.
 */
void
vinum_clone(void *arg, char *name, int namelen, struct cdev ** dev)
{
    static const char prefix[] = "vinum/";
    int volno;

    if (*dev != NULL)				    /* somebody else got there first */
	return;
    if (strncmp(name, prefix, sizeof(prefix) - 1) != 0)
	return;					    /* not one of ours */

    /* look up the part of the name after the prefix */
    volno = find_volume(name + (sizeof(prefix) - 1), 0);
    if (volno == -1)
	return;					    /* no such volume */

    *dev = VOL[volno].dev;
}
/* Local Variables: */
/* fill-column: 60 */
/* End: */

File diff suppressed because it is too large Load Diff

View File

@ -1,283 +0,0 @@
/* daemon.c: kernel part of Vinum daemon */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumdaemon.c,v 1.8 2000/01/03 05:22:03 grog Exp grog $
*/
#include <dev/vinum/vinumhdr.h>
#include <dev/vinum/request.h>
#ifdef VINUMDEBUG
#include <sys/reboot.h>
#endif
/* declarations */
void recover_io(struct request *rq);

int daemon_options = 0;                 /* options */
int daemonpid;                          /* PID of daemon */
struct daemonq *daemonq;                /* daemon's work queue */
struct daemonq *dqend;                  /* and the end of the queue */

/*
 * We normally call Malloc to get a queue element.  In interrupt
 * context, we can't guarantee that we'll get one, since we're not
 * allowed to wait.  If malloc fails, use one of these elements.
 *
 * intqp is the cursor into the spare pool.  It starts at the first
 * spare so that queue_daemon_request() never dereferences a null
 * pointer the first time an allocation fails.
 */
#define INTQSIZE 4
struct daemonq intq[INTQSIZE];          /* queue elements for interrupt context */
struct daemonq *intqp = intq;           /* and pointer in it */
/*
 * Hand a processed queue element back to wherever it came from.
 * Elements with privateinuse set belong to the static intq pool and
 * are only marked free again; all others were obtained with Malloc
 * and are freed.
 */
static void
release_daemon_request(struct daemonq *qelt)
{
    if (qelt->privateinuse)                 /* one of ours, */
        qelt->privateinuse = 0;             /* no longer in use */
    else
        Free(qelt);                         /* return it */
}

/*
 * Main loop of the vinum daemon.
 *
 * Marks the current process as a system process, saves the
 * configuration once, records the process ID in daemonpid and then
 * sleeps on &vinum_daemon until queue_daemon_request() wakes it.
 * Every queued request is detached from daemonq and dispatched on its
 * type.
 *
 * The function returns in two cases only: another process has taken
 * over daemonpid while we slept, or a daemonrq_return request was
 * received.  In the latter case all remaining requests are discarded
 * and anybody sleeping on &vinumclose is woken.
 */
void
vinum_daemon(void)
{
    int s;
    struct daemonq *request;

    PROC_LOCK(curproc);
    curproc->p_flag |= P_SYSTEM;            /* we're a system process */
    mtx_lock_spin(&sched_lock);
    curproc->p_sflag |= PS_INMEM;
    mtx_unlock_spin(&sched_lock);
    PROC_UNLOCK(curproc);
    daemon_save_config();                   /* start by saving the configuration */
    daemonpid = curproc->p_pid;             /* mark our territory */
    while (1) {
        tsleep(&vinum_daemon, PRIBIO, "vinum", 0); /* wait for something to happen */

        /*
         * It's conceivable that, as the result of an
         * I/O error, we'll be out of action long
         * enough that another daemon gets started.
         * That's OK, just give up gracefully.
         */
        if (curproc->p_pid != daemonpid) {  /* we've been ousted in our sleep */
            if (daemon_options & daemon_verbose)
                log(LOG_INFO, "vinum: abdicating\n");
            return;
        }
        while (daemonq != NULL) {           /* we have work to do, */
            s = splhigh();                  /* don't get interrupted here */
            request = daemonq;              /* get the request */
            daemonq = daemonq->next;        /* and detach it */
            if (daemonq == NULL)            /* got to the end, */
                dqend = NULL;               /* no end any more */
            splx(s);

            switch (request->type) {
                /*
                 * We had an I/O error on a request.  Go through the
                 * request and try to salvage it
                 */
            case daemonrq_ioerror:
                if (daemon_options & daemon_verbose) {
                    struct request *rq = request->info.rq;

                    log(LOG_WARNING,
                        "vinum: recovering I/O request: %p\n%s dev %d.%d, offset 0x%llx, length %ld\n",
                        rq,
                        rq->bp->b_iocmd == BIO_READ ? "Read" : "Write",
                        major(rq->bp->b_dev),
                        minor(rq->bp->b_dev),
                        (long long)rq->bp->b_blkno,
                        rq->bp->b_bcount);
                }
                recover_io(request->info.rq); /* the failed request */
                break;

                /*
                 * Write the config to disk.  We could end up with
                 * quite a few of these in a row.  Only honour the
                 * last one
                 */
            case daemonrq_saveconfig:
                if ((daemonq == NULL)       /* no more requests */
                    ||(daemonq->type != daemonrq_saveconfig)) { /* or the next isn't the same */
                    if (((daemon_options & daemon_noupdate) == 0) /* we're allowed to do it */
                        &&((vinum_conf.flags & VF_READING_CONFIG) == 0)) { /* and we're not building the config now */
                        /*
                         * We obviously don't want to save a
                         * partial configuration.  Less obviously,
                         * we don't need to do anything if we're
                         * asked to write the config when we're
                         * building it up, because we save it at
                         * the end.
                         */
                        if (daemon_options & daemon_verbose)
                            log(LOG_INFO, "vinum: saving config\n");
                        daemon_save_config(); /* save it */
                    }
                }
                break;

            case daemonrq_return:           /* been told to stop */
                if (daemon_options & daemon_verbose)
                    log(LOG_INFO, "vinum: stopping\n");
                daemon_options |= daemon_stopped; /* note that we've stopped */
                /*
                 * Requests may live in the static intq pool, which
                 * must never be passed to Free(): release every
                 * element according to its privateinuse flag.
                 */
                release_daemon_request(request);
                while (daemonq != NULL) {   /* backed up requests, */
                    request = daemonq;      /* get the request */
                    daemonq = daemonq->next; /* and detach it */
                    release_daemon_request(request); /* then give it back */
                }
                wakeup(&vinumclose);        /* and wake any waiting vinum(8)s */
                return;

            case daemonrq_ping:             /* tell the caller we're here */
                if (daemon_options & daemon_verbose)
                    log(LOG_INFO, "vinum: ping reply\n");
                wakeup(&vinum_finddaemon);  /* wake up the caller */
                break;

            case daemonrq_closedrive:       /* close a drive */
                close_drive(request->info.drive); /* do it */
                break;

            case daemonrq_init:             /* initialize a plex */
                /* XXX */
            case daemonrq_revive:           /* revive a subdisk */
                /* XXX */
                /* FALLTHROUGH */
            default:
                log(LOG_WARNING, "Invalid request\n");
                break;
            }
            release_daemon_request(request); /* finished with this element */
        }
    }
}
/*
 * Try to recover a request that finished with an I/O error.
 *
 * A thorough implementation would look at every failed part of the
 * request and decide what to do about it: for a write there may be
 * nothing left to do because the defective drives are already down,
 * whereas a read has to be retried if another copy of the data
 * exists.
 *
 * Here we take the simple route and issue the user's buffer again.
 * That costs a little extra time, but far less than the failure
 * itself did.
 */
void
recover_io(struct request *rq)
{
    struct buf *ubp = rq->bp;               /* the buffer to send round again */

    /*
     * The call below should really be
     *
     *     vinumstrategy(ubp);
     *
     * Negotiate with phk to get it fixed.
     */
    DEV_STRATEGY(ubp);                      /* reissue the command */
}
/* Functions called to interface with the daemon */

/*
 * Append a request of the given type, carrying info, to the daemon's
 * work queue and wake the daemon.
 *
 * The queue element normally comes from Malloc.  When that fails, the
 * next element of the static intq pool is used instead; if that
 * element is still marked in use the request is silently dropped.
 */
void
queue_daemon_request(enum daemonrq type, union daemoninfo info)
{
    int s;
    struct daemonq *qelt;

    qelt = (struct daemonq *) Malloc(sizeof(struct daemonq));
    if (qelt != NULL)
        qelt->privateinuse = 0;             /* ordinary, malloced element */
    else {                                  /* malloc failed, we're prepared for that */
        /*
         * Take one of our spares.  Give up if it's still in use; the only
         * message we're likely to get here is a 'drive failed' message,
         * and that'll come by again if we miss it.
         */
        if (intqp->privateinuse)            /* still in use? */
            return;                         /* yes, give up */
        qelt = intqp;
        intqp++;
        if (intqp == &intq[INTQSIZE])       /* got to the end, */
            intqp = intq;                   /* wrap around */
        qelt->privateinuse = 1;             /* it's ours, and it's in use */
    }
    qelt->next = NULL;                      /* end of the chain */
    qelt->type = type;
    qelt->info = info;

    s = splhigh();
    if (daemonq == NULL)                    /* queue is empty, */
        daemonq = qelt;                     /* this is the whole queue */
    else                                    /* something queued already */
        dqend->next = qelt;
    dqend = qelt;                           /* either way, this is the new tail */
    splx(s);

    wakeup(&vinum_daemon);                  /* and give the daemon a kick */
}
/*
 * Make sure a daemon is running.
 *
 * If daemonpid is set, queue a ping and wait up to two seconds for
 * the daemon to answer with a wakeup on &vinum_finddaemon.  If it
 * answers, return at once.  If there is no daemon, or it does not
 * answer, run vinum_daemon() in the context of the caller; that call
 * only comes back when the daemon loop exits.
 *
 * Always returns 0.
 */
int
vinum_finddaemon(void)
{
    int result;

    if (daemonpid != 0) {                   /* we think we have a daemon, */
        queue_daemon_request(daemonrq_ping, (union daemoninfo) 0); /* queue a ping */
        result = tsleep(&vinum_finddaemon, PUSER, "reap", 2 * hz);
        if (result == 0)                    /* yup, the daemon's up and running */
            return 0;
    }
    /* no daemon, or we couldn't talk to it: start it */
    vinum_daemon();                         /* start the daemon */
    return 0;
}
/*
 * Replace the daemon's option flags with options.  The previous
 * value of daemon_options is discarded.  Always returns 0.
 */
int
vinum_setdaemonopts(int options)
{
    daemon_options = options;               /* overwrite, don't merge */

    return 0;
}

View File

@ -1,261 +0,0 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumext.h,v 1.33 2003/05/23 00:57:48 grog Exp $
* $FreeBSD$
*/
/* vinumext.h: external definitions */
/* *sigh* We still need this at the moment. */
/*
 * The global configuration is declared with a different structure tag
 * in the kernel (struct _vinum_conf) than outside it (struct
 * __vinum_conf).  The plex mutex array is declared for the kernel
 * only.
 */
#ifdef _KERNEL
extern struct _vinum_conf vinum_conf; /* configuration information */
extern struct mtx plexmutex[]; /* mutexes for plexes to use */
#else
extern struct __vinum_conf vinum_conf; /* configuration information */
#endif
/* The debug flag word is only declared in VINUMDEBUG builds. */
#ifdef VINUMDEBUG
extern int debug; /* debug flags */
#endif
/*
 * Physical drive I/O.  Both wrappers hand their four arguments to
 * driveio() unchanged and append the transfer direction as the fifth.
 */
#define read_drive(drive, buf, length, offset) driveio (drive, buf, length, offset, BIO_READ)
#define write_drive(drive, buf, length, offset) driveio (drive, buf, length, offset, BIO_WRITE)
/*
 * Bail out of the current operation if an allocation failed: when ptr
 * is NULL, print msg and longjmp() to command_fail with the value -1.
 *
 * The body is wrapped in do { } while (0) so that "CHECKALLOC(p, m);"
 * is exactly one statement and can be used as the body of an if/else
 * without changing what the else binds to.  ptr is evaluated once.
 * msg is handed to printf() as the format string, so it must not
 * contain conversion specifications.
 */
#define CHECKALLOC(ptr, msg) \
    do { \
        if ((ptr) == NULL) { \
            printf (msg); \
            longjmp (command_fail, -1); \
        } \
    } while (0)
/* Outside the kernel these two are only forward-declared as incomplete types. */
#ifndef _KERNEL
struct vnode;
struct thread;
#endif
/*
 * Kernel-only prototypes.  This conditional extends to the #endif that
 * follows the declaration of vpstate().
 */
#ifdef _KERNEL
int vinum_inactive(int);
void free_vinum(int);
int give_sd_to_plex(int plexno, int sdno);
void give_sd_to_drive(int sdno);
int give_plex_to_volume(int, int, int);
struct drive *check_drive(char *);
enum drive_label_info read_drive_label(struct drive *, int);
int parse_config(char *, struct keywordset *, int);
int parse_user_config(char *cptr, struct keywordset *keyset);
u_int64_t sizespec(char *spec);
int volume_index(struct volume *volume);
int plex_index(struct plex *plex);
int sd_index(struct sd *sd);
int drive_index(struct drive *drive);
int my_plex(int volno, int plexno);
int my_sd(int plexno, int sdno);
int get_empty_drive(void);
int find_drive(const char *name, int create);
int find_drive_by_name(const char *devname, int create);
int get_empty_sd(void);
int find_subdisk(const char *name, int create);
void return_drive_space(int driveno, int64_t offset, int length);
void free_sd(int sdno);
void free_volume(int volno);
int get_empty_plex(void);
int find_plex(const char *name, int create);
void free_plex(int plexno);
int get_empty_volume(void);
int find_volume(const char *name, int create);
void config_subdisk(int);
void config_plex(int);
void config_volume(int);
void config_drive(int);
void updateconfig(int);
void update_sd_config(int sdno, int kernelstate);
void update_plex_config(int plexno, int kernelstate);
void update_volume_config(int volno);
void update_config(void);
void drive_io_done(struct buf *);
void save_config(void);
void daemon_save_config(void);
void write_config(char *, int);
int start_config(int);
void finish_config(int);
void remove(struct vinum_ioctl_msg *msg);
void remove_drive_entry(int driveno, int force);
void remove_sd_entry(int sdno, int force, int recurse);
void remove_plex_entry(int plexno, int force, int recurse);
void remove_volume_entry(int volno, int force, int recurse);
void checkdiskconfig(char *);
int open_drive(struct drive *, struct thread *, int);
void close_drive(struct drive *drive);
void close_locked_drive(struct drive *drive);
/* driveio() is the function the read_drive()/write_drive() macros expand to. */
int driveio(struct drive *, char *, size_t, off_t, int);
int set_drive_parms(struct drive *drive);
int init_drive(struct drive *, int);
/* void throw_rude_remark (int, struct _ioctl_reply *, char *, ...); XXX */
void throw_rude_remark(int, char *,...);
void format_config(char *config, int len);
void checkkernel(char *op);
void free_drive(struct drive *drive);
void down_drive(struct drive *drive);
void remove_drive(int driveno);
int vinum_scandisk(char *drivename);
/* I/O */
/* Device switch entry points, declared through the d_*_t function typedefs. */
d_open_t vinumopen;
d_close_t vinumclose;
d_strategy_t vinumstrategy;
d_ioctl_t vinumioctl;
int vinum_super_ioctl(struct cdev *, u_long, caddr_t);
int vinumstart(struct buf *bp, int reviveok);
int launch_requests(struct request *rq, int reviveok);
void sdio(struct buf *bp);
/* XXX Do we need this? */
int vinumpart(struct cdev *);
/* Jump target of the CHECKALLOC() macro; note it is only declared for the kernel. */
extern jmp_buf command_fail; /* return here if config fails */
#ifdef VINUMDEBUG
/* Memory allocation and request tracing */
void vinum_meminfo(caddr_t data);
int vinum_mallocinfo(caddr_t data);
int vinum_rqinfo(caddr_t data);
void LongJmp(jmp_buf, int);
char *basename(char *);
#endif
/* expand_table() takes two additional arguments in VINUMDEBUG builds. */
#ifdef VINUMDEBUG
void expand_table(void **, int, int, char *, int);
#else
void expand_table(void **, int, int);
#endif
struct disklabel;
struct request;
struct rqgroup *allocrqg(struct request *rq, int elements);
void deallocrqg(struct rqgroup *rqg);
/* Device number decoding */
int Volno(struct cdev *x);
int Plexno(struct cdev *x);
int Sdno(struct cdev *x);
/* State transitions */
int set_drive_state(int driveno, enum drivestate state, enum setstateflags flags);
int set_sd_state(int sdno, enum sdstate state, enum setstateflags flags);
enum requeststatus checksdstate(struct sd *sd, struct request *rq, daddr_t diskaddr, daddr_t diskend);
int set_plex_state(int plexno, enum plexstate state, enum setstateflags flags);
int set_volume_state(int volumeno, enum volumestate state, enum setstateflags flags);
void update_sd_state(int sdno);
void forceup(int plexno);
void update_plex_state(int plexno);
void update_volume_state(int volno);
void invalidate_subdisks(struct plex *, enum sdstate);
void start_object(struct vinum_ioctl_msg *);
void stop_object(struct vinum_ioctl_msg *);
void setstate(struct vinum_ioctl_msg *msg);
void setstate_by_force(struct vinum_ioctl_msg *msg);
void vinum_label(int);
int vinum_writedisklabel(struct volume *, struct disklabel *);
int initsd(int, int);
struct buf *parityrebuild(struct plex *, u_int64_t, int, enum parityop, struct rangelock **, off_t *);
enum requeststatus sddownstate(struct request *rq);
int restart_plex(int plexno);
int revive_read(struct sd *sd);
int revive_block(int sdno);
void parityops(struct vinum_ioctl_msg *);
/* Auxiliary functions */
enum sdstates sdstatemap(struct plex *plex);
enum volplexstate vpstate(struct plex *plex);
#endif /* _KERNEL */
/*
 * The declarations from here on are outside the _KERNEL conditional,
 * so they are seen by every file that includes this header.
 */
struct drive *validdrive(int driveno, struct _ioctl_reply *);
struct sd *validsd(int sdno, struct _ioctl_reply *);
struct plex *validplex(int plexno, struct _ioctl_reply *);
struct volume *validvol(int volno, struct _ioctl_reply *);
void resetstats(struct vinum_ioctl_msg *msg);
/* Locking */
/*
 * lockdrive() takes two extra arguments in VINUMDEBUG builds; the
 * LOCKDRIVE() macro further down passes __FILE__ and __LINE__ for them.
 */
#ifdef VINUMDEBUG
int lockdrive(struct drive *drive, char *, int);
#else
int lockdrive(struct drive *drive);
#endif
void unlockdrive(struct drive *drive);
int lockvol(struct volume *vol);
void unlockvol(struct volume *vol);
int lockplex(struct plex *plex);
void unlockplex(struct plex *plex);
struct rangelock *lockrange(daddr_t stripe, struct buf *bp, struct plex *plex);
int lock_config(void);
void unlock_config(void);
/* Dæmon */
void vinum_daemon(void);
int vinum_finddaemon(void);
int vinum_setdaemonopts(int);
extern struct daemonq *daemonq; /* daemon's work queue */
extern struct daemonq *dqend; /* and the end of the queue */
extern struct cdevsw vinum_cdevsw;
/*
 * Allocation wrappers.  Free may already be a macro (defined in some
 * funny net stuff), so drop that definition first.
 */
#undef Free
#if defined(_KERNEL) && defined(VINUMDEBUG)
/* Debug kernel: pass the file and line of every call, to show where we came from. */
caddr_t MMalloc(int size, char *, int);
void FFree(void *mem, char *, int);
#define Malloc(size) MMalloc ((size), __FILE__, __LINE__)
#define Free(mem) FFree ((mem), __FILE__, __LINE__)
#define LOCKDRIVE(drive) lockdrive (drive, __FILE__, __LINE__)
#elif defined(_KERNEL)
/* Plain kernel: M_WAITOK only when td_intr_nesting_level is zero, M_NOWAIT otherwise. */
#define Malloc(size) malloc((size), M_DEVBUF, \
    curthread->td_intr_nesting_level == 0? M_WAITOK: M_NOWAIT)
#define Free(mem) free((mem), M_DEVBUF)
#define LOCKDRIVE(drive) lockdrive (drive)
#else
/* Outside the kernel: the ordinary library calls. */
#define Malloc(size) malloc ((size)) /* just the size */
#define Free(mem) free ((mem)) /* just the address */
#endif
/* Local Variables: */
/* fill-column: 50 */
/* End: */

View File

@ -1,81 +0,0 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*/
/* Header files used by all modules */
/*
* $Id: vinumhdr.h,v 1.19 2001/05/22 04:07:22 grog Exp grog $
* $FreeBSD$
*/
#include <sys/param.h>
#ifdef _KERNEL
#include "opt_vinum.h"
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/sysctl.h>
#endif
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/uio.h>
#include <sys/namei.h>
#include <sys/stat.h>
#include <sys/disk.h>
#include <sys/disklabel.h>
#include <sys/syslog.h>
#include <sys/fcntl.h>
#include <sys/queue.h>
#ifdef _KERNEL
#include <machine/setjmp.h>
#include <machine/stdarg.h>
#else
#include <setjmp.h>
#include <stdarg.h>
#endif
#include <vm/vm.h>
#include <dev/vinum/vinumvar.h>
#include <dev/vinum/vinumio.h>
#include <dev/vinum/vinumkw.h>
#include <dev/vinum/vinumext.h>
#include <dev/vinum/vinumutil.h>
#include <machine/cpu.h>

View File

@ -1,473 +0,0 @@
/* vinuminterrupt.c: bottom half of the driver */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/*-
* Copyright (c) 1997, 1998, 1999
* Nan Yang Computer Services Limited. All rights reserved.
*
* Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
*
* Written by Greg Lehey
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinuminterrupt.c,v 1.41 2003/08/24 17:55:56 obrien Exp $
*/
#include <dev/vinum/vinumhdr.h>
#include <dev/vinum/request.h>
#include <sys/resourcevar.h>
/* Prototypes for the completion routines defined further down in this file. */
void complete_raid5_write(struct rqelement *);
void complete_rqe(struct buf *bp);
void sdio_done(struct buf *bp);
/*
 * Take a completed buffer, transfer the data back if
 * it's a read, and complete the high-level request
 * if this is the last subrequest.
 *
 * The bp parameter is in fact a struct rqelement, which
 * includes a couple of extras at the end.
 *
 * In order, the function:
 *  - drops the per-drive and global active counts and wakes anybody
 *    sleeping on &launch_requests when a limit test matches;
 *  - on error, records the error in the request and the subdisk, logs
 *    it as "<subdisk>:[ fatal] read|write error, ..." and changes the
 *    subdisk (and for ENXIO the drive) state;
 *  - updates the read/write statistics of drive, subdisk, plex and,
 *    where the plex belongs to one, volume;
 *  - for a recovery read, XORs this element's data into the buffer of
 *    the bad subdisk's element; for the last active element of a
 *    RAID-4/5 group write, calls complete_raid5_write();
 *  - when the whole request has finished, either completes the user
 *    buffer and frees the request, or deals with the error.
 */
void
complete_rqe(struct buf *bp)
{
    struct rqelement *rqe;
    struct request *rq;
    struct rqgroup *rqg;
    struct buf *ubp;                        /* user buffer */
    struct drive *drive;
    struct sd *sd;
    char *gravity;                          /* for error messages */

    rqe = (struct rqelement *) bp;          /* point to the element that completed */
    rqg = rqe->rqg;                         /* and the request group */
    rq = rqg->rq;                           /* and the complete request */
    ubp = rq->bp;                           /* user buffer */

#ifdef VINUMDEBUG
    if (debug & DEBUG_LASTREQS)
        logrq(loginfo_iodone, (union rqinfou) rqe, ubp);
#endif
    drive = &DRIVE[rqe->driveno];
    drive->active--;                        /* one less outstanding I/O on this drive */
    vinum_conf.active--;                    /* one less outstanding I/O globally */
    if ((drive->active == (DRIVE_MAXACTIVE - 1)) /* we were at the drive limit */
        ||(vinum_conf.active == VINUM_MAXACTIVE)) /* or the global limit */
        wakeup(&launch_requests);           /* let another one at it */
    if ((bp->b_io.bio_flags & BIO_ERROR) != 0) { /* transfer in error */
        gravity = "";
        sd = &SD[rqe->sdno];

        if (bp->b_error != 0)               /* did it return a number? */
            rq->error = bp->b_error;        /* yes, put it in. */
        else if (rq->error == 0)            /* no: do we have one already? */
            rq->error = EIO;                /* no: catchall "I/O error" */
        sd->lasterror = rq->error;
        /*
         * The messages below print the subdisk name first and the
         * optional " fatal" qualifier after the colon, matching the
         * "<name>: ..." form of the other messages in this function.
         */
        if (bp->b_iocmd == BIO_READ) {      /* read operation */
            if ((rq->error == ENXIO) || (sd->flags & VF_RETRYERRORS) == 0) {
                gravity = " fatal";
                set_sd_state(rqe->sdno, sd_crashed, setstate_force); /* subdisk is crashed */
            }
            log(LOG_ERR,
                "%s:%s read error, block %lld for %ld bytes\n",
                sd->name,
                gravity,
                (long long)bp->b_blkno,
                bp->b_bcount);
        } else {                            /* write operation */
            if ((rq->error == ENXIO) || (sd->flags & VF_RETRYERRORS) == 0) {
                gravity = " fatal";
                set_sd_state(rqe->sdno, sd_stale, setstate_force); /* subdisk is stale */
            }
            log(LOG_ERR,
                "%s:%s write error, block %lld for %ld bytes\n",
                sd->name,
                gravity,
                (long long)bp->b_blkno,
                bp->b_bcount);
        }
        log(LOG_ERR,
            "%s: user buffer block %lld for %ld bytes\n",
            sd->name,
            (long long)ubp->b_blkno,
            ubp->b_bcount);
        if (rq->error == ENXIO) {           /* the drive's down too */
            log(LOG_ERR,
                "%s: fatal drive I/O error, block %lld for %ld bytes\n",
                DRIVE[rqe->driveno].label.name,
                (long long)bp->b_blkno,
                bp->b_bcount);
            DRIVE[rqe->driveno].lasterror = rq->error;
            set_drive_state(rqe->driveno,   /* take the drive down */
                drive_down,
                setstate_force);
        }
    }
    /* Now update the statistics */
    if (bp->b_iocmd == BIO_READ) {          /* read operation */
        DRIVE[rqe->driveno].reads++;
        DRIVE[rqe->driveno].bytes_read += bp->b_bcount;
        SD[rqe->sdno].reads++;
        SD[rqe->sdno].bytes_read += bp->b_bcount;
        PLEX[rqe->rqg->plexno].reads++;
        PLEX[rqe->rqg->plexno].bytes_read += bp->b_bcount;
        if (PLEX[rqe->rqg->plexno].volno >= 0) { /* volume I/O, not plex */
            VOL[PLEX[rqe->rqg->plexno].volno].reads++;
            VOL[PLEX[rqe->rqg->plexno].volno].bytes_read += bp->b_bcount;
        }
    } else {                                /* write operation */
        DRIVE[rqe->driveno].writes++;
        DRIVE[rqe->driveno].bytes_written += bp->b_bcount;
        SD[rqe->sdno].writes++;
        SD[rqe->sdno].bytes_written += bp->b_bcount;
        PLEX[rqe->rqg->plexno].writes++;
        PLEX[rqe->rqg->plexno].bytes_written += bp->b_bcount;
        if (PLEX[rqe->rqg->plexno].volno >= 0) { /* volume I/O, not plex */
            VOL[PLEX[rqe->rqg->plexno].volno].writes++;
            VOL[PLEX[rqe->rqg->plexno].volno].bytes_written += bp->b_bcount;
        }
    }
    if (rqg->flags & XFR_RECOVERY_READ) {   /* recovery read, */
        int *sdata;                         /* source */
        int *data;                          /* and group data */
        int length;                         /* and count involved */
        int count;                          /* loop counter */
        struct rqelement *urqe = &rqg->rqe[rqg->badsdno]; /* rqe of the bad subdisk */

        /* XOR destination is the user data */
        sdata = (int *) &rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT]; /* old data contents */
        data = (int *) &urqe->b.b_data[urqe->groupoffset << DEV_BSHIFT]; /* destination */
        length = urqe->grouplen * (DEV_BSIZE / sizeof(int)); /* and number of ints */

        for (count = 0; count < length; count++)
            data[count] ^= sdata[count];

        /*
         * In a normal read, we will normally read directly
         * into the user buffer.  This doesn't work if
         * we're also doing a recovery, so we have to
         * copy it
         */
        if (rqe->flags & XFR_NORMAL_READ) { /* normal read as well, */
            char *src = &rqe->b.b_data[rqe->dataoffset << DEV_BSHIFT]; /* read data is here */
            char *dst;

            dst = (char *) ubp->b_data + (rqe->useroffset << DEV_BSHIFT); /* where to put it in user buffer */
            length = rqe->datalen << DEV_BSHIFT; /* and count involved */
            bcopy(src, dst, length);        /* move it */
        }
    } else if ((rqg->flags & (XFR_NORMAL_WRITE | XFR_DEGRADED_WRITE)) /* RAID 4/5 group write operation */
        &&(rqg->active == 1))               /* and this is the last active request */
        complete_raid5_write(rqe);
    /*
     * This is the earliest place where we can be
     * sure that the request has really finished,
     * since complete_raid5_write can issue new
     * requests.
     */
    rqg->active--;                          /* this request now finished */
    if (rqg->active == 0) {                 /* request group finished, */
        rq->active--;                       /* one less */
        if (rqg->lock) {                    /* got a lock? */
            unlockrange(rqg->plexno, rqg->lock); /* yes, free it */
            rqg->lock = 0;                  /* so that freerq() doesn't unlock it again */
        }
    }
    if (rq->active == 0) {                  /* request finished, */
#ifdef VINUMDEBUG
        if (debug & DEBUG_RESID) {
            if (ubp->b_resid != 0)          /* still something to transfer? */
                kdb_enter("resid");
        }
#endif

        if (rq->error) {                    /* did we have an error? */
            /*
             * NOTE(review): neither branch below calls bufdone() or
             * freerq() from here.  The volume case hands the request
             * to the daemon; for the plex case confirm where the user
             * buffer gets completed.
             */
            if (rq->isplex) {               /* plex operation, */
                ubp->b_io.bio_flags |= BIO_ERROR; /* yes, propagate to user */
                ubp->b_error = rq->error;
            } else                          /* try to recover */
                queue_daemon_request(daemonrq_ioerror, (union daemoninfo) rq); /* let the daemon complete */
        } else {
            ubp->b_resid = 0;               /* completed our transfer */
            if (rq->isplex == 0)            /* volume request, */
                VOL[rq->volplex.volno].active--; /* another request finished */
            if (rq->flags & XFR_COPYBUF) {  /* we substituted our own data buffer, */
                Free(ubp->b_data);          /* release it */
                ubp->b_data = rq->save_data; /* and put the saved pointer back */
            }
            bufdone(ubp);                   /* top level buffer completed */
            freerq(rq);                     /* return the request storage */
        }
    }
}
/* Free a request block and anything hanging off it */
void
freerq(struct request *rq)
{
struct rqgroup *rqg;
struct rqgroup *nrqg; /* next in chain */
int rqno;
for (rqg = rq->rqg; rqg != NULL; rqg = nrqg) { /* through the whole request chain */
if (rqg->lock) /* got a lock? */
unlockrange(rqg->plexno, rqg->lock); /* yes, free it */
for (rqno = 0; rqno < rqg->count; rqno++) {
if ((rqg->rqe[rqno].flags & XFR_MALLOCED) /* data buffer was malloced, */
&&rqg->rqe[rqno].b.b_data) /* and the allocation succeeded */
Free(rqg->rqe[rqno].b.b_data); /* free it */
if (rqg->rqe[rqno].flags & XFR_BUFLOCKED) { /* locked this buffer, */
BUF_UNLOCK(&rqg->rqe[rqno].b); /* unlock it again */
BUF_LOCKFREE(&rqg->rqe[rqno].b);
}
}
nrqg = rqg->next; /* note the next one */
Free(rqg); /* and free this one */
}
Free(rq); /* free the request itself */
}
/* I/O on subdisk completed */
void
sdio_done(struct buf *bp)
{
struct sdbuf *sbp;
sbp = (struct sdbuf *) bp;
if (sbp->b.b_io.bio_flags & BIO_ERROR) { /* had an error */
sbp->bp->b_io.bio_flags |= BIO_ERROR; /* propagate upwards */
sbp->bp->b_error = sbp->b.b_error;
}
#ifdef VINUMDEBUG
if (debug & DEBUG_LASTREQS)
logrq(loginfo_sdiodone, (union rqinfou) bp, bp);
#endif
sbp->bp->b_resid = sbp->b.b_resid; /* copy the resid field */
/* Now update the statistics */
if (bp->b_iocmd == BIO_READ) { /* read operation */
DRIVE[sbp->driveno].reads++;
DRIVE[sbp->driveno].bytes_read += sbp->b.b_bcount;
SD[sbp->sdno].reads++;
SD[sbp->sdno].bytes_read += sbp->b.b_bcount;
} else { /* write operation */
DRIVE[sbp->driveno].writes++;
DRIVE[sbp->driveno].bytes_written += sbp->b.b_bcount;
SD[sbp->sdno].writes++;
SD[sbp->sdno].bytes_written += sbp->b.b_bcount;
}
bufdone(sbp->bp); /* complete the caller's I/O */
BUF_UNLOCK(&sbp->b);
BUF_LOCKFREE(&sbp->b);
Free(sbp);
}
/*
 * Start the second phase of a RAID-4 or RAID-5 group write operation.
 *
 * rqe is one element of the request group whose first phase has
 * finished; the group (rqe->rqg), the request (rqg->rq) and the user
 * buffer (rq->bp) are all derived from it. Element 0 of the group is
 * used as the parity block.
 *
 * The function
 *   1. for a degraded write, rebuilds the parity region from scratch by
 *      zeroing it and XORing in every data element of the group;
 *   2. for a normal write, XORs the old data out of the parity block and
 *      the new user data into it, then turns each readable data element
 *      into a write of the user data and issues it;
 *   3. always finishes by issuing the write of the parity block.
 *
 * Every issued write increments rqg->active and the per-drive and global
 * active counters; completion is reported through complete_rqe.
 */
void
complete_raid5_write(struct rqelement *rqe)
{
int *sdata; /* source */
int *pdata; /* and parity block data */
int length; /* and count involved */
int count; /* loop counter */
int rqno; /* request index */
int rqoffset; /* offset of request data from parity data */
struct buf *ubp; /* user buffer header */
struct request *rq; /* pointer to our request */
struct rqgroup *rqg; /* and to the request group */
struct rqelement *prqe; /* point to the parity block */
struct drive *drive; /* drive to access */
rqg = rqe->rqg; /* and to our request group */
rq = rqg->rq; /* point to our request */
ubp = rq->bp; /* user's buffer header */
prqe = &rqg->rqe[0]; /* point to the parity block */
/*
* If we get to this function, we have normal or
* degraded writes, or a combination of both. We do
* the same thing in each case: we perform an
* exclusive or to the parity block. The only
* difference is the origin of the data and the
* address range.
*/
/*
* Note that the degraded case is selected by the flags of the element
* passed in, whereas the normal case below is selected by the flags of
* the group. From here on rqe is reused as a cursor over the group.
*/
if (rqe->flags & XFR_DEGRADED_WRITE) { /* do the degraded write stuff */
pdata = (int *) (&prqe->b.b_data[(prqe->groupoffset) << DEV_BSHIFT]); /* parity data pointer */
bzero(pdata, prqe->grouplen << DEV_BSHIFT); /* start with nothing in the parity block */
/* Now get what data we need from each block */
for (rqno = 1; rqno < rqg->count; rqno++) { /* for all the data blocks */
rqe = &rqg->rqe[rqno]; /* this request */
sdata = (int *) (&rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT]); /* old data */
/*
* NOTE(review): shifting by (DEV_BSHIFT - 2) converts sectors to
* 4-byte words, i.e. it presumably assumes sizeof(int) == 4; the
* normal-write path below uses DEV_BSIZE / sizeof(int) instead.
*/
length = rqe->grouplen << (DEV_BSHIFT - 2); /* and count involved */
/*
* Add the data block to the parity block. Before
* we started the request, we zeroed the parity
* block, so the result of adding all the other
* blocks and the block we want to write will be
* the correct parity block.
*/
for (count = 0; count < length; count++)
pdata[count] ^= sdata[count];
/*
* The temporary buffer is only released here when the group has no
* normal write; otherwise the normal-write pass below still reads it.
*/
if ((rqe->flags & XFR_MALLOCED) /* the buffer was malloced, */
&&((rqg->flags & XFR_NORMAL_WRITE) == 0)) { /* and we have no normal write, */
Free(rqe->b.b_data); /* free it now */
rqe->flags &= ~XFR_MALLOCED;
}
}
}
if (rqg->flags & XFR_NORMAL_WRITE) { /* do normal write stuff */
/* Get what data we need from each block */
for (rqno = 1; rqno < rqg->count; rqno++) { /* for all the data blocks */
rqe = &rqg->rqe[rqno]; /* this request */
/* Only elements that are data blocks, marked normal write, and not on a bad subdisk */
if ((rqe->flags & (XFR_DATA_BLOCK | XFR_BAD_SUBDISK | XFR_NORMAL_WRITE))
== (XFR_DATA_BLOCK | XFR_NORMAL_WRITE)) { /* good data block to write */
sdata = (int *) &rqe->b.b_data[rqe->dataoffset << DEV_BSHIFT]; /* old data contents */
rqoffset = rqe->dataoffset + rqe->sdoffset - prqe->sdoffset; /* corresponding parity block offset */
pdata = (int *) (&prqe->b.b_data[rqoffset << DEV_BSHIFT]); /* parity data pointer */
length = rqe->datalen * (DEV_BSIZE / sizeof(int)); /* and number of ints */
/*
* "remove" the old data block
* from the parity block
*/
/* Refuse to XOR outside either the parity buffer or the old-data buffer */
if ((pdata < ((int *) prqe->b.b_data))
|| (&pdata[length] > ((int *) (prqe->b.b_data + prqe->b.b_bcount)))
|| (sdata < ((int *) rqe->b.b_data))
|| (&sdata[length] > ((int *) (rqe->b.b_data + rqe->b.b_bcount))))
panic("complete_raid5_write: bounds overflow");
for (count = 0; count < length; count++)
pdata[count] ^= sdata[count];
/* "add" the new data block */
sdata = (int *) (&ubp->b_data[rqe->useroffset << DEV_BSHIFT]); /* new data */
/* Same range check, this time against the user buffer */
if ((sdata < ((int *) ubp->b_data))
|| (&sdata[length] > ((int *) (ubp->b_data + ubp->b_bcount))))
panic("complete_raid5_write: bounds overflow");
for (count = 0; count < length; count++)
pdata[count] ^= sdata[count];
/* Free the malloced buffer */
/* A data element that reaches this point without a malloced buffer is treated as fatal */
if (rqe->flags & XFR_MALLOCED) { /* the buffer was malloced, */
Free(rqe->b.b_data); /* free it */
rqe->flags &= ~XFR_MALLOCED;
} else
panic("complete_raid5_write: malloc conflict");
/* Re-use the element that read the old data to write the new data */
if ((rqe->b.b_iocmd == BIO_READ) /* this was a read */
&&((rqe->flags & XFR_BAD_SUBDISK) == 0)) { /* and we can write this block */
rqe->b.b_flags &= ~B_DONE; /* start a new request */
rqe->b.b_iocmd = BIO_WRITE; /* we're writing now */
rqe->b.b_iodone = complete_rqe; /* call us here when done */
/*
* NOTE(review): XFR_PARITYOP is cleared on the element here but on the
* group (rqg->flags) before the parity write below -- confirm that the
* difference is intended.
*/
rqe->flags &= ~XFR_PARITYOP; /* reset flags that brought us here */
rqe->b.b_data = &ubp->b_data[rqe->useroffset << DEV_BSHIFT]; /* point to the user data */
rqe->b.b_bcount = rqe->datalen << DEV_BSHIFT; /* length to write */
rqe->b.b_bufsize = rqe->b.b_bcount; /* don't claim more */
rqe->b.b_resid = rqe->b.b_bcount; /* nothing transferred */
rqe->b.b_blkno += rqe->dataoffset; /* point to the correct block */
rqe->b.b_offset = rqe->b.b_blkno << DEV_BSHIFT;
rqe->b.b_iooffset = rqe->b.b_offset;
rqg->active++; /* another active request */
drive = &DRIVE[rqe->driveno]; /* drive to access */
/* We can't sleep here, so we just increment the counters. */
drive->active++;
if (drive->active >= drive->maxactive)
drive->maxactive = drive->active;
vinum_conf.active++;
if (vinum_conf.active >= vinum_conf.maxactive)
vinum_conf.maxactive = vinum_conf.active;
#ifdef VINUMDEBUG
if (debug & DEBUG_ADDRESSES)
log(LOG_DEBUG,
" %s dev %d.%d, sd %d, offset 0x%x, devoffset 0x%llx, length %ld\n",
rqe->b.b_iocmd == BIO_READ ? "Read" : "Write",
major(rqe->b.b_dev),
minor(rqe->b.b_dev),
rqe->sdno,
(u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset),
(long long)rqe->b.b_blkno,
rqe->b.b_bcount);
if (debug & DEBUG_LASTREQS)
logrq(loginfo_raid5_data, (union rqinfou) rqe, ubp);
#endif
DEV_STRATEGY(&rqe->b);
}
}
}
}
/* Finally, write the parity block */
/* rqe now refers to the parity element again, whatever it pointed to above */
rqe = &rqg->rqe[0];
rqe->b.b_flags &= ~B_DONE; /* we're not done */
rqe->b.b_iocmd = BIO_WRITE; /* we're writing now */
rqe->b.b_iodone = complete_rqe; /* call us here when done */
rqg->flags &= ~XFR_PARITYOP; /* reset flags that brought us here */
rqe->b.b_bcount = rqe->buflen << DEV_BSHIFT; /* length to write */
rqe->b.b_offset = rqe->b.b_blkno << DEV_BSHIFT;
rqe->b.b_iooffset = rqe->b.b_offset;
rqe->b.b_bufsize = rqe->b.b_bcount; /* don't claim we have more */
rqe->b.b_resid = rqe->b.b_bcount; /* nothing transferred */
rqg->active++; /* another active request */
drive = &DRIVE[rqe->driveno]; /* drive to access */
/* We can't sleep here, so we just increment the counters. */
drive->active++;
if (drive->active >= drive->maxactive)
drive->maxactive = drive->active;
vinum_conf.active++;
if (vinum_conf.active >= vinum_conf.maxactive)
vinum_conf.maxactive = vinum_conf.active;
#ifdef VINUMDEBUG
if (debug & DEBUG_ADDRESSES)
log(LOG_DEBUG,
" %s dev %d.%d, sd %d, offset 0x%x, devoffset 0x%llx, length %ld\n",
rqe->b.b_iocmd == BIO_READ ? "Read" : "Write",
major(rqe->b.b_dev),
minor(rqe->b.b_dev),
rqe->sdno,
(u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset),
(long long)rqe->b.b_blkno,
rqe->b.b_bcount);
if (debug & DEBUG_LASTREQS)
logrq(loginfo_raid5_parity, (union rqinfou) rqe, ubp);
#endif
DEV_STRATEGY(&rqe->b);
}
/* Local Variables: */
/* fill-column: 50 */
/* End: */

View File

@ -1,918 +0,0 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumio.c,v 1.39 2003/05/23 00:59:53 grog Exp grog $
* $FreeBSD$
*/
#include <dev/vinum/vinumhdr.h>
#include <dev/vinum/request.h>
static char *sappend(char *txt, char *s);
static int drivecmp(const void *va, const void *vb);
/*
 * Look up the device named in drive->devicename and open it for
 * reading and writing.
 *
 * Returns EBUSY if the drive is already marked open and ENOENT if no
 * such disk is found; in those two cases drive->lasterror is left
 * alone. Otherwise the outcome is stored in drive->lasterror and
 * returned: 0 on success (VF_OPEN is set), or an error number, in which
 * case the drive is forced to drive_down and, if verbose is set, a
 * warning is logged.
 */
int
open_drive(struct drive *drive, struct thread *td, int verbose)
{
    struct cdevsw *sw;                  /* device switch entry of the disk */

    if (drive->flags & VF_OPEN)
        return EBUSY;                   /* never open twice */

    drive->dev = getdiskbyname(drive->devicename);
    if (drive->dev == NULL)
        return ENOENT;                  /* no such disk */

    dev_ref(drive->dev);
    drive->dev->si_iosize_max = DFLTPHYS;

    sw = devsw(drive->dev);
    if (sw == NULL) {                   /* sanity, should not happen */
        drive->lasterror = ENOENT;
    } else if ((sw->d_flags & D_DISK) == 0) {
        drive->lasterror = ENOTBLK;     /* not a disk device */
    } else {
        DROP_GIANT();
        drive->lasterror = (sw->d_open) (drive->dev, FWRITE | FREAD, 0, td);
        PICKUP_GIANT();
    }

    if (drive->lasterror == 0) {
        drive->flags |= VF_OPEN;        /* success: remember that we hold it open */
    } else {
        drive->state = drive_down;      /* failure: just force the drive down */
        if (verbose)
            log(LOG_WARNING,
                "vinum open_drive %s: failed with error %d\n",
                drive->devicename, drive->lasterror);
    }
    return drive->lasterror;
}
/*
 * Derive the working parameters of a drive from the sector size and
 * media size already stored in the drive structure, refresh the label
 * and set up an initial free list covering everything after DATASTART.
 *
 * Returns 0 on success, or ENOSPC if the drive is smaller than
 * MINVINUMSLICE (the drive is then forced down) or if the free list
 * cannot be allocated.
 */
int
set_drive_parms(struct drive *drive)
{
    /* Block geometry; the block size is a fixed constant here. */
    drive->blocksize = BLKDEV_IOSIZE;
    drive->secsperblock = drive->blocksize / drive->sectorsize;

    /* Label: host name, current time as date of birth, size in bytes. */
    bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN);
    microtime(&drive->label.date_of_birth);
    drive->label.drive_size = drive->mediasize;
#ifdef VINUMDEBUG
    if (debug & DEBUG_BIGDRIVE)
        drive->label.drive_size *= 100; /* debugging aid: pretend to be 100 times as big */
#endif

    /* Sectors that subdisks may use. */
    drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART;

    /*
     * Reject anything too small to be useful. The original author noted
     * that, as of January 1998, non-existent slices could be opened and
     * showed up with a length of 0.
     */
    if (drive->label.drive_size < MINVINUMSLICE) {
        set_drive_state(drive->driveno, drive_down, setstate_force);
        drive->lasterror = ENOSPC;
        return ENOSPC;
    }

    /* One free-list entry describing (almost) the whole drive. */
    drive->freelist_size = INITIAL_DRIVE_FREELIST;
    drive->freelist = (struct drive_freelist *)
        Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist));
    if (drive->freelist == NULL)
        return ENOSPC;
    drive->freelist_entries = 1;
    drive->freelist[0].offset = DATASTART;
    drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART;

    if (drive->label.name[0] == '\0')
        drive->state = drive_referenced;    /* known, but has no name yet */
    else
        set_drive_state(drive->driveno, drive_up, setstate_force);  /* named: accessible */
    return 0;
}
/*
 * Bring a drive into service: open its device, ask the device for its
 * sector size and media size, then compute the derived parameters.
 *
 * Returns 0 on success or an error number, which is also left in
 * drive->lasterror for failures of the open or of the size queries.
 * If a size query fails the drive is closed again; verbose controls
 * whether that failure is logged.
 */
int
init_drive(struct drive *drive, int verbose)
{
    drive->lasterror = open_drive(drive, curthread, verbose);
    if (drive->lasterror != 0)
        return drive->lasterror;

    /* Query the geometry; the second query is skipped if the first fails. */
    DROP_GIANT();
    drive->lasterror = (*devsw(drive->dev)->d_ioctl) (drive->dev,
        DIOCGSECTORSIZE,
        (caddr_t) &drive->sectorsize,
        FREAD,
        curthread);
    if (drive->lasterror == 0) {
        drive->lasterror = (*devsw(drive->dev)->d_ioctl) (drive->dev,
            DIOCGMEDIASIZE,
            (caddr_t) &drive->mediasize,
            FREAD,
            curthread);
    }
    PICKUP_GIANT();

    if (drive->lasterror == 0)
        return set_drive_parms(drive);  /* all known: set the odds and ends */

    if (verbose)
        log(LOG_ERR,
            "vinum: Can't get drive dimensions for %s: error %d\n",
            drive->devicename,
            drive->lasterror);
    close_drive(drive);
    return drive->lasterror;
}
/*
 * Close a drive and make sure it is not left in a state above
 * drive_down. The device itself is only closed when the drive is
 * marked open; the drive lock is held around the whole operation.
 */
void
close_drive(struct drive *drive)
{
    LOCKDRIVE(drive);                   /* keep the daemon out */
    if ((drive->flags & VF_OPEN) != 0) {
        close_locked_drive(drive);
    }
    if (drive->state > drive_down) {    /* anything above "down" becomes "down" */
        drive->state = drive_down;
    }
    unlockdrive(drive);
}
/*
 * Close the device of a drive. The caller holds the drive lock and has
 * already checked that the drive is open.
 *
 * No error is returned. The result of the device close routine is kept
 * in drive->lasterror, but only when no earlier error is recorded there.
 */
void
close_locked_drive(struct drive *drive)
{
    int rc;                             /* result of the device close routine */

    DROP_GIANT();
    rc = (*devsw(drive->dev)->d_close) (drive->dev, FWRITE | FREAD, 0, NULL);
    PICKUP_GIANT();

    drive->flags &= ~VF_OPEN;           /* no longer open, whatever happened */
    if (drive->lasterror != 0)
        return;                         /* keep the older error */
    drive->lasterror = rc;
}
/*
* Remove drive from the configuration.
* Caller must ensure that it isn't active.
*/
void
remove_drive(int driveno)
{
struct drive *drive = &vinum_conf.drive[driveno];
struct vinum_hdr *vhdr; /* buffer for header */
int error;
if (drive->state > drive_referenced) { /* real drive */
if (drive->state == drive_up) {
vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffer */
CHECKALLOC(vhdr, "Can't allocate memory");
error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
if (error)
drive->lasterror = error;
else {
vhdr->magic = VINUM_NOMAGIC; /* obliterate the magic, but leave the rest */
write_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
}
Free(vhdr);
}
free_drive(drive); /* close it and free resources */
save_config(); /* and save the updated configuration */
}
}
/*
* Transfer drive data. Usually called from one of these defines;
* #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ)
* #define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE)
*
* length and offset are in bytes, but must be multiples of sector
* size. The function *does not check* for this condition, and
* truncates ruthlessly.
* Return error number.
*/
int
driveio(struct drive *drive, char *buf, size_t length, off_t offset, int flag)
{
int error;
struct buf *bp;
error = 0; /* to keep the compiler happy */
while (length) { /* divide into small enough blocks */
int len = min(length, MAXBSIZE); /* maximum block device transfer is MAXBSIZE */
bp = geteblk(len); /* get a buffer header */
bp->b_flags = 0;
bp->b_iocmd = flag;
bp->b_dev = drive->dev; /* device */
bp->b_blkno = offset / drive->sectorsize; /* block number */
bp->b_offset = offset;
bp->b_iooffset = offset;
bp->b_saveaddr = bp->b_data;
bp->b_data = buf;
bp->b_bcount = len;
DEV_STRATEGY(bp); /* initiate the transfer */
error = bufwait(bp);
bp->b_data = bp->b_saveaddr;
bp->b_flags |= B_INVAL | B_AGE;
bp->b_ioflags &= ~BIO_ERROR;
brelse(bp);
if (error)
break;
length -= len; /* update pointers */
buf += len;
offset += len;
}
return error;
}
/*
* Check a drive for a vinum header. If found,
* update the drive information. We come here
* with a partially populated drive structure
* which includes the device name.
*
* Return information on what we found.
*
* This function is called from two places: check_drive,
* which wants to find out whether the drive is a
* Vinum drive, and config_drive, which asserts that
* it is a vinum drive. In the first case, we don't
* print error messages (verbose==0), in the second
* we do (verbose==1).
*/
enum drive_label_info
read_drive_label(struct drive *drive, int verbose)
{
int error;
int result; /* result of our search */
struct vinum_hdr *vhdr; /* and as header */
error = init_drive(drive, 0); /* find the drive */
if (error) /* find the drive */
return DL_CANT_OPEN; /* not ours */
vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffers */
CHECKALLOC(vhdr, "Can't allocate memory");
drive->state = drive_up; /* be optimistic */
error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET);
if (vhdr->magic == VINUM_MAGIC) { /* ours! */
if (drive->label.name[0] /* we have a name for this drive */
&&(strcmp(drive->label.name, vhdr->label.name))) { /* but it doesn't match the real name */
drive->lasterror = EINVAL;
result = DL_WRONG_DRIVE; /* it's the wrong drive */
drive->state = drive_unallocated; /* put it back, it's not ours */
} else
result = DL_OURS;
/*
* We copy the drive anyway so that we have
* the correct name in the drive info. This
* may not be the name specified
*/
drive->label = vhdr->label; /* put in the label information */
} else if (vhdr->magic == VINUM_NOMAGIC) /* was ours, but we gave it away */
result = DL_DELETED_LABEL; /* and return the info */
else
result = DL_NOT_OURS; /* we could have it, but we don't yet */
Free(vhdr); /* that's all. */
return result;
}
/*
 * Check whether the named device carries a vinum header and, if so,
 * make it known to the configuration.
 *
 * A drive entry is looked up by device name (and created if missing).
 * If that entry is already down or up it is returned unchanged.
 * Otherwise the label is read:
 *  - if it is not a vinum drive, the entry is closed and freed and
 *    NULL is returned;
 *  - if it is, any other allocated entry with the same label name is
 *    examined. An entry with a real device name (starting with '/')
 *    means a duplicate: lasterror is set to EEXIST and the search
 *    stops. A place-holder entry is retired: the subdisks pointing to
 *    it are redirected to the drive just found and the place holder
 *    is zeroed.
 *
 * Returns a pointer to the drive entry, or NULL.
 */
struct drive *
check_drive(char *devicename)
{
    int driveno;
    int other;                          /* index of the entry being compared */
    int sdno;
    struct drive *drive;

    driveno = find_drive_by_name(devicename, 1);    /* if entry doesn't exist, create it */
    drive = &vinum_conf.drive[driveno];
    if (drive->state >= drive_down)     /* up or down, we know it */
        return drive;

    if (read_drive_label(drive, 0) != DL_OURS) {    /* not one of ours */
        close_drive(drive);
        free_drive(drive);              /* get rid of it */
        return NULL;
    }

    for (other = 0; other < vinum_conf.drives_allocated; other++) {
        struct drive *mydrive;

        if ((other == driveno)          /* this very drive, */
            || (DRIVE[other].state == drive_unallocated)    /* or an unused entry, */
            || (strcmp(DRIVE[other].label.name,
                    DRIVE[driveno].label.name) != 0))   /* or a different name */
            continue;

        mydrive = &DRIVE[other];
        if (mydrive->devicename[0] == '/') {    /* we know a device name for it */
            /*
             * Flag the duplicate, but don't take the drive down:
             * that would cause unneeded error messages.
             */
            drive->lasterror = EEXIST;
            break;
        }

        /* It's just a place holder: move its subdisks over to us. */
        for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) {
            if ((SD[sdno].driveno == other) /* it's pointing to the place holder, */
                && (SD[sdno].state != sd_unallocated)) {    /* and it's a real subdisk */
                SD[sdno].driveno = drive->driveno;
                update_sd_state(sdno);
            }
        }
        bzero(mydrive, sizeof(struct drive));   /* don't deallocate it, just remove it */
    }
    return drive;
}
/*
 * Copy the string txt, including its terminating NUL, to the location
 * s and return a pointer to the NUL just written, so that the next
 * call can continue from there. The destination is not bounds-checked.
 */
static char *
sappend(char *txt, char *s)
{
    for (;;) {
        char c = *txt++;

        *s = c;
        if (c == '\0')
            return s;                   /* points at the terminator */
        s++;
    }
}
/*
 * Write the current configuration as text into config, which is len
 * bytes long and is cleared first.
 *
 * One line is produced for every volume above volume_uninit, every
 * plex above plex_referenced and every subdisk that is neither
 * referenced nor unallocated; objects without a name are skipped.
 * Panics if the text ends within the last two bytes of the buffer.
 */
void
format_config(char *config, int len)
{
    int objno;                          /* index of the current volume, plex or subdisk */
    int sdidx;                          /* index into a plex's subdisk list */
    char *s = config;                   /* where the next text goes */
    char *configend = &config[len];

    bzero(config, len);

    /* First write the volume configuration */
    for (objno = 0; objno < vinum_conf.volumes_allocated; objno++) {
        struct volume *vol = &vinum_conf.volume[objno];

        if ((vol->state <= volume_uninit)
            || (vol->name[0] == '\0'))  /* paranoia */
            continue;
        snprintf(s,
            configend - s,
            "volume %s state %s",
            vol->name,
            volume_state(vol->state));
        s += strlen(s);                 /* find the end */
        s = sappend("\n", s);
    }

    /* Then the plex configuration */
    for (objno = 0; objno < vinum_conf.plexes_allocated; objno++) {
        struct plex *plex = &vinum_conf.plex[objno];

        if ((plex->state <= plex_referenced)
            || (plex->name[0] == '\0')) /* paranoia */
            continue;
        snprintf(s,
            configend - s,
            "plex name %s state %s org %s ",
            plex->name,
            plex_state(plex->state),
            plex_org(plex->organization));
        s += strlen(s);
        if (isstriped(plex)) {
            snprintf(s,
                configend - s,
                "%ds ",
                (int) plex->stripesize);
            s += strlen(s);
        }
        if (plex->volno >= 0) {         /* we have a volume */
            struct volume *vol = &VOL[plex->volno];

            snprintf(s,
                configend - s,
                "vol %s ",
                vol->name);
            s += strlen(s);
            if ((vol->preferred_plex >= 0)  /* has a preferred plex */
                && vol->plex[vol->preferred_plex] == objno) {   /* and it's us */
                snprintf(s, configend - s, "preferred ");
            }
            s += strlen(s);
        }
        /*
         * The cursor is deliberately left where it is here: each name
         * overwrites the previous one and the newline appended below
         * overwrites the start of the last one, so the subdisk names
         * do not end up in the plex line.
         */
        for (sdidx = 0; sdidx < plex->subdisks; sdidx++) {
            snprintf(s,
                configend - s,
                " sd %s",
                vinum_conf.sd[plex->sdnos[sdidx]].name);
        }
        s = sappend("\n", s);
    }

    /* And finally the subdisk configuration */
    for (objno = 0; objno < vinum_conf.subdisks_allocated; objno++) {
        struct sd *sd = &SD[objno];
        char *drivename;

        if ((sd->state == sd_referenced)
            || (sd->state == sd_unallocated)
            || (sd->name[0] == '\0'))   /* paranoia */
            continue;

        drivename = vinum_conf.drive[sd->driveno].label.name;
        /*
         * XXX Dead subdisks without a drive have been seen. Their
         * drive name is empty, which would shift the following
         * keyword into the place of the drive name. Substituting a
         * marker limits the damage; it is not a fix.
         */
        if (drivename[0] == '\0')
            drivename = "*invalid*";
        snprintf(s,
            configend - s,
            "sd name %s drive %s len %llus driveoffset %llus state %s",
            sd->name,
            drivename,
            (unsigned long long) sd->sectors,
            (unsigned long long) sd->driveoffset,
            sd_state(sd->state));
        s += strlen(s);

        if (sd->plexno < 0)
            snprintf(s, configend - s, " detached");
        else
            snprintf(s,
                configend - s,
                " plex %s plexoffset %llds",
                vinum_conf.plex[sd->plexno].name,
                (long long) sd->plexoffset);
        s += strlen(s);

        if (sd->flags & VF_RETRYERRORS) {
            snprintf(s, configend - s, " retryerrors");
            s += strlen(s);
        }
        snprintf(s, configend - s, " \n");
        s += strlen(s);
    }

    if (s > &config[len - 2])
        panic("vinum: configuration data overflow");
}
/*
* Request a save of the configuration: queue a daemonrq_saveconfig
* request for the daemon, with an empty (0) request argument. Nothing
* is written here; according to the original description the actual
* work is done in process context by daemon_save_config.
*/
void
save_config(void)
{
queue_daemon_request(daemonrq_saveconfig, (union daemoninfo) 0);
}
/*
 * Write the configuration to all vinum slices. This is performed by
 * the daemon only.
 *
 * Nothing is done while the configuration is being changed
 * (VF_CONFIGURING). Otherwise a header and the textual configuration
 * are built once and written to every drive above drive_down: the
 * header at VINUM_LABEL_OFFSET, followed by two copies of the
 * configuration at VINUM_CONFIG_OFFSET. A drive that cannot be written
 * is logged and forced down.
 *
 * On the way, inconsistent drive entries are tidied up: entries without
 * a device name or label name are freed, drives that claim to be up
 * but are not open are forced down, and drives that are down but still
 * open are closed.
 */
void
daemon_save_config(void)
{
    int error;
    int driveno;
    struct drive *drive;                /* point to current drive info */
    struct vinum_hdr *vhdr;             /* header written to each drive */
    char *config;                       /* point to config data */

    /* don't save the configuration while we're still working on it */
    if (vinum_conf.flags & VF_CONFIGURING)
        return;

    /* Build a volume header */
    vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN);
    CHECKALLOC(vhdr, "Can't allocate config data");
    /*
     * All VINUMHEADERLEN bytes go to disk, but only a few fields are
     * filled in below: clear the buffer so that no stale heap contents
     * are written out with them.
     */
    bzero(vhdr, VINUMHEADERLEN);
    vhdr->magic = VINUM_MAGIC;          /* magic number */
    vhdr->config_length = MAXCONFIG;    /* length of following config info */

    config = Malloc(MAXCONFIG);         /* get space for the config data */
    CHECKALLOC(config, "Can't allocate config data");
    format_config(config, MAXCONFIG);

    error = 0;                          /* no errors yet */
    for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) {
        drive = &vinum_conf.drive[driveno];
        if (drive->state > drive_referenced) {
            LOCKDRIVE(drive);           /* don't let it change */
            /*
             * First, do some drive consistency checks. Some
             * of these are kludges, others require a process
             * context and couldn't be done before.
             */
            if ((drive->devicename[0] == '\0')
                || (drive->label.name[0] == '\0')) {
                unlockdrive(drive);
                free_drive(drive);      /* get rid of it */
                /*
                 * NOTE(review): this leaves the loop, so drives with a
                 * higher index are not written in this pass -- confirm
                 * that "continue" was not intended.
                 */
                break;
            }
            if (((drive->flags & VF_OPEN) == 0) /* drive not open */
                && (drive->state > drive_down)) {   /* and it thinks it's not down */
                unlockdrive(drive);
                set_drive_state(driveno, drive_down, setstate_force);   /* tell it what's what */
                continue;
            }
            if ((drive->state == drive_down)    /* it's down */
                && (drive->flags & VF_OPEN)) {  /* but open, */
                unlockdrive(drive);
                close_drive(drive);     /* close it */
            } else if (drive->state > drive_down) {
                microtime(&drive->label.last_update);   /* time of last update is now */
                bcopy((char *) &drive->label,   /* and the label info from the drive structure */
                    (char *) &vhdr->label,
                    sizeof(vhdr->label));
                /*
                 * The unlock below sits inside this test; the test
                 * cannot fail on this path as long as the drive state
                 * is above drive_down.
                 */
                if ((drive->state != drive_unallocated)
                    && (drive->state != drive_referenced)) {    /* and it's a real drive */
                    error = write_drive(drive,
                        (char *) vhdr,
                        VINUMHEADERLEN,
                        VINUM_LABEL_OFFSET);
                    if (error == 0)     /* first config copy */
                        error = write_drive(drive,
                            config,
                            MAXCONFIG,
                            VINUM_CONFIG_OFFSET);
                    if (error == 0)
                        error = write_drive(drive,  /* second copy */
                            config,
                            MAXCONFIG,
                            VINUM_CONFIG_OFFSET + MAXCONFIG);
                    unlockdrive(drive);
                    if (error) {
                        log(LOG_ERR,
                            "vinum: Can't write config to %s, error %d\n",
                            drive->devicename,
                            error);
                        set_drive_state(drive->driveno, drive_down, setstate_force);
                    }
                }
            } else                      /* not worth looking at, */
                unlockdrive(drive);     /* just unlock it again */
        }
    }
    Free(vhdr);
    Free(config);
}
/*
 * Search disks on the system for vinum slices and add them to the
 * configuration if they're not there already.
 *
 * devicename is a blank-separated list of device names; names that do
 * not start with '/' are taken to be relative to /dev/. If devicename
 * is NULL, the kern.disks sysctl supplies the list of all disks.
 *
 * For every disk, the partitions 'a' to 'h' except 'c' are probed, on
 * i386 first within slices 1 to 4 and then, if nothing was found
 * there, directly on the disk. The drives found are sorted by the time
 * of their last configuration update and their on-disk configuration
 * is parsed into the running configuration, newest first.
 *
 * Returns 0 on success, ENOENT if no (additional) drive was found, or
 * another error number.
 */
int
vinum_scandisk(char *devicename)
{
    struct drive *volatile drive;
    volatile int driveno;
    int firstdrive;                     /* first drive in this list */
    volatile int gooddrives;            /* number of usable drives found */
    int firsttime;                      /* set if we have never configured before */
    int error;
    char *config_text;                  /* read the config info from disk into here */
    char *volatile cptr;                /* pointer into config information */
    char *eptr;                         /* end pointer into config information */
    char *config_line;                  /* copy the config line to */
    volatile int status;
    int *drivelist;                     /* list of drive indices */
    char *partname;                     /* for creating partition names */
    char *cp;                           /* pointer to start of disk name */
    char *ep;                           /* and to first char after name */
    char *np;                           /* name pointer in name we build */
    size_t alloclen;
    int malloced;
    int partnamelen;                    /* length of partition name */
    int drives;
    int goodpart;                       /* good vinum drives on this disk */

    malloced = 0;                       /* devicename not malloced */
    if (devicename == NULL) {           /* no devices specified, */
        /* get a list of all disks in the system */
        /* Get size of disk list */
        error = kernel_sysctlbyname(&thread0, "kern.disks", NULL,
            NULL, NULL, 0, &alloclen);
        if (error) {
            log(LOG_ERR, "vinum: can't get disk list: %d\n", error);
            return EINVAL;
        }
        devicename = Malloc(alloclen);
        if (devicename == NULL) {
            printf("vinum: can't allocate memory for drive list");
            return ENOMEM;
        } else
            malloced = 1;

        /*
         * Now get the list of disks. If this fails the buffer holds
         * nothing usable, so give up rather than parse it.
         */
        error = kernel_sysctlbyname(&thread0, "kern.disks", devicename,
            &alloclen, NULL, 0, NULL);
        if (error) {
            log(LOG_ERR, "vinum: can't get disk list: %d\n", error);
            Free(devicename);
            return EINVAL;
        }
    }
    status = 0;                         /* success indication */
    /*
     * NOTE(review): VF_READING_CONFIG is set here, before the
     * configuration lock is taken, and is only cleared again on the
     * normal exit path at the end of this function.
     */
    vinum_conf.flags |= VF_READING_CONFIG;  /* reading config from disk */

    partname = Malloc(MAXPATHLEN);      /* extract name of disk here */
    if (partname == NULL) {
        printf("vinum_scandisk: can't allocate memory for drive name");
        if (malloced)
            Free(devicename);           /* don't leak the disk list */
        return ENOMEM;
    }
    gooddrives = 0;                     /* number of usable drives found */
    firstdrive = vinum_conf.drives_used;    /* the first drive */
    firsttime = vinum_conf.drives_used == 0;    /* have we ever configured before? */

    /* allocate a drive pointer list */
    drives = 256;                       /* should be enough for most cases */
    drivelist = (int *) Malloc(drives * sizeof(int));
    CHECKALLOC(drivelist, "Can't allocate memory");

    error = lock_config();              /* make sure we're alone here */
    if (error) {
        /* We never got the lock: just give back what we allocated. */
        Free(drivelist);
        Free(partname);
        if (malloced)
            Free(devicename);
        return error;
    }

    /*
     * NOTE(review): a longjmp to here returns with the configuration
     * still locked and the buffers still allocated. drivelist and
     * drives may have been changed by EXPAND since setjmp and are not
     * volatile, so they are not touched on this path.
     */
    error = setjmp(command_fail);       /* come back here on error */
    if (error)                          /* longjmped out */
        return error;

    /* Open all drives and find which was modified most recently */
    for (cp = devicename; *cp; cp = ep) {
        char part;                      /* UNIX partition */
#ifdef __i386__
        int slice;
#endif

        while (*cp == ' ')
            cp++;                       /* find start of name */
        if (*cp == '\0')                /* done, */
            break;
        ep = cp;
        while (*ep && (*ep != ' '))     /* find end of name */
            ep++;

        np = partname;                  /* start building up a name here */
        if (*cp != '/') {               /* name doesn't start with /, */
            strcpy(np, "/dev/");        /* assume /dev */
            np += strlen("/dev/");
        }

        /*
         * The name, the longest suffix added below ("s", a slice digit
         * and a partition letter) and the terminating NUL must all fit
         * into partname. Skip names that are too long.
         */
        if ((size_t) (ep - cp) + sizeof("s0a")
            > (size_t) (MAXPATHLEN - (np - partname))) {
            log(LOG_WARNING, "vinum: device name too long, ignored\n");
            continue;
        }
        memcpy(np, cp, ep - cp);        /* put in name */
        np += ep - cp;                  /* and point past */
        goodpart = 0;                   /* no partitions on this disk yet */
        partnamelen = MAXPATHLEN - (np - partname); /* remaining length in partition name */

#ifdef __i386__
        /* first try the partition table */
        for (slice = 1; slice < 5; slice++) {
            for (part = 'a'; part < 'i'; part++) {
                if (part != 'c') {      /* don't do the c partition */
                    snprintf(np,
                        partnamelen,
                        "s%d%c",
                        slice,
                        part);
                    drive = check_drive(partname);  /* try to open it */
                    if (drive) {        /* got something, */
                        if (drive->flags & VF_CONFIGURED)   /* already read this config, */
                            log(LOG_WARNING,
                                "vinum: already read config from %s\n", /* say so */
                                drive->label.name);
                        else {
                            if (gooddrives == drives) { /* ran out of entries */
                                EXPAND(drivelist, int, drives, drives); /* double the size */
                            }
                            drivelist[gooddrives] = drive->driveno; /* keep the drive index */
                            drive->flags &= ~VF_NEWBORN;    /* which is no longer newly born */
                            gooddrives++;
                            goodpart++;
                        }
                    }
                }
            }
        }
#endif
        /*
         * If the machine doesn't have a BIOS
         * partition table, try normal devices.
         */
        if (goodpart == 0) {            /* didn't find anything, */
            for (part = 'a'; part < 'i'; part++) {  /* try the compatibility partition */
                if (part != 'c') {      /* don't do the c partition */
                    snprintf(np,
                        partnamelen,
                        "%c",
                        part);
                    drive = check_drive(partname);  /* try to open it */
                    if (drive) {        /* got something, */
                        if (drive->flags & VF_CONFIGURED)   /* already read this config, */
                            log(LOG_WARNING,
                                "vinum: already read config from %s\n", /* say so */
                                drive->label.name);
                        else {
                            if (gooddrives == drives) { /* ran out of entries */
                                EXPAND(drivelist, int, drives, drives); /* double the size */
                            }
                            drivelist[gooddrives] = drive->driveno; /* keep the drive index */
                            drive->flags &= ~VF_NEWBORN;    /* which is no longer newly born */
                            gooddrives++;
                            goodpart++;
                        }
                    }
                }
            }
        }
    }
    Free(partname);

    if (gooddrives == 0) {
        if (firsttime)
            log(LOG_WARNING, "vinum: no drives found\n");
        else
            log(LOG_INFO, "vinum: no additional drives found\n");
        Free(drivelist);                /* nothing in it that anybody needs */
        if (malloced)
            Free(devicename);
        unlock_config();
        return ENOENT;
    }
    /*
     * We now have at least one drive open. Sort
     * them in order of config time and merge the
     * config info with what we have already.
     */
    qsort(drivelist, gooddrives, sizeof(int), drivecmp);
    config_text = (char *) Malloc(MAXCONFIG * 2);   /* allocate buffers */
    CHECKALLOC(config_text, "Can't allocate memory");
    config_line = (char *) Malloc(MAXCONFIGLINE * 2);   /* allocate buffers */
    CHECKALLOC(config_line, "Can't allocate memory");
    for (driveno = 0; driveno < gooddrives; driveno++) {    /* now include the config */
        drive = &DRIVE[drivelist[driveno]]; /* point to the drive */

        if (firsttime && (driveno == 0))    /* we've never configured before, */
            log(LOG_INFO, "vinum: reading configuration from %s\n", drive->devicename);
        else
            log(LOG_INFO, "vinum: updating configuration from %s\n", drive->devicename);

        if (drive->state == drive_up)
            /* Read in both copies of the configuration information */
            error = read_drive(drive, config_text, MAXCONFIG * 2, VINUM_CONFIG_OFFSET);
        else {
            error = EIO;
            printf("vinum_scandisk: %s is %s\n", drive->devicename, drive_state(drive->state));
        }

        if (error != 0) {
            log(LOG_ERR, "vinum: Can't read device %s, error %d\n", drive->devicename, error);
            free_drive(drive);          /* give it back */
            status = error;
        }
        /*
         * At this point, check that the two copies
         * are the same, and do something useful if
         * not. In particular, consider which is
         * newer, and what this means for the
         * integrity of the data on the drive.
         */
        else {
            vinum_conf.drives_used++;   /* another drive in use */
            /* Parse the configuration, and add it to the global configuration */
            for (cptr = config_text; *cptr != '\0';) {
                volatile int parse_status;  /* return value from parse_config */

                for (eptr = config_line; (*cptr != '\n') && (*cptr != '\0');)   /* until the end of the line */
                    *eptr++ = *cptr++;
                *eptr = '\0';           /* and delimit */

                if (setjmp(command_fail) == 0) {    /* come back here on error and continue */
                    parse_status = parse_config(config_line, &keyword_set, 1);  /* parse the config line */
                    /*
                     * parse_config recognizes referenced
                     * drives and builds a drive entry for
                     * them. This may expand the drive
                     * table, thus invalidating the pointer.
                     */
                    drive = &DRIVE[drivelist[driveno]]; /* point to the drive */

                    if (parse_status < 0) { /* error in config */
                        /*
                         * This config should have been parsed
                         * in user space. If we run into
                         * problems here, something serious is
                         * afoot. Complain and let the user
                         * snarf the config to see what's
                         * wrong.
                         */
                        log(LOG_ERR,
                            "vinum: Config error on %s, aborting integration\n",
                            drive->devicename);
                        free_drive(drive);  /* give it back */
                        status = EINVAL;
                    }
                }
                while (*cptr == '\n')
                    cptr++;             /* skip to next line */
            }
        }
        /*
         * NOTE(review): this also runs after free_drive() above, i.e.
         * the flag is then set on an entry that was just given back.
         */
        drive->flags |= VF_CONFIGURED;  /* this drive's configuration is complete */
    }

    Free(config_line);
    Free(config_text);
    Free(drivelist);
    vinum_conf.flags &= ~VF_READING_CONFIG; /* no longer reading from disk */
    if (status != 0)
        printf("vinum: couldn't read configuration");
    else
        updateconfig(VF_READING_CONFIG);    /* update from disk config */
    if (malloced)
        Free(devicename);
    unlock_config();
    return status;
}
/*
 * qsort comparison function for a list of drive indices: order the
 * drives by the time of their last label update, most recent first.
 * Returns 1 if a was updated before b, 0 if both times are equal and
 * -1 if a was updated after b: in other words, sort backwards.
 */
int
drivecmp(const void *va, const void *vb)
{
    const struct drive *a = &DRIVE[*(const int *) va];
    const struct drive *b = &DRIVE[*(const int *) vb];

    /* Seconds decide unless they are equal ... */
    if (a->label.last_update.tv_sec != b->label.last_update.tv_sec)
        return (a->label.last_update.tv_sec > b->label.last_update.tv_sec) ? -1 : 1;

    /* ... in which case the microseconds do. */
    if (a->label.last_update.tv_usec != b->label.last_update.tv_usec)
        return (a->label.last_update.tv_usec > b->label.last_update.tv_usec) ? -1 : 1;

    return 0;
}
/* Local Variables: */
/* fill-column: 50 */
/* End: */

View File

@ -1,154 +0,0 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumio.h,v 1.23 2003/05/04 05:25:46 grog Exp grog $
* $FreeBSD$
*/
/*
 * Group letter used as the second argument of every _IOC/_IOR/_IOW/_IOWR
 * request definition below.
 * NOTE(review): a one-letter macro name is visible to every file that
 * includes this header and is easy to collide with.
 */
#define L 'F' /* ID letter of our ioctls */
/* Size in bytes of the msg[] text buffer in struct _ioctl_reply */
#define MAX_IOCTL_REPLY 1024
#ifdef VINUMDEBUG
/*
 * Argument of the VINUM_DEBUG ioctl (only built with VINUMDEBUG).
 */
struct debuginfo {
int changeit; /* non-zero: store param as the new debug flags; zero: enter the debugger */
int param; /* new debug flags, used when changeit is non-zero */
};
#endif
/*
 * Kind of object an ioctl message refers to.  Stored in the type field
 * of struct vinum_ioctl_msg and struct vinum_rename_msg.
 */
enum objecttype {
drive_object,
sd_object, /* subdisk */
plex_object,
volume_object,
invalid_object /* not a real object; handlers answer EINVAL */
};
/*
 * The state to set with VINUM_SETSTATE. Since each object has a
 * different set of states, we need to translate later.
 *
 * Carried in the state field of struct vinum_ioctl_msg.
 */
enum objectstate {
object_down,
object_initializing,
object_initialized,
object_up
};
/*
 * This structure is used for modifying objects
 * (VINUM_SETSTATE, VINUM_REMOVE, VINUM_RESETSTATS, VINUM_ATTACH,
 * VINUM_DETACH, VINUM_REPLACE).
 *
 * The handlers in vinumioctl.c cast a pointer to this structure to
 * struct _ioctl_reply and write their answer into the same buffer.
 * Both structures start with an int, so storing reply->error
 * overwrites index: a handler must read everything it needs from
 * the message before it fills in the reply.
 */
struct vinum_ioctl_msg {
int index; /* number of the object to operate on */
enum objecttype type; /* kind of object that index refers to */
enum objectstate state; /* state to set (VINUM_SETSTATE) */
enum parityop op; /* for parity ops */
int force; /* do it even if it doesn't make sense */
int recurse; /* recurse (VINUM_REMOVE) */
int verify; /* verify (initsd, rebuildparity) */
int otherobject; /* superordinate object (attach),
* replacement object (replace) */
int rename; /* rename object (attach) */
int64_t offset; /* offset of subdisk (for attach) */
int blocksize; /* size of block to revive (bytes) */
};
/*
 * VINUM_CREATE returns a buffer of this kind.  The other handlers that
 * take a struct vinum_ioctl_msg or struct vinum_rename_msg answer in
 * this format too, overwriting the request in place.
 */
struct _ioctl_reply {
int error; /* 0 on success, otherwise an errno value */
char msg[MAX_IOCTL_REPLY]; /* optional explanatory text; may be an empty string */
};
/*
 * Request passed to renameobject() for VINUM_RENAME.  The answer is
 * written over this structure as a struct _ioctl_reply.
 */
struct vinum_rename_msg {
int index; /* number of the object to rename */
int recurse; /* rename subordinate objects too */
enum objecttype type; /* kind of object that index refers to */
char newname[MAXNAME]; /* new name to give to object */
};
/*
 * ioctl requests.
 *
 * Request numbers 64 to 98 (plus 127 for the debug request) are used
 * below; 81 and 97 are not assigned in this file.
 */
#define BUFSIZE 1024 /* size of buffer, including continuations */
#define VINUM_CREATE _IOC(IOC_IN | IOC_OUT, L, 64, BUFSIZE) /* configure vinum */
#define VINUM_GETCONFIG _IOR(L, 65, struct __vinum_conf) /* get global config */
#define VINUM_DRIVECONFIG _IOWR(L, 66, struct _drive) /* get drive config */
#define VINUM_SDCONFIG _IOWR(L, 67, struct _sd) /* get subdisk config */
#define VINUM_PLEXCONFIG _IOWR(L, 68, struct _plex) /* get plex config */
#define VINUM_VOLCONFIG _IOWR(L, 69, struct _volume) /* get volume config */
#define VINUM_PLEXSDCONFIG _IOWR(L, 70, struct _sd) /* get sd config for plex (plex, sdno) */
#define VINUM_GETFREELIST _IOWR(L, 71, struct drive_freelist) /* get freelist element (drive, fe) */
#define VINUM_SAVECONFIG _IOW(L, 72, int) /* write config to disk */
#define VINUM_RESETCONFIG _IOC(0, L, 73, 0) /* trash config on disk */
#define VINUM_INIT _IOC(0, L, 74, 0) /* the handler in vinum_super_ioctl only reports success */
#define VINUM_READCONFIG _IOC(IOC_IN | IOC_OUT, L, 75, BUFSIZE) /* read config from disk */
#ifdef VINUMDEBUG
#define VINUM_DEBUG _IOWR(L, 127, struct debuginfo) /* call the debugger from ioctl () */
#endif
/*
 * Start an object. Pass two integers:
 * msg [0] index in vinum_conf.<object>
 * msg [1] type of object (see below)
 *
 * Return ioctl_reply
 *
 * The handler receives the buffer as a struct vinum_ioctl_msg, whose
 * first two members are this index and type.
 */
#define VINUM_SETSTATE _IOC(IOC_IN | IOC_OUT, L, 76, MAX_IOCTL_REPLY) /* start an object */
#define VINUM_RELEASECONFIG _IOC(0, L, 77, 0) /* release locks and write config to disk */
#define VINUM_STARTCONFIG _IOW(L, 78, int) /* start a configuration operation */
#define VINUM_MEMINFO _IOR(L, 79, struct meminfo) /* get memory usage summary */
#define VINUM_MALLOCINFO _IOWR(L, 80, struct mc) /* get specific malloc information [i] */
#define VINUM_INITSD _IOW(L, 82, int) /* initialize a subdisk */
#define VINUM_REMOVE _IOWR(L, 83, struct _ioctl_reply) /* remove an object */
#define VINUM_READPOL _IOWR(L, 84, struct _ioctl_reply) /* set read policy */
#define VINUM_SETSTATE_FORCE _IOC(IOC_IN | IOC_OUT, L, 85, MAX_IOCTL_REPLY) /* diddle object state */
#define VINUM_RESETSTATS _IOWR(L, 86, struct _ioctl_reply) /* reset object stats */
#define VINUM_ATTACH _IOWR(L, 87, struct _ioctl_reply) /* attach an object */
#define VINUM_DETACH _IOWR(L, 88, struct _ioctl_reply) /* detach an object */
#define VINUM_RENAME _IOWR(L, 89, struct _ioctl_reply) /* rename an object */
#define VINUM_REPLACE _IOWR(L, 90, struct _ioctl_reply) /* replace an object */
#ifdef VINUMDEBUG
#define VINUM_RQINFO _IOWR(L, 91, struct rqinfo) /* get request info [i] from trace buffer */
#endif
#define VINUM_DAEMON _IOC(0, L, 92, 0) /* perform the kernel part of Vinum daemon */
#define VINUM_FINDDAEMON _IOC(0, L, 93, 0) /* check for presence of Vinum daemon */
#define VINUM_SETDAEMON _IOW(L, 94, int) /* set daemon flags */
#define VINUM_GETDAEMON _IOR(L, 95, int) /* get daemon flags */
#define VINUM_PARITYOP _IOWR(L, 96, struct _ioctl_reply) /* check/rebuild RAID-4/5 parity */
#define VINUM_MOVE _IOWR(L, 98, struct _ioctl_reply) /* move an object */

View File

@ -1,960 +0,0 @@
/*
* XXX replace all the checks on object validity with
* calls to valid<object>
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/*-
* Copyright (c) 1997, 1998, 1999
* Nan Yang Computer Services Limited. All rights reserved.
*
* Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
*
* Written by Greg Lehey
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumioctl.c,v 1.23 2003/05/23 01:02:22 grog Exp grog $
*/
#include <dev/vinum/vinumhdr.h>
#include <dev/vinum/request.h>
#ifdef VINUMDEBUG
#include <sys/reboot.h>
#endif
/*
 * Handlers defined later in this file.  Each one receives the ioctl
 * data buffer and writes its answer back into that same buffer as a
 * struct _ioctl_reply.
 */
void attachobject(struct vinum_ioctl_msg *);
void detachobject(struct vinum_ioctl_msg *);
void renameobject(struct vinum_rename_msg *);
void replaceobject(struct vinum_ioctl_msg *);
void moveobject(struct vinum_ioctl_msg *);
void setreadpol(struct vinum_ioctl_msg *);
/* setjmp() context armed by the VINUM_CREATE handler before it parses a configuration */
jmp_buf command_fail; /* return on a failed command */
/*
 * ioctl routine for all vinum devices.
 *
 * Requests on the super device or the daemon device are handed to
 * vinum_super_ioctl().  For subdisk, plex and volume devices only the
 * size queries are answered:
 *
 *   DIOCGSECTORSIZE  stores the sector size through data as a u_int
 *   DIOCGMEDIASIZE   stores the size in bytes through data as a u_int64_t
 *
 * DIOCWDINFO and DIOCSDINFO are accepted and ignored; any other request
 * on such a device returns ENOTTY.
 *
 * Returns 0 on success, otherwise an errno value.  For volumes, ENXIO is
 * returned if the volume number is not below
 * vinum_conf.volumes_allocated and EIO if the volume is not up.
 *
 * NOTE(review): unlike the volume case, the subdisk and plex numbers are
 * used to index SD[] and PLEX[] without a range check here; presumably
 * they were validated when the device was opened -- confirm.
 */
int
vinumioctl(struct cdev *dev,
    u_long cmd,
    caddr_t data,
    int flag,
    struct thread *td)
{
    unsigned int objno;
    struct sd *sd;
    struct plex *plex;
    struct volume *vol;

    /* First, decide what we're looking at */
    if ((minor(dev) == VINUM_SUPERDEV_MINOR)
        || (minor(dev) == VINUM_DAEMON_MINOR))
        return vinum_super_ioctl(dev, cmd, data);
    else /* real device */
        switch (DEVTYPE(dev)) {
        case VINUM_SD_TYPE:
        case VINUM_SD2_TYPE: /* second half of sd namespace */
            objno = Sdno(dev);
            sd = &SD[objno];
            switch (cmd) {
            case DIOCGSECTORSIZE:
                *(u_int *) data = sd->sectorsize;
                return 0;

            case DIOCGMEDIASIZE:
                *(u_int64_t *) data = sd->sectors * sd->sectorsize;
                return 0;

                /*
                 * We don't have this stuff on hardware,
                 * so just pretend to do it so that
                 * utilities don't get upset.
                 */
            case DIOCWDINFO: /* write partition info */
            case DIOCSDINFO: /* set partition info */
                return 0; /* not a titty */

            default:
                return ENOTTY; /* not my kind of ioctl */
            }
            return 0; /* NOTREACHED: every case above returns */

        case VINUM_PLEX_TYPE:
            objno = Plexno(dev);
            plex = &PLEX[objno];
            switch (cmd) {
            case DIOCGSECTORSIZE:
                /*
                 * The sector size is returned as a u_int, exactly as
                 * in the subdisk and volume cases.  Storing it as a
                 * 64 bit quantity would write past the end of the
                 * caller's result.
                 */
                *(u_int *) data = plex->sectorsize;
                return 0;

            case DIOCGMEDIASIZE:
                *(u_int64_t *) data = plex->length * plex->sectorsize;
                return 0;

                /*
                 * We don't have this stuff on hardware,
                 * so just pretend to do it so that
                 * utilities don't get upset.
                 */
            case DIOCWDINFO: /* write partition info */
            case DIOCSDINFO: /* set partition info */
                return 0; /* not a titty */

            default:
                return ENOTTY; /* not my kind of ioctl */
            }
            return 0; /* NOTREACHED: every case above returns */

        case VINUM_VOLUME_TYPE:
            objno = Volno(dev);
            if ((unsigned) objno >= (unsigned) vinum_conf.volumes_allocated) /* not a valid volume */
                return ENXIO;
            vol = &VOL[objno];
            if (vol->state != volume_up) /* not up, */
                return EIO; /* I/O error */
            switch (cmd) {
            case DIOCGSECTORSIZE:
                *(u_int *) data = vol->sectorsize;
                return 0;

            case DIOCGMEDIASIZE:
                *(u_int64_t *) data = vol->size * vol->sectorsize;
                return 0;

                /*
                 * We don't have this stuff on hardware,
                 * so just pretend to do it so that
                 * utilities don't get upset.
                 */
            case DIOCWDINFO: /* write partition info */
            case DIOCSDINFO: /* set partition info */
                return 0; /* not a titty */

            default:
                return ENOTTY; /* not my kind of ioctl */
            }
            break; /* NOTREACHED: every case above returns */
        }
    return 0; /* XXX reached only for a device type not handled above */
}
/*
 * Handle ioctls for the super device.
 *
 * data is the kernel copy of the ioctl argument.  Depending on the
 * request it is read as an int, a pair of ints, a text buffer or a
 * struct vinum_ioctl_msg, and the answer is written back into the same
 * buffer.
 *
 * Requests that answer with a struct _ioctl_reply return 0 from this
 * function even when they fail: the real status is in the reply's error
 * field, and a non-zero return value would stop it from reaching the
 * caller ("must be 0 to return the real error info").  The remaining
 * requests return an errno value directly.
 *
 * NOTE(review): requests that match no case, including the debug
 * requests when VINUMDEBUG is not defined, return 0 rather than ENOTTY.
 * This is left unchanged because callers may depend on it.
 */
int
vinum_super_ioctl(struct cdev *dev,
    u_long cmd,
    caddr_t data)
{
    int error = 0;
    unsigned int index; /* for transferring config info */
    unsigned int sdno; /* for transferring config info */
    int fe; /* free list element number */
    struct _ioctl_reply *ioctl_reply = (struct _ioctl_reply *) data; /* struct to return */

    switch (cmd) {
#ifdef VINUMDEBUG
    case VINUM_DEBUG:
        if (((struct debuginfo *) data)->changeit) /* change debug settings */
            debug = (((struct debuginfo *) data)->param);
        else {
            if (debug & DEBUG_REMOTEGDB)
                boothowto |= RB_GDB; /* serial debug line */
            else
                boothowto &= ~RB_GDB; /* local ddb */
            kdb_enter("vinum debug");
        }
        ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
        ioctl_reply->error = 0;
        return 0;
#endif

    case VINUM_CREATE: /* create a vinum object */
        error = lock_config(); /* get the config for us alone */
        if (error) /* can't do it, */
            return error; /* give up */
        error = setjmp(command_fail); /* come back here on error */
        if (error == 0) /* first time, */
            ioctl_reply->error = parse_user_config((char *) data, /* update the config */
                &keyword_set);
        else if (ioctl_reply->error == 0) { /* longjmp, but no error status */
            ioctl_reply->error = EINVAL; /* note that something's up */
            ioctl_reply->msg[0] = '\0'; /* no message? */
        }
        unlock_config();
        return 0; /* must be 0 to return the real error info */

    case VINUM_GETCONFIG: /* get the configuration information */
        bcopy(&vinum_conf, data, sizeof(vinum_conf));
        return 0;

        /* start configuring the subsystem */
    case VINUM_STARTCONFIG:
        return start_config(*(int *) data); /* just lock it. Parameter is 'force' */

        /*
         * Move the individual parts of the config to user space.
         *
         * Specify the index of the object in the first word of data,
         * and return the object there
         */
    case VINUM_DRIVECONFIG:
        index = *(int *) data; /* get the index */
        if (index >= (unsigned) vinum_conf.drives_allocated) /* can't do it */
            return ENXIO; /* bang */
        bcopy(&DRIVE[index], data, sizeof(struct _drive)); /* copy the config item out */
        return 0;

    case VINUM_SDCONFIG:
        index = *(int *) data; /* get the index */
        if (index >= (unsigned) vinum_conf.subdisks_allocated) /* can't do it */
            return ENXIO; /* bang */
        bcopy(&SD[index], data, sizeof(struct _sd)); /* copy the config item out */
        return 0;

    case VINUM_PLEXCONFIG:
        index = *(int *) data; /* get the index */
        if (index >= (unsigned) vinum_conf.plexes_allocated) /* can't do it */
            return ENXIO; /* bang */
        bcopy(&PLEX[index], data, sizeof(struct _plex)); /* copy the config item out */
        return 0;

    case VINUM_VOLCONFIG:
        index = *(int *) data; /* get the index */
        if (index >= (unsigned) vinum_conf.volumes_allocated) /* can't do it */
            return ENXIO; /* bang */
        bcopy(&VOL[index], data, sizeof(struct _volume)); /* copy the config item out */
        return 0;

    case VINUM_PLEXSDCONFIG:
        index = *(int *) data; /* get the plex index */
        sdno = ((int *) data)[1]; /* and the sd index */
        if ((index >= (unsigned) vinum_conf.plexes_allocated) /* plex doesn't exist */
            ||(sdno >= PLEX[index].subdisks)) /* or it doesn't have this many subdisks */
            return ENXIO; /* bang */
        bcopy(&SD[PLEX[index].sdnos[sdno]], /* copy the config item out */
            data,
            sizeof(struct _sd));
        return 0;

        /*
         * We get called in two places: one from the
         * userland config routines, which call us
         * to complete the config and save it. This
         * call supplies the value 0 as a parameter.
         *
         * The other place is from the user "saveconfig"
         * routine, which can only work if we're *not*
         * configuring. In this case, supply parameter 1.
         */
    case VINUM_SAVECONFIG:
        if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */
            if (*(int *) data == 0) /* finish config */
                finish_config(1); /* finish the configuration and update it */
            else
                return EBUSY; /* can't do it now */
        }
        save_config(); /* save configuration to disk */
        return 0;

    case VINUM_RELEASECONFIG: /* release the config */
        if (VFLAGS & VF_CONFIGURING) { /* must be us, the others are asleep */
            finish_config(0); /* finish the configuration, don't change it */
            save_config(); /* save configuration to disk */
        } else
            error = EINVAL; /* release what config? */
        return error;

    case VINUM_READCONFIG:
        if (((char *) data)[0] == '\0')
            ioctl_reply->error = vinum_scandisk(NULL); /* built your own list */
        else
            ioctl_reply->error = vinum_scandisk((char *) data);
        if (ioctl_reply->error == ENOENT) {
            if (vinum_conf.drives_used > 0)
                strcpy(ioctl_reply->msg, "no additional drives found");
            else
                strcpy(ioctl_reply->msg, "no drives found");
        } else if (ioctl_reply->error)
            strcpy(ioctl_reply->msg, "can't read configuration information, see log file");
        return 0; /* must be 0 to return the real error info */

    case VINUM_INIT:
        ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
        ioctl_reply->error = 0;
        return 0;

    case VINUM_RESETCONFIG:
        if (vinum_inactive(0)) { /* if the volumes are not active */
            /*
             * Note the open count. We may be called from v, so we'll be open.
             * Keep the count so we don't underflow
             */
            free_vinum(1); /* clean up everything */
            log(LOG_NOTICE, "vinum: CONFIGURATION OBLITERATED\n");
            ioctl_reply = (struct _ioctl_reply *) data; /* reinstate the address to reply to */
            ioctl_reply->error = 0;
            return 0;
        }
        return EBUSY;

    case VINUM_SETSTATE:
        setstate((struct vinum_ioctl_msg *) data); /* set an object state */
        return 0;

        /*
         * Set state by force, without changing
         * anything else.
         */
    case VINUM_SETSTATE_FORCE:
        setstate_by_force((struct vinum_ioctl_msg *) data); /* set an object state */
        return 0;

#ifdef VINUMDEBUG
    case VINUM_MEMINFO:
        vinum_meminfo(data);
        return 0;

    case VINUM_MALLOCINFO:
        return vinum_mallocinfo(data);

    case VINUM_RQINFO:
        return vinum_rqinfo(data);
#endif

    case VINUM_REMOVE:
        remove((struct vinum_ioctl_msg *) data); /* remove an object */
        return 0;

    case VINUM_GETFREELIST: /* get a drive free list element */
        index = *(int *) data; /* get the drive index */
        fe = ((int *) data)[1]; /* and the free list element */
        if ((index >= (unsigned) vinum_conf.drives_allocated) /* drive doesn't exist */
            ||(DRIVE[index].state == drive_unallocated))
            return ENODEV;
        /*
         * fe is a signed value supplied by the caller: a negative
         * number must not be used to index the free list.
         */
        if ((fe < 0)
            ||(fe >= DRIVE[index].freelist_entries)) /* no such entry */
            return ENOENT;
        bcopy(&DRIVE[index].freelist[fe],
            data,
            sizeof(struct drive_freelist));
        return 0;

    case VINUM_RESETSTATS:
        resetstats((struct vinum_ioctl_msg *) data); /* reset object stats */
        return 0;

        /* attach an object to a superordinate object */
    case VINUM_ATTACH:
        attachobject((struct vinum_ioctl_msg *) data);
        return 0;

        /* detach an object from a superordinate object */
    case VINUM_DETACH:
        detachobject((struct vinum_ioctl_msg *) data);
        return 0;

        /* rename an object */
    case VINUM_RENAME:
        renameobject((struct vinum_rename_msg *) data);
        return 0;

        /* replace an object */
    case VINUM_REPLACE:
        replaceobject((struct vinum_ioctl_msg *) data);
        return 0;

    case VINUM_DAEMON:
        vinum_daemon(); /* perform the daemon */
        return 0;

    case VINUM_FINDDAEMON: /* check for presence of daemon */
        return vinum_finddaemon();

    case VINUM_SETDAEMON: /* set daemon flags */
        return vinum_setdaemonopts(*(int *) data);

    case VINUM_GETDAEMON: /* get daemon flags */
        *(int *) data = daemon_options;
        return 0;

    case VINUM_PARITYOP: /* check/rebuild RAID-4/5 parity */
        parityops((struct vinum_ioctl_msg *) data);
        return 0;

        /* move an object */
    case VINUM_MOVE:
        moveobject((struct vinum_ioctl_msg *) data);
        return 0;

    case VINUM_READPOL:
        setreadpol((struct vinum_ioctl_msg *) data);
        return 0;

    default:
        /* unknown request: nothing is done and 0 is returned below */
        break;
    }
    return 0; /* to keep the compiler happy */
}
/*
 * The following four functions check the supplied
 * object index and return a pointer to the object
 * if it exists. Otherwise they store a "No such
 * <object>" message and ENOENT in the reply and
 * return NULL.
 */
/*
 * Return a pointer to drive number driveno if the number lies inside
 * the drive table and the entry's state is beyond drive_referenced.
 *
 * Otherwise store "No such drive" and ENOENT in reply and return NULL.
 * driveno normally comes from an ioctl message, so negative values are
 * rejected as well instead of being used as an index.
 */
struct drive *
validdrive(int driveno, struct _ioctl_reply *reply)
{
    if ((driveno >= 0)
        && (driveno < vinum_conf.drives_allocated)
        && (DRIVE[driveno].state > drive_referenced))
        return &DRIVE[driveno];
    strcpy(reply->msg, "No such drive");
    reply->error = ENOENT;
    return NULL;
}
/*
 * Return a pointer to subdisk number sdno if the number lies inside
 * the subdisk table and the entry's state is beyond sd_referenced.
 *
 * Otherwise store "No such subdisk" and ENOENT in reply and return
 * NULL.  sdno normally comes from an ioctl message, so negative values
 * are rejected as well instead of being used as an index.
 */
struct sd *
validsd(int sdno, struct _ioctl_reply *reply)
{
    if ((sdno >= 0)
        && (sdno < vinum_conf.subdisks_allocated)
        && (SD[sdno].state > sd_referenced))
        return &SD[sdno];
    strcpy(reply->msg, "No such subdisk");
    reply->error = ENOENT;
    return NULL;
}
/*
 * Return a pointer to plex number plexno if the number lies inside the
 * plex table and the entry's state is beyond plex_referenced.
 *
 * Otherwise store "No such plex" and ENOENT in reply and return NULL.
 * plexno normally comes from an ioctl message, so negative values are
 * rejected as well instead of being used as an index.
 */
struct plex *
validplex(int plexno, struct _ioctl_reply *reply)
{
    if ((plexno >= 0)
        && (plexno < vinum_conf.plexes_allocated)
        && (PLEX[plexno].state > plex_referenced))
        return &PLEX[plexno];
    strcpy(reply->msg, "No such plex");
    reply->error = ENOENT;
    return NULL;
}
/*
 * Return a pointer to volume number volno if the number lies inside
 * the volume table and the entry's state is beyond volume_uninit.
 *
 * Otherwise store "No such volume" and ENOENT in reply and return
 * NULL.  volno normally comes from an ioctl message, so negative values
 * are rejected as well instead of being used as an index.
 */
struct volume *
validvol(int volno, struct _ioctl_reply *reply)
{
    if ((volno >= 0)
        && (volno < vinum_conf.volumes_allocated)
        && (VOL[volno].state > volume_uninit))
        return &VOL[volno];
    strcpy(reply->msg, "No such volume");
    reply->error = ENOENT;
    return NULL;
}
/* reset an object's stats */
void
resetstats(struct vinum_ioctl_msg *msg)
{
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
switch (msg->type) {
case drive_object:
if (msg->index < vinum_conf.drives_allocated) {
struct drive *drive = &DRIVE[msg->index];
if (drive->state > drive_referenced) {
drive->reads = 0; /* number of reads on this drive */
drive->writes = 0; /* number of writes on this drive */
drive->bytes_read = 0; /* number of bytes read */
drive->bytes_written = 0; /* number of bytes written */
reply->error = 0;
return;
}
reply->error = EINVAL;
return;
}
case sd_object:
if (msg->index < vinum_conf.subdisks_allocated) {
struct sd *sd = &SD[msg->index];
if (sd->state > sd_referenced) {
sd->reads = 0; /* number of reads on this subdisk */
sd->writes = 0; /* number of writes on this subdisk */
sd->bytes_read = 0; /* number of bytes read */
sd->bytes_written = 0; /* number of bytes written */
reply->error = 0;
return;
}
reply->error = EINVAL;
return;
}
break;
case plex_object:
if (msg->index < vinum_conf.plexes_allocated) {
struct plex *plex = &PLEX[msg->index];
if (plex->state > plex_referenced) {
plex->reads = 0;
plex->writes = 0; /* number of writes on this plex */
plex->bytes_read = 0; /* number of bytes read */
plex->bytes_written = 0; /* number of bytes written */
plex->recovered_reads = 0; /* number of recovered read operations */
plex->degraded_writes = 0; /* number of degraded writes */
plex->parityless_writes = 0; /* number of parityless writes */
plex->multiblock = 0; /* requests that needed more than one block */
plex->multistripe = 0; /* requests that needed more than one stripe */
reply->error = 0;
return;
}
reply->error = EINVAL;
return;
}
break;
case volume_object:
if (msg->index < vinum_conf.volumes_allocated) {
struct volume *vol = &VOL[msg->index];
if (vol->state > volume_uninit) {
vol->bytes_read = 0; /* number of bytes read */
vol->bytes_written = 0; /* number of bytes written */
vol->reads = 0; /* number of reads on this volume */
vol->writes = 0; /* number of writes on this volume */
vol->recovered_reads = 0; /* reads recovered from another plex */
reply->error = 0;
return;
}
reply->error = EINVAL;
return;
}
case invalid_object: /* can't get this */
reply->error = EINVAL;
return;
}
}
/* attach an object to a superior object */
void
attachobject(struct vinum_ioctl_msg *msg)
{
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
int sdno;
struct sd *sd;
struct plex *plex;
struct volume *vol;
switch (msg->type) {
case drive_object: /* you can't attach a drive to anything */
case volume_object: /* nor a volume */
case invalid_object: /* "this can't happen" */
reply->error = EINVAL;
reply->msg[0] = '\0'; /* vinum(8) doesn't do this */
return;
case sd_object:
sd = validsd(msg->index, reply);
if (sd == NULL) /* not a valid subdisk */
return;
plex = validplex(msg->otherobject, reply);
if (plex) {
/*
* We should be more intelligent about this.
* We should be able to reattach a dead
* subdisk, but if we want to increase the total
* number of subdisks, we have a lot of reshuffling
* to do. XXX
*/
if ((plex->organization != plex_concat) /* can't attach to striped and RAID-4/5 */
&&(!msg->force)) { /* without using force */
reply->error = EINVAL; /* no message, the user should check */
strcpy(reply->msg, "Can't attach to this plex organization");
} else if (sd->plexno >= 0) { /* already belong to a plex */
reply->error = EBUSY; /* no message, the user should check */
sprintf(reply->msg, "%s is already attached to %s",
sd->name,
sd[sd->plexno].name);
reply->msg[0] = '\0';
} else {
sd->plexoffset = msg->offset; /* this is where we want it */
set_sd_state(sd->sdno, sd_stale, setstate_force); /* make sure it's stale */
give_sd_to_plex(plex->plexno, sd->sdno); /* and give it to the plex */
update_sd_config(sd->sdno, 0);
save_config();
if (sd->state == sd_reviving)
reply->error = EAGAIN; /* need to revive it */
else
reply->error = 0;
}
}
break;
case plex_object:
plex = validplex(msg->index, reply); /* get plex */
if (plex == NULL)
return;
vol = validvol(msg->otherobject, reply); /* and volume information */
if (vol) {
if (vol->plexes == MAXPLEX) { /* we have too many already */
reply->error = ENOSPC; /* nowhere to put it */
strcpy(reply->msg, "Too many plexes");
} else if (plex->volno >= 0) { /* the plex has an owner */
reply->error = EBUSY; /* no message, the user should check */
sprintf(reply->msg, "%s is already attached to %s",
plex->name,
VOL[plex->volno].name);
} else {
for (sdno = 0; sdno < plex->subdisks; sdno++) {
sd = &SD[plex->sdnos[sdno]];
if (sd->state > sd_down) /* real subdisk, vaguely accessible */
set_sd_state(plex->sdnos[sdno], sd_stale, setstate_force); /* make it stale */
}
set_plex_state(plex->plexno, plex_up, setstate_none); /* update plex state */
give_plex_to_volume(msg->otherobject, msg->index, 0); /* and give it to the volume */
update_plex_config(plex->plexno, 0);
save_config();
reply->error = 0; /* all went well */
}
}
}
}
/* detach an object from a superior object */
void
detachobject(struct vinum_ioctl_msg *msg)
{
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
struct sd *sd;
struct plex *plex;
struct volume *vol;
int sdno;
int plexno;
switch (msg->type) {
case drive_object: /* you can't detach a drive from anything */
case volume_object: /* nor a volume */
case invalid_object: /* "this can't happen" */
reply->error = EINVAL;
reply->msg[0] = '\0'; /* vinum(8) doesn't do this */
return;
case sd_object:
sd = validsd(msg->index, reply);
if (sd == NULL)
return;
if (sd->plexno < 0) { /* doesn't belong to a plex */
reply->error = ENOENT;
strcpy(reply->msg, "Subdisk is not attached");
return;
} else { /* valid plex number */
plex = &PLEX[sd->plexno];
if ((!msg->force) /* don't force things */
&&((plex->state == plex_up) /* and the plex is up */
||((plex->state == plex_flaky) && sd->state == sd_up))) { /* or flaky with this sd up */
reply->error = EBUSY; /* we need this sd */
reply->msg[0] = '\0';
return;
}
sd->plexno = -1; /* anonymous sd */
if (plex->subdisks == 1) { /* this was the only subdisk */
Free(plex->sdnos); /* free the subdisk array */
plex->sdnos = NULL; /* and note the fact */
plex->subdisks_allocated = 0; /* no subdisk space */
} else {
for (sdno = 0; sdno < plex->subdisks; sdno++) {
if (plex->sdnos[sdno] == msg->index) /* found our subdisk */
break;
}
if (sdno < (plex->subdisks - 1)) /* not the last one, compact */
bcopy(&plex->sdnos[sdno + 1],
&plex->sdnos[sdno],
(plex->subdisks - 1 - sdno) * sizeof(int));
}
plex->subdisks--;
if (!bcmp(plex->name, sd->name, strlen(plex->name) + 1))
/* this subdisk is named after the plex */
{
bcopy(sd->name,
&sd->name[3],
min(strlen(sd->name) + 1, MAXSDNAME - 3));
bcopy("ex-", sd->name, 3);
sd->name[MAXSDNAME - 1] = '\0';
}
update_plex_config(plex->plexno, 0);
if (isstriped(plex)) /* we've just mutilated our plex, */
set_plex_state(plex->plexno,
plex_down,
setstate_force | setstate_configuring);
if (plex->volno >= 0) /* plex attached to volume, */
update_volume_config(plex->volno);
save_config();
reply->error = 0;
}
return;
case plex_object:
plex = validplex(msg->index, reply); /* get plex */
if (plex == NULL)
return;
if (plex->volno >= 0) {
int volno = plex->volno;
vol = &VOL[volno];
if ((!msg->force) /* don't force things */
&&((vol->state == volume_up) /* and the volume is up */
&&(vol->plexes == 1))) { /* and this is the last plex */
/*
* XXX As elsewhere, check whether we will lose
* mapping by removing this plex
*/
reply->error = EBUSY; /* we need this plex */
reply->msg[0] = '\0';
return;
}
plex->volno = -1; /* anonymous plex */
for (plexno = 0; plexno < vol->plexes; plexno++) {
if (vol->plex[plexno] == msg->index) /* found our plex */
break;
}
if (plexno < (vol->plexes - 1)) /* not the last one, compact */
bcopy(&vol->plex[plexno + 1],
&vol->plex[plexno],
(vol->plexes - 1 - plexno) * sizeof(int));
vol->plexes--;
vol->last_plex_read = 0; /* don't go beyond the end */
if (!bcmp(vol->name, plex->name, strlen(vol->name) + 1))
/* this plex is named after the volume */
{
/* First, check if the subdisks are the same */
if (msg->recurse) {
int sdno;
for (sdno = 0; sdno < plex->subdisks; sdno++) {
struct sd *sd = &SD[plex->sdnos[sdno]];
if (!bcmp(plex->name, sd->name, strlen(plex->name) + 1))
/* subdisk is named after the plex */
{
bcopy(sd->name,
&sd->name[3],
min(strlen(sd->name) + 1, MAXSDNAME - 3));
bcopy("ex-", sd->name, 3);
sd->name[MAXSDNAME - 1] = '\0';
}
}
}
bcopy(plex->name,
&plex->name[3],
min(strlen(plex->name) + 1, MAXPLEXNAME - 3));
bcopy("ex-", plex->name, 3);
plex->name[MAXPLEXNAME - 1] = '\0';
}
update_volume_config(volno);
save_config();
reply->error = 0;
} else {
reply->error = ENOENT;
strcpy(reply->msg, "Plex is not attached");
}
}
}
void
renameobject(struct vinum_rename_msg *msg)
{
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
struct drive *drive;
struct sd *sd;
struct plex *plex;
struct volume *vol;
switch (msg->type) {
case drive_object: /* you can't attach a drive to anything */
if (find_drive(msg->newname, 0) >= 0) { /* we have that name already, */
reply->error = EEXIST;
reply->msg[0] = '\0';
return;
}
drive = validdrive(msg->index, reply);
if (drive) {
bcopy(msg->newname, drive->label.name, MAXDRIVENAME);
save_config();
reply->error = 0;
}
return;
case sd_object: /* you can't attach a subdisk to anything */
if (find_subdisk(msg->newname, 0) >= 0) { /* we have that name already, */
reply->error = EEXIST;
reply->msg[0] = '\0';
return;
}
sd = validsd(msg->index, reply);
if (sd) {
bcopy(msg->newname, sd->name, MAXSDNAME);
update_sd_config(sd->sdno, 0);
save_config();
reply->error = 0;
}
return;
case plex_object: /* you can't attach a plex to anything */
if (find_plex(msg->newname, 0) >= 0) { /* we have that name already, */
reply->error = EEXIST;
reply->msg[0] = '\0';
return;
}
plex = validplex(msg->index, reply);
if (plex) {
bcopy(msg->newname, plex->name, MAXPLEXNAME);
update_plex_config(plex->plexno, 0);
save_config();
reply->error = 0;
}
return;
case volume_object: /* you can't attach a volume to anything */
if (find_volume(msg->newname, 0) >= 0) { /* we have that name already, */
reply->error = EEXIST;
reply->msg[0] = '\0';
return;
}
vol = validvol(msg->index, reply);
if (vol) {
bcopy(msg->newname, vol->name, MAXVOLNAME);
update_volume_config(msg->index);
save_config();
reply->error = 0;
}
return;
case invalid_object:
reply->error = EINVAL;
reply->msg[0] = '\0';
}
}
/*
* Replace one object with another.
* Currently only for drives.
* message->index is the drive number of the old drive
* message->otherobject is the drive number of the new drive
*/
void
replaceobject(struct vinum_ioctl_msg *msg)
{
struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
reply->error = ENODEV; /* until I know how to do this */
strcpy(reply->msg, "replace not implemented yet");
/* save_config (); */
}
/*
 * Move a subdisk to another drive.
 *
 * msg->index is the number of the destination drive, msg->otherobject
 * the number of the subdisk.  The subdisk is made stale (or empty if it
 * was not beyond sd_stale), the state of its plex is updated, its space
 * on the old drive is returned and the destination drive is asked for
 * new space.
 *
 * The answer is written over msg as a struct _ioctl_reply: 0 on
 * success, including the case where the subdisk is already on that
 * drive, or ENOENT from validdrive/validsd.
 *
 * NOTE(review): unlike the attach, detach and rename handlers this one
 * does not call save_config() -- confirm that the caller does.
 */
void
moveobject(struct vinum_ioctl_msg *msg)
{
    struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
    struct drive *drive;
    struct sd *sd;

    /* Check that our objects are valid (i.e. they exist) */
    drive = validdrive(msg->index, (struct _ioctl_reply *) msg);
    if (drive == NULL)
        return;
    sd = validsd(msg->otherobject, (struct _ioctl_reply *) msg);
    if (sd == NULL)
        return;
    if (sd->driveno == msg->index) { /* sd already belongs to drive */
        /*
         * Nothing to do, but say so: reply->error shares its storage
         * with msg->index, so without this store the caller would
         * see the drive number as an error code.
         */
        reply->error = 0;
        return;
    }
    if (sd->state > sd_stale)
        set_sd_state(sd->sdno, sd_stale, setstate_force); /* make the subdisk stale */
    else
        sd->state = sd_empty;
    if (sd->plexno >= 0) /* part of a plex, */
        update_plex_state(sd->plexno); /* update its state */

    /* Return the space on the old drive */
    if ((sd->driveno >= 0) /* we have a drive, */
        &&(sd->sectors > 0)) /* and some space on it */
        return_drive_space(sd->driveno, /* return the space */
            sd->driveoffset,
            sd->sectors);

    /* Reassign the old subdisk */
    sd->driveno = msg->index;
    sd->driveoffset = -1; /* let the drive decide where to put us */
    give_sd_to_drive(sd->sdno);
    reply->error = 0;
}
/*
 * setreadpol: set the preferred plex (read policy) of a volume.
 *
 * msg->index is the volume number.  msg->otherobject is the number of
 * the plex to prefer; if it is negative, no plex is looked up and
 * vol->preferred_plex is set to -1.
 *
 * The reply overlays the request buffer.  On success reply->error is
 * 0.  If the plex is not attached to the volume, reply->error is
 * ENOENT with an explanatory message.  If either object fails
 * validation, the function returns immediately and leaves the reply
 * as the validation routine left it.
 */
void
setreadpol(struct vinum_ioctl_msg *msg)
{
    struct _ioctl_reply *reply = (struct _ioctl_reply *) msg;
    struct volume *vol;
    struct plex *plex;
    int myplexno = -1;

    /* Check that our objects are valid (i.e. they exist) */
    vol = validvol(msg->index, reply);
    if (vol == NULL)
        return;

    /* If a plex was specified, check that it is valid */
    if (msg->otherobject >= 0) {
        plex = validplex(msg->otherobject, reply);
        if (plex == NULL)                           /* test the plex, not the volume again */
            return;

        /* Is it attached to this volume? */
        myplexno = my_plex(msg->index, msg->otherobject);
        if (myplexno < 0) {
            strcpy(reply->msg, "Plex is not attached to volume");
            reply->error = ENOENT;
            return;
        }
    }

    /* NOTE(review): the return value of lock_config() is not checked here. */
    lock_config();
    vol->preferred_plex = myplexno;
    save_config();
    unlock_config();
    reply->error = 0;
}
/* Local Variables: */
/* fill-column: 50 */
/* End: */

View File

@ -1,152 +0,0 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumkw.h,v 1.20 2003/05/07 03:32:09 grog Exp grog $
* $FreeBSD$
*/
/*
* Command keywords that vinum knows. These include both user-level
* and kernel-level stuff
*/
/*
* Our complete vocabulary. The names of the commands are
* the same as the identifier without the kw_ at the beginning
* (i.e. kw_create defines the "create" keyword). Preprocessor
* magic in parser.c does the rest.
*
* To add a new word: put it in the table below and one of the
* lists in vinumparser.c (probably keywords).
*/
/*
 * Keyword identifiers.  An entry written as kw_x = kw_y is an alias
 * that shares the value of kw_y; kw_invalid_keyword is the only
 * negative value.
 */
enum keyword {
kw_create,
kw_modify,
kw_list,
kw_l = kw_list,
kw_ld, /* list drive */
kw_ls, /* list subdisk */
kw_lp, /* list plex */
kw_lv, /* list volume */
kw_set,
kw_rm,
kw_mv, /* move object */
kw_move, /* synonym for mv */
kw_start,
kw_stop,
kw_makedev, /* make /dev/vinum devices */
kw_setdaemon, /* set daemon flags */
kw_getdaemon, /* get daemon flags */
kw_help,
kw_drive,
kw_partition,
kw_sd,
kw_subdisk = kw_sd,
kw_plex,
kw_volume,
kw_vol = kw_volume,
kw_read,
kw_readpol,
kw_org,
kw_name,
kw_concat,
kw_striped,
kw_raid4,
kw_raid5,
kw_driveoffset,
kw_plexoffset,
kw_len,
kw_length = kw_len,
kw_size = kw_len,
kw_state,
kw_setupstate,
kw_d, /* flag names */
kw_f,
kw_r,
kw_s,
kw_v,
kw_w,
kw_round, /* round robin */
/*
* The first of these is a volume attribute ("prefer plex"), and the
* second is a plex attribute ("preferred" means that the volume
* prefers this plex).
*/
kw_prefer, /* prefer plex */
kw_preferred, /* preferred plex */
kw_device,
kw_init,
kw_resetconfig,
kw_writethrough,
kw_writeback,
kw_replace,
kw_resetstats,
kw_attach,
kw_detach,
kw_rename,
kw_printconfig,
kw_saveconfig,
kw_hotspare,
kw_detached,
kw_debug, /* go into debugger */
kw_stripe,
kw_mirror,
kw_info,
kw_quit,
kw_max,
kw_setstate,
kw_checkparity,
kw_rebuildparity,
kw_dumpconfig,
kw_retryerrors,
kw_invalid_keyword = -1 /* not a keyword; presumably what get_keyword() yields for an unknown word -- TODO confirm */
};
/* One vocabulary entry: a keyword's text paired with its enum keyword value. */
struct _keywords {
char *name; /* text of the keyword */
enum keyword keyword; /* identifier that goes with it */
};
/* A keyword table together with a size, as passed to get_keyword(). */
struct keywordset {
int size; /* presumably the number of entries in k -- TODO confirm */
struct _keywords *k; /* the table of keywords */
};
/* Keyword tables and the sets describing them; the definitions live elsewhere. */
extern struct _keywords keywords[];
extern struct _keywords flag_keywords[];
extern struct keywordset keyword_set;
extern struct keywordset flag_set;
/* Parser functions */
/* Look a word up in a keyword set and return its enum keyword value. */
enum keyword get_keyword(char *, struct keywordset *);
/* NOTE(review): argument meanings are not visible here; presumably (input line, token array, array size) -- confirm in vinumparser.c. */
int tokenize(char *, char *[], int);

View File

@ -1,266 +0,0 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
*
* Written by Greg Lehey
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumlock.c,v 1.19 2003/05/23 01:07:18 grog Exp $
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <dev/vinum/vinumhdr.h>
#include <dev/vinum/request.h>
/*
 * Lock a drive, wait if it's in use.
 *
 * Returns 0 once the drive is locked.  If the calling process already
 * holds the lock, a warning is logged and 0 is returned without
 * changing anything.  If tsleep() returns an error while waiting, that
 * error is returned and the drive is not locked.
 *
 * With VINUMDEBUG the caller's file name and line number are recorded
 * in the drive so that a later conflict can be reported.
 */
#ifdef VINUMDEBUG
int
lockdrive(struct drive *drive, char *file, int line)
#else
int
lockdrive(struct drive *drive)
#endif
{
    int error;

    /* XXX get rid of drive->flags |= VF_LOCKING; */
    if ((drive->flags & VF_LOCKED)                  /* it's locked */
        &&(drive->pid == curproc->p_pid)) {         /* by us! */
#ifdef VINUMDEBUG
        log(LOG_WARNING,
            "vinum lockdrive: already locking %s from %s:%d, called from %s:%d\n",
            drive->label.name,
            drive->lockfilename,
            drive->lockline,
            basename(file),
            line);
#else
        log(LOG_WARNING,
            "vinum lockdrive: already locking %s\n",
            drive->label.name);
#endif
        return 0;
    }
    while ((drive->flags & VF_LOCKED) != 0) {
        /*
         * There are problems sleeping on a unique identifier,
         * since the drive structure can move, and the unlock
         * function can be called after killing the drive.
         * Solve this by waiting on this function; the number
         * of conflicts is negligible.
         */
        if ((error = tsleep(&lockdrive,
                    PRIBIO,
                    "vindrv",
                    0)) != 0)
            return error;
    }
    drive->flags |= VF_LOCKED;
    drive->pid = curproc->p_pid;                    /* it's a panic error if curproc is null */
#ifdef VINUMDEBUG
    /*
     * The base name may be shorter than the buffer, so don't copy a
     * fixed number of bytes from it: strlcpy stops at the terminator,
     * truncates if necessary and always NUL-terminates.
     */
    strlcpy(drive->lockfilename, basename(file), sizeof(drive->lockfilename));
    drive->lockline = line;
#endif
    return 0;
}
/*
 * unlockdrive: drop the lock taken by lockdrive() and wake up anybody
 * sleeping on &lockdrive, the channel lockdrive() waits on.
 * drive->pid is deliberately left alone: it's of hysterical interest.
 */
void
unlockdrive(struct drive *drive)
{
    drive->flags = drive->flags & ~VF_LOCKED;
    wakeup(&lockdrive);
}
/*
 * Lock a stripe of a plex, wait if it's in use.
 *
 * stripe identifies the stripe to lock; it is stored incremented by
 * one, because a stripe value of 0 marks a free table entry.
 * bp is the buffer header of the request taking the lock; an entry
 * for the same stripe that carries the same bp does not make us wait.
 * plex is the plex whose lock table (plex->lock, PLEX_LOCKS entries)
 * is used.
 *
 * Returns a pointer to the table entry that now holds the lock.  The
 * table is manipulated with plex->lockmtx held.
 *
 * NOTE(review): after the msleep() on a conflicting entry the scan
 * restarts, but the "table is full" wait at the top is not repeated.
 * If the table can fill up while we sleep, the scan would end with
 * pos == NULL and lock one past the last entry -- confirm whether
 * that can happen.
 */
struct rangelock *
lockrange(daddr_t stripe, struct buf *bp, struct plex *plex)
{
struct rangelock *lock;
struct rangelock *pos; /* position of first free lock */
int foundlocks; /* number of locks found */
/*
* We could get by without counting the number
* of locks we find, but we have a linear search
* through a table which in most cases will be
* empty. It's faster to stop when we've found
* all the locks that are there. This is also
* the reason why we put pos at the beginning
* instead of the end, though it requires an
* extra test.
*/
pos = NULL;
foundlocks = 0;
/*
* we can't use 0 as a valid address, so
* increment all addresses by 1.
*/
stripe++;
mtx_lock(plex->lockmtx);
/* Wait here if the table is full */
while (plex->usedlocks == PLEX_LOCKS) /* all in use */
msleep(&plex->usedlocks, plex->lockmtx, PRIBIO, "vlock", 0);
#ifdef DIAGNOSTIC
if (plex->usedlocks >= PLEX_LOCKS)
panic("lockrange: Too many locks in use");
#endif
lock = plex->lock; /* pointer in lock table */
if (plex->usedlocks > 0) /* something locked, */
/* Search the lock table for our stripe */
for (; lock < &plex->lock[PLEX_LOCKS]
&& foundlocks < plex->usedlocks;
lock++) {
if (lock->stripe) { /* in use */
foundlocks++; /* found another one in use */
if ((lock->stripe == stripe) /* it's our stripe */
&&(lock->bp != bp)) { /* but not our request */
#ifdef VINUMDEBUG
if (debug & DEBUG_LOCKREQS) {
struct rangelockinfo lockinfo;
lockinfo.stripe = stripe;
lockinfo.bp = bp;
lockinfo.plexno = plex->plexno;
logrq(loginfo_lockwait, (union rqinfou) &lockinfo, bp);
}
#endif
plex->lockwaits++; /* waited one more time */
/* sleep on the conflicting entry; unlockrange() wakes this channel */
msleep(lock, plex->lockmtx, PRIBIO, "vrlock", 0);
/* the loop's lock++ takes us back to entry 0 */
lock = &plex->lock[-1]; /* start again */
foundlocks = 0;
pos = NULL;
}
} else if (pos == NULL) /* still looking for somewhere? */
pos = lock; /* a place to put this one */
}
/*
* This untidy looking code ensures that we'll
* always end up pointing to the first free lock
* entry, thus minimizing the number of
* iterations necessary.
*/
if (pos == NULL) /* didn't find one on the way, */
pos = lock; /* use the one we're pointing to */
/*
* The address range is free, and we're pointing
* to the first unused entry. Make it ours.
*/
pos->stripe = stripe;
pos->bp = bp;
plex->usedlocks++; /* one more lock */
mtx_unlock(plex->lockmtx);
#ifdef VINUMDEBUG
if (debug & DEBUG_LOCKREQS) {
struct rangelockinfo lockinfo;
lockinfo.stripe = stripe;
lockinfo.bp = bp;
lockinfo.plexno = plex->plexno;
logrq(loginfo_lock, (union rqinfou) &lockinfo, bp);
}
#endif
return pos;
}
/*
 * unlockrange: release a range lock obtained from lockrange() and let
 * the next one at it.
 *
 * plexno is the index of the plex in vinum_conf; lock is the entry in
 * that plex's lock table.  The entry is marked free, the count of
 * used locks is decremented, and sleepers are woken: those waiting
 * for a free entry (only if the table had been full) and those
 * waiting for this particular entry.
 *
 * NOTE(review): the debug trace uses loginfo_lockwait, the same tag
 * lockrange() logs when it has to wait; check whether a dedicated
 * unlock tag was intended.
 */
void
unlockrange(int plexno, struct rangelock *lock)
{
    struct plex *plex = &PLEX[plexno];

#ifdef DIAGNOSTIC
    /* The entry must lie inside this plex's lock table */
    if (!(lock >= &plex->lock[0] && lock < &plex->lock[PLEX_LOCKS]))
        panic("vinum: rangelock %p on plex %d invalid, not between %p and %p",
            lock,
            plexno,
            &plex->lock[0],
            &plex->lock[PLEX_LOCKS]);
#endif
#ifdef VINUMDEBUG
    if (debug & DEBUG_LOCKREQS) {
        struct rangelockinfo info;

        info.plexno = plex->plexno;
        info.bp = lock->bp;
        info.stripe = lock->stripe;
        logrq(loginfo_lockwait, (union rqinfou) &info, lock->bp);
    }
#endif
    lock->stripe = 0;                               /* entry is free again */
    if (--plex->usedlocks == PLEX_LOCKS - 1)        /* the table was full, */
        wakeup(&plex->usedlocks);                   /* somebody may want an entry */
    wakeup((void *) lock);                          /* and somebody may want this stripe */
}
/* Get a lock for the global config. Wait if it's not available. */
int
lock_config(void)
{
int error;
while ((vinum_conf.flags & VF_LOCKED) != 0) {
vinum_conf.flags |= VF_LOCKING;
if ((error = tsleep(&vinum_conf, PRIBIO, "vincfg", 0)) != 0)
return error;
}
vinum_conf.flags |= VF_LOCKED;
return 0;
}
/*
 * unlock_config: give up the global configuration lock.  If a waiter
 * has announced itself with VF_LOCKING, that flag is cleared too and
 * the sleepers on &vinum_conf are woken.
 */
void
unlock_config(void)
{
    int waiting = vinum_conf.flags & VF_LOCKING;    /* anybody queued up? */

    vinum_conf.flags &= ~(VF_LOCKED | VF_LOCKING);
    if (waiting != 0)
        wakeup(&vinum_conf);
}
/* Local Variables: */
/* fill-column: 50 */
/* End: */

View File

@ -1,290 +0,0 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinummemory.c,v 1.31 2003/05/23 01:08:36 grog Exp $
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <dev/vinum/vinumhdr.h>
#ifdef VINUMDEBUG
#include <dev/vinum/request.h>
/* Request trace buffer and pointer to its current entry (read by vinum_rqinfo) */
extern struct rqinfo rqinfo[];
extern struct rqinfo *rqip;
int rqinfo_size = RQINFO_SIZE; /* for debugger */
#undef longjmp /* this was defined as LongJmp */
/* Spell strrchr as rindex in this file; presumably rindex is what the kernel provides -- TODO confirm */
#define strrchr rindex
/*
 * LongJmp: checking wrapper around longjmp.
 *
 * On i386 the jump buffer is inspected first: if the saved stack
 * pointer, frame pointer or return address is below 0xc0000000 we
 * panic instead of jumping.  On other platforms LongJmp is simply
 * longjmp.
 */
#ifdef __i386__ /* check for validity */
void
LongJmp(jmp_buf buf, int retval)
{
/*
* longjmp is not documented, not even jmp_buf.
* This is what's in i386/i386/support.s:
* ENTRY(longjmp)
* movl 4(%esp),%eax
* movl (%eax),%ebx restore ebx
* movl 4(%eax),%esp restore esp
* movl 8(%eax),%ebp restore ebp
* movl 12(%eax),%esi restore esi
* movl 16(%eax),%edi restore edi
* movl 20(%eax),%edx get rta
* movl %edx,(%esp) put in return frame
* xorl %eax,%eax return(1);
* incl %eax
* ret
*
* from which we deduce the structure of jmp_buf:
*/
struct JmpBuf {
int jb_ebx;
int jb_esp;
int jb_ebp;
int jb_esi;
int jb_edi;
int jb_eip;
};
struct JmpBuf *jb = (struct JmpBuf *) buf;
/* 0xc0000000 is presumably the start of kernel address space on i386 -- TODO confirm */
if ((jb->jb_esp < 0xc0000000)
|| (jb->jb_ebp < 0xc0000000)
|| (jb->jb_eip < 0xc0000000))
panic("Invalid longjmp");
longjmp(buf, retval);
}
#else /* not i386 */
#define LongJmp longjmp /* just use the kernel function */
#endif /* i386 */
/*
 * basename: return the part of a path name that follows the last '/'.
 * If there is no '/', the whole name is returned.  The result points
 * into the string passed in; nothing is copied.
 */
char *
basename(char *file)
{
    char *slash = strrchr(file, '/');               /* last separator, if any */

    return (slash != NULL) ? slash + 1 : file;
}
#endif /* VINUMDEBUG */
/*
 * expand_table: replace *table with a larger, zero-filled copy.
 *
 * oldsize and newsize are sizes in bytes.  Nothing happens unless
 * newsize is greater than oldsize.  Otherwise a new table of newsize
 * bytes is allocated and cleared, the first oldsize bytes of the
 * existing table (if there is one) are copied into it, the old table
 * is freed and *table is pointed at the new one.  The work is done
 * between splhigh() and splx().
 *
 * With VINUMDEBUG the caller's file and line are passed on to the
 * tracing allocator.
 */
#ifdef VINUMDEBUG
void
expand_table(void **table, int oldsize, int newsize, char *file, int line)
#else
void
expand_table(void **table, int oldsize, int newsize)
#endif
{
    int *temp;
    int s;

    if (newsize <= oldsize)                         /* not growing, nothing to do */
        return;

    s = splhigh();
#ifdef VINUMDEBUG
    temp = (int *) MMalloc(newsize, file, line);    /* get the new table */
#else
    temp = (int *) Malloc(newsize);                 /* get the new table */
#endif
    CHECKALLOC(temp, "vinum: Can't expand table\n");
    bzero((char *) temp, newsize);                  /* start with all zeros */
    if (*table != NULL) {                           /* we have old contents, */
        bcopy((char *) *table, (char *) temp, oldsize); /* carry them over to the new table */
#ifdef VINUMDEBUG
        FFree(*table, file, line);
#else
        Free(*table);
#endif
    }
    *table = temp;
    splx(s);
}
#ifdef VINUMDEBUG
/* Bookkeeping for the debug allocator (MMalloc / FFree) */
#define MALLOCENTRIES 16384 /* capacity of the malloced table */
int malloccount = 0; /* number of entries of malloced in use */
int highwater = 0; /* highest index ever allocated */
struct mc malloced[MALLOCENTRIES]; /* one entry per outstanding allocation */
#define FREECOUNT 64 /* capacity of the freeinfo ring */
int freecount = FREECOUNT; /* for debugger */
int lastfree = 0; /* next freeinfo slot to fill; wraps at FREECOUNT */
struct mc freeinfo[FREECOUNT]; /* recent frees, recorded when DEBUG_MEMFREE is set */
int total_malloced; /* sum of the sizes of outstanding allocations */
static int mallocseq = 0; /* sequence number given to the next allocation */
/*
 * MMalloc: debug allocator which records every allocation.
 *
 * size is the number of bytes wanted; file and line identify the
 * caller and are stored with the allocation (only the base name of
 * file is kept).
 *
 * Returns the allocated memory, or NULL if the trace table is full or
 * malloc() fails; both failures are logged.  On success the
 * allocation is appended to malloced[] and total_malloced and
 * highwater are updated.  If the new block overlaps an allocation
 * already in the table, the debugger is entered.
 */
caddr_t
MMalloc(int size, char *file, int line)
{
    int s;
    caddr_t result;
    int i;

    if (malloccount >= MALLOCENTRIES) {             /* too many */
        log(LOG_ERR, "vinum: can't allocate table space to trace memory allocation\n");
        return NULL;                                /* can't continue */
    }
    /* Wait for malloc if we can */
    result = malloc(size,
        M_DEVBUF,
        curthread->td_intr_nesting_level == 0 ? M_WAITOK : M_NOWAIT);
    if (result == NULL) {
        log(LOG_ERR, "vinum: can't allocate %d bytes from %s:%d\n", size, file, line);
        return NULL;
    }

    s = splhigh();
    for (i = 0; i < malloccount; i++) {
        if (((result + size) > malloced[i].address)
            && (result < malloced[i].address + malloced[i].size)) /* overlap */
            kdb_enter("Malloc overlap");
    }

    /* Record the allocation in the next free slot */
    i = malloccount++;
    total_malloced += size;
    microtime(&malloced[i].time);
    malloced[i].seq = mallocseq++;
    malloced[i].size = size;
    malloced[i].line = line;
    malloced[i].address = result;
    strlcpy(malloced[i].file, basename(file), MCFILENAMELEN);

    if (malloccount > highwater)
        highwater = malloccount;
    splx(s);
    return result;
}
/*
 * FFree: counterpart of MMalloc.
 *
 * mem is the memory to free; file and line identify the caller.
 *
 * The block is looked up in malloced[].  If it is found, it is
 * zeroed and freed, the table entry is removed (later entries move
 * down one place) and the counters are adjusted.  With DEBUG_MEMFREE
 * set, the free is also recorded in the freeinfo ring.
 *
 * If mem is not in the table, nothing is freed: the event is logged
 * and the debugger is entered.
 */
void
FFree(void *mem, char *file, int line)
{
    int s;
    int i;

    s = splhigh();
    for (i = 0; i < malloccount; i++) {
        if ((caddr_t) mem == malloced[i].address) { /* found it */
            bzero(mem, malloced[i].size);           /* XXX */
            free(mem, M_DEVBUF);
            malloccount--;
            total_malloced -= malloced[i].size;
            if (debug & DEBUG_MEMFREE) {            /* keep track of recent frees */
                microtime(&freeinfo[lastfree].time);
                freeinfo[lastfree].seq = malloced[i].seq;
                freeinfo[lastfree].size = malloced[i].size;
                freeinfo[lastfree].line = line;
                freeinfo[lastfree].address = mem;
                /*
                 * The name may be shorter than the field, so copy it
                 * as a string (as MMalloc does) rather than reading a
                 * fixed MCFILENAMELEN bytes from it.
                 */
                strlcpy(freeinfo[lastfree].file, basename(file), MCFILENAMELEN);
                if (++lastfree == FREECOUNT)
                    lastfree = 0;
            }
            if (i < malloccount)                    /* more coming after */
                bcopy(&malloced[i + 1], &malloced[i], (malloccount - i) * sizeof(struct mc));
            splx(s);
            return;
        }
    }
    splx(s);
    log(LOG_ERR,
        "Freeing unallocated data at 0x%p from %s, line %d\n",
        mem,
        file,
        line);
    kdb_enter("Free");
}
/*
 * vinum_meminfo: fill in the struct meminfo at data with the current
 * allocation statistics: number of outstanding allocations, their
 * total size, the address of the trace table and the high water mark.
 */
void
vinum_meminfo(caddr_t data)
{
    struct meminfo *info = (struct meminfo *) data;

    info->malloced = malloced;
    info->mallocs = malloccount;
    info->highwater = highwater;
    info->total_malloced = total_malloced;
}
/*
 * vinum_mallocinfo: return one entry of the allocation trace table.
 *
 * data points to a struct mc whose seq field holds, on entry, the
 * index of the entry wanted.  On success the address, size, line,
 * seq and file fields are overwritten with those of the entry and 0
 * is returned.  If the index is not below malloccount, ENOENT is
 * returned and data is left untouched.
 */
int
vinum_mallocinfo(caddr_t data)
{
    struct mc *out = (struct mc *) data;
    unsigned int idx = out->seq;                    /* index of entry to return */
    struct mc *src;

    if (idx >= malloccount)
        return ENOENT;

    src = &malloced[idx];
    out->address = src->address;
    out->size = src->size;
    out->line = src->line;
    out->seq = src->seq;
    strlcpy(out->file, src->file, MCFILENAMELEN);
    return 0;
}
/*
 * return the nth request trace buffer entry.  This
 * is indexed back from the current entry (which
 * has index 0)
 *
 * data points to a struct rqinfo buffer whose first word holds the
 * index on entry; on success the buffer is overwritten with the
 * entry and 0 is returned.  An index outside 0 .. RQINFO_SIZE - 1
 * yields ENOENT and leaves the buffer alone.
 */
int
vinum_rqinfo(caddr_t data)
{
    struct rqinfo *rq = (struct rqinfo *) data;
    int ent = *(int *) data;                        /* 1st word is index */
    int lastent = rqip - rqinfo;                    /* entry number of current entry */

    /*
     * The index is supplied by the caller.  A negative value would
     * otherwise pass the upper bound check and let lastent - ent - 1
     * run off the end of the table.
     */
    if (ent < 0 || ent >= RQINFO_SIZE)              /* out of the table */
        return ENOENT;
    if ((ent = lastent - ent - 1) < 0)
        ent += RQINFO_SIZE;                         /* roll over backwards */
    bcopy(&rqinfo[ent], rq, sizeof(struct rqinfo));
    return 0;
}
#endif

View File

@ -1,321 +0,0 @@
/*-
* Copyright (c) 1997, 1998, 1999
* Nan Yang Computer Services Limited. All rights reserved.
*
* Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
*
* Written by Greg Lehey
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumobj.h,v 1.7 2003/05/23 01:08:58 grog Exp $
* $FreeBSD$
*/
/*
* Definitions of Vinum objects: drive, subdisk, plex and volume.
* This file is included both by userland programs and by kernel code.
* The userland structures are a subset of the kernel structures, and
* all userland fields are at the beginning, so that a simple copy in
* the length of the userland structure will be sufficient. In order
* to perform this copy, vinumioctl must know both structures, so it
* includes this file again with _KERNEL reset.
*/
#ifndef _KERNEL
/*
* Flags for all objects. Most of them only apply
* to specific objects, but we currently have
* space for all in any 32 bit flags word.
* Each flag is a distinct single bit, so flags can be or'ed together.
*/
enum objflags {
VF_LOCKED = 1, /* somebody has locked access to this object */
VF_LOCKING = 2, /* we want access to this object */
VF_OPEN = 4, /* object has openers */
VF_WRITETHROUGH = 8, /* volume: write through */
VF_INITED = 0x10, /* unit has been initialized */
VF_WLABEL = 0x20, /* label area is writable */
VF_LABELLING = 0x40, /* unit is currently being labelled */
VF_WANTED = 0x80, /* someone is waiting to obtain a lock */
VF_RAW = 0x100, /* raw volume (no file system) */
VF_LOADED = 0x200, /* module is loaded */
VF_CONFIGURING = 0x400, /* somebody is changing the config */
VF_WILL_CONFIGURE = 0x800, /* somebody wants to change the config */
VF_CONFIG_INCOMPLETE = 0x1000, /* haven't finished changing the config */
VF_CONFIG_SETUPSTATE = 0x2000, /* set a volume up if all plexes are empty */
VF_READING_CONFIG = 0x4000, /* we're reading config database from disk */
VF_FORCECONFIG = 0x8000, /* configure drives even with different names */
VF_NEWBORN = 0x10000, /* for objects: we've just created it */
VF_CONFIGURED = 0x20000, /* for drives: we read the config */
VF_STOPPING = 0x40000, /* for vinum_conf: stop on last close */
VF_DAEMONOPEN = 0x80000, /* the daemon has us open (only superdev) */
VF_CREATED = 0x100000, /* for volumes: freshly created, more than new */
VF_HOTSPARE = 0x200000, /* for drives: use as hot spare */
VF_RETRYERRORS = 0x400000, /* don't down subdisks on I/O errors */
VF_HASDEBUG = 0x800000, /* set if we support debug */
};
#endif
/*
 * Global configuration information for the vinum subsystem.
 *
 * The kernel sees this as struct _vinum_conf, pointing at the kernel
 * object structures; userland sees it as struct __vinum_conf,
 * pointing at the userland (underscore-prefixed) variants.  The
 * debug fields at the end exist only in a kernel built with
 * VINUMDEBUG.
 */
#ifdef _KERNEL
struct _vinum_conf
#else
struct __vinum_conf
#endif
{
int version; /* version of structures */
#ifdef _KERNEL
/* Pointers to vinum structures */
struct drive *drive;
struct sd *sd;
struct plex *plex;
struct volume *volume;
#else
/* Pointers to vinum structures */
struct _drive *drive;
struct _sd *sd;
struct _plex *plex;
struct _volume *volume;
#endif
/* the number allocated of each object */
int drives_allocated;
int subdisks_allocated;
int plexes_allocated;
int volumes_allocated;
/* and the number currently in use */
/*
* Note that drives_used is not valid during drive recognition
* (vinum_scandisk and friends). Many invalid drives are added and
* later removed; the count isn't correct until we leave
* vinum_scandisk.
*/
int drives_used;
int subdisks_used;
int plexes_used;
int volumes_used;
int flags; /* see above */
#define VINUM_MAXACTIVE 30000 /* maximum number of active requests */
int active; /* current number of requests outstanding */
int maxactive; /* maximum number of requests ever outstanding */
#ifdef _KERNEL
#ifdef VINUMDEBUG
/* NOTE(review): presumably the most recent request and buffer, kept for debugging -- confirm against the request code */
struct request *lastrq;
struct buf *lastbuf;
#endif
#endif
};
/*
 * Use these defines to simplify code: shorthand for the object
 * tables and the flags word in the global vinum_conf.
 */
#define DRIVE vinum_conf.drive
#define SD vinum_conf.sd
#define PLEX vinum_conf.plex
#define VOL vinum_conf.volume
#define VFLAGS vinum_conf.flags
/*
* A drive corresponds to a disk slice. We use a different term to show
* the difference in usage: it doesn't have to be a slice, and could
* theoretically be a complete, unpartitioned disk
*
* The kernel sees this as struct drive, userland as struct _drive;
* the fields under _KERNEL at the end exist only in the kernel
* version.
*/
#ifdef _KERNEL
struct drive
#else
struct _drive
#endif
{
char devicename[MAXDRIVENAME]; /* name of the slice it's on */
struct vinum_label label; /* and the label information */
enum drivestate state; /* current state */
int flags; /* flags */
int subdisks_allocated; /* number of entries in sd */
int subdisks_used; /* and the number used */
int blocksize; /* size of fs blocks */
int pid; /* of locker */
u_int64_t sectors_available; /* number of sectors still available */
int secsperblock;
int lasterror; /* last error on drive */
int driveno; /* index of drive in vinum_conf */
int opencount; /* number of up subdisks */
u_int64_t reads; /* number of reads on this drive */
u_int64_t writes; /* number of writes on this drive */
u_int64_t bytes_read; /* number of bytes read */
u_int64_t bytes_written; /* number of bytes written */
#define DRIVE_MAXACTIVE 30000 /* maximum number of active requests */
int active; /* current number of requests outstanding */
int maxactive; /* maximum number of requests ever outstanding */
int freelist_size; /* number of entries alloced in free list */
int freelist_entries; /* number of entries used in free list */
struct drive_freelist *freelist; /* sorted list of free space on drive */
#ifdef _KERNEL
u_int sectorsize;
off_t mediasize;
struct cdev *dev; /* device information */
#ifdef VINUMDEBUG
char lockfilename[16]; /* name of file from which we were locked */
int lockline; /* and the line number */
#endif
#endif
};
/*
 * A subdisk.  The kernel sees this as struct sd, userland as
 * struct _sd; the fields under _KERNEL at the end exist only in the
 * kernel version.
 */
#ifdef _KERNEL
struct sd
#else
struct _sd
#endif
{
char name[MAXSDNAME]; /* name of subdisk */
enum sdstate state; /* state */
int flags;
int lasterror; /* last error occurred */
/* offsets in blocks */
int64_t driveoffset; /* offset on drive */
/*
* plexoffset is the offset from the beginning
* of the plex to the very first part of the
* subdisk, in sectors. For striped, RAID-4 and
* RAID-5 plexes, only the first stripe is
* located at this offset
*/
int64_t plexoffset; /* offset in plex */
u_int64_t sectors; /* and length in sectors */
int sectorsize; /* sector size for DIOCGSECTORSIZE */
int plexno; /* index of plex, if it belongs */
int driveno; /* index of the drive on which it is located */
int sdno; /* our index in vinum_conf */
int plexsdno; /* and our number in our plex */
/* (undefined if no plex) */
u_int64_t reads; /* number of reads on this subdisk */
u_int64_t writes; /* number of writes on this subdisk */
u_int64_t bytes_read; /* number of bytes read */
u_int64_t bytes_written; /* number of bytes written */
/* revive parameters */
u_int64_t revived; /* block number of current revive request */
int revive_blocksize; /* revive block size (bytes) */
int revive_interval; /* and time to wait between transfers */
pid_t reviver; /* PID of reviving process */
/* init parameters */
u_int64_t initialized; /* block number of current init request */
int init_blocksize; /* init block size (bytes) */
int init_interval; /* and time to wait between transfers */
#ifdef _KERNEL
struct request *waitlist; /* list of requests waiting on revive op */
struct cdev *dev; /* associated device */
#endif
};
/*
 * A plex.  The kernel sees this as struct plex, userland as
 * struct _plex; the fields under _KERNEL at the end (the range lock
 * table and its mutex among them) exist only in the kernel version.
 */
#ifdef _KERNEL
struct plex
#else
struct _plex
#endif
{
enum plexorg organization; /* Plex organization */
enum plexstate state; /* and current state */
u_int64_t length; /* total length of plex (sectors) */
int flags;
int stripesize; /* size of stripe or raid band, in sectors */
int sectorsize; /* sector size for DIOCGSECTORSIZE */
int subdisks; /* number of associated subdisks */
int subdisks_allocated; /* number of subdisks allocated space for */
int *sdnos; /* list of component subdisks */
int plexno; /* index of plex in vinum_conf */
int volno; /* index of volume */
int volplexno; /* number of plex in volume */
/* Statistics */
u_int64_t reads; /* number of reads on this plex */
u_int64_t writes; /* number of writes on this plex */
u_int64_t bytes_read; /* number of bytes read */
u_int64_t bytes_written; /* number of bytes written */
u_int64_t recovered_reads; /* number of recovered read operations */
u_int64_t degraded_writes; /* number of degraded writes */
u_int64_t parityless_writes; /* number of parityless writes */
u_int64_t multiblock; /* requests that needed more than one block */
u_int64_t multistripe; /* requests that needed more than one stripe */
int sddowncount; /* number of subdisks down */
/* Lock information */
int usedlocks; /* number currently in use */
int lockwaits; /* and number of waits for locks */
off_t checkblock; /* block number for parity op */
char name[MAXPLEXNAME]; /* name of plex */
#ifdef _KERNEL
struct rangelock *lock; /* ranges of locked addresses */
struct mtx *lockmtx; /* lock mutex, one of plexmutex [] */
daddr_t last_addr; /* last address read from this plex */
struct cdev *dev; /* associated device */
#endif
};
/*
 * A volume.  The kernel sees this as struct volume, userland as
 * struct _volume; the device pointer at the end exists only in the
 * kernel version.
 */
#ifdef _KERNEL
struct volume
#else
struct _volume
#endif
{
char name[MAXVOLNAME]; /* name of volume */
enum volumestate state; /* current state */
int plexes; /* number of plexes */
int preferred_plex; /* index of plex to read from,
* -1 for round-robin */
/*
* index of plex used for last read, for
* round-robin.
*/
int last_plex_read;
int volno; /* volume number */
int flags; /* status and configuration flags */
int openflags; /* flags supplied to last open(2) */
u_int64_t size; /* size of volume */
int blocksize; /* logical block size */
int sectorsize; /* sector size for DIOCGSECTORSIZE */
int active; /* number of outstanding requests active */
int subops; /* and the number of suboperations */
/* Statistics */
u_int64_t bytes_read; /* number of bytes read */
u_int64_t bytes_written; /* number of bytes written */
u_int64_t reads; /* number of reads on this volume */
u_int64_t writes; /* number of writes on this volume */
u_int64_t recovered_reads; /* reads recovered from another plex */
/*
* Unlike subdisks in the plex, space for the
* plex pointers is static.
*/
int plex[MAXPLEX]; /* index of plexes */
#ifdef _KERNEL
struct cdev *dev; /* associated device */
#endif
};

View File

@ -1,236 +0,0 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumparser.c,v 1.25 2003/05/07 03:33:28 grog Exp grog $
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/*
* This file contains the parser for the configuration routines. It's used
* both in the kernel and in the user interface program, thus the separate file.
*/
/*
* Go through a text and split up into text tokens. These are either non-blank
* sequences, or any sequence (except \0) enclosed in ' or ". Embedded ' or
* " characters may be escaped by \, which otherwise has no special meaning.
*
* Delimit by following with a \0, and return pointers to the starts at token [].
* Return the number of tokens found as the return value.
*
* This method has the restriction that a closing " or ' must be followed by
* grey space.
*
* Error conditions are end of line before end of quote, or no space after
* a closing quote. In this case, tokenize() returns -1.
*/
#include <sys/param.h>
#include <dev/vinum/vinumkw.h>
#ifdef _KERNEL
#include <sys/systm.h>
#include <sys/conf.h>
#include <machine/setjmp.h>
/* All this mess for a single struct definition */
#include <sys/uio.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <dev/vinum/vinumvar.h>
#include <dev/vinum/vinumio.h>
#include <dev/vinum/vinumext.h>
/*
 * Check for white space (blank or tab only).  The argument is fully
 * parenthesized so that an expression argument (for example a
 * conditional expression) is compared as a whole.  It is evaluated
 * up to twice, so it must not have side effects.
 */
#define iswhite(c) (((c) == ' ') || ((c) == '\t'))
#else /* userland */
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#define iswhite isspace /* use the ctype macro */
#endif
/* enum keyword is defined in vinumvar.h */
/*
 * Helpers for building the keyword tables below.
 *
 * keypair(x) and flagkeypair(x) each expand to a brace-enclosed
 * initializer of two values: a string made from the argument by
 * stringification (with a leading "-" for flagkeypair) and the
 * identifier kw_<x> made by token pasting.
 *
 * KEYWORDSET(x) expands to a brace-enclosed initializer holding the
 * number of elements in x followed by x itself.  x must be a true
 * array of struct _keywords, not a pointer, because the count is
 * computed with sizeof.
 */
#define keypair(x) { #x, kw_##x } /* create pair "foo", kw_foo */
#define flagkeypair(x) { "-"#x, kw_##x } /* create pair "-foo", kw_foo */
#define KEYWORDSET(x) {sizeof (x) / sizeof (struct _keywords), x}
/* Normal keywords. These are all the words that vinum knows. */
struct _keywords keywords[] =
{keypair(drive),
keypair(partition),
keypair(sd),
keypair(subdisk),
keypair(plex),
keypair(volume),
keypair(vol),
keypair(setupstate),
keypair(readpol),
keypair(org),
keypair(name),
keypair(writethrough),
keypair(writeback),
keypair(device),
keypair(concat),
keypair(raid4),
keypair(raid5),
keypair(striped),
keypair(plexoffset),
keypair(driveoffset),
keypair(length),
keypair(len),
keypair(size),
keypair(state),
keypair(round),
keypair(prefer),
keypair(preferred),
keypair(rename),
keypair(detached),
#ifndef _KERNEL /* for vinum(8) only */
keypair(debug),
keypair(stripe),
keypair(mirror),
#endif
keypair(attach),
keypair(detach),
keypair(printconfig),
keypair(saveconfig),
keypair(replace),
keypair(create),
keypair(read),
keypair(modify),
keypair(list),
keypair(l),
keypair(ld),
keypair(ls),
keypair(lp),
keypair(lv),
keypair(info),
keypair(set),
keypair(rm),
keypair(mv),
keypair(move),
keypair(init),
keypair(resetconfig),
keypair(start),
keypair(stop),
keypair(makedev),
keypair(help),
keypair(quit),
keypair(setdaemon),
keypair(getdaemon),
keypair(max),
keypair(replace),
keypair(readpol),
keypair(resetstats),
keypair(setstate),
keypair(checkparity),
keypair(rebuildparity),
keypair(dumpconfig),
keypair(retryerrors)
};
struct keywordset keyword_set = KEYWORDSET(keywords);
#ifndef _KERNEL
/*
 * Flag keywords.  Built with flagkeypair(), so each entry's name is
 * the letter preceded by "-" ("-f", "-d", ...).  This table and its
 * set are compiled only when _KERNEL is not defined.
 */
struct _keywords flag_keywords[] = {
    flagkeypair(f),
    flagkeypair(d),
    flagkeypair(v),
    flagkeypair(s),
    flagkeypair(r),
    flagkeypair(w)
};

/* The flag table together with its element count. */
struct keywordset flag_set = KEYWORDSET(flag_keywords);
#endif /* !_KERNEL */
/*
 * Take a blank separated list of tokens and turn it into a list of
 * individual nul-delimited strings, modifying the text at cptr in
 * place.  Build a list of pointers at token, which must have room for
 * maxtoken entries.
 *
 * A token is either a run of non-blank characters or a sequence
 * enclosed in ' or ".  A quoted token keeps its opening and closing
 * quote characters.  A quote preceded by \ does not close the token.
 * The closing quote must be followed by white space, which is
 * overwritten with the terminating \0.
 *
 * Scanning stops at \0, at a newline, or at a # that starts a token.
 * It also stops as soon as maxtoken tokens have been started; in that
 * case the last token is not delimited and runs to the end of the
 * text.
 *
 * Return the number of tokens found, or -1 on error (end of line
 * inside a quoted token, or no white space after a closing quote).
 * If maxtoken is not positive, nothing is scanned and maxtoken itself
 * is returned.
 */
int
tokenize(char *cptr, char *token[], int maxtoken)
{
    char delim;                 /* delimiter for searching for the partner */
    int tokennr;                /* index of this token */

    for (tokennr = 0; tokennr < maxtoken;) {
        /*
         * iswhite may be isspace(3), which requires a value
         * representable as unsigned char, hence the casts.
         */
        while (iswhite((unsigned char) *cptr))
            cptr++;             /* skip initial white space */
        if ((*cptr == '\0') || (*cptr == '\n') || (*cptr == '#')) /* end of line */
            return tokennr;     /* return number of tokens found */
        delim = *cptr;
        token[tokennr] = cptr;  /* point to it */
        tokennr++;              /* one more */
        if (tokennr == maxtoken) /* run off the end? */
            return tokennr;
        if ((delim == '\'') || (delim == '"')) { /* delimitered */
            for (;;) {
                cptr++;
                if ((*cptr == delim) && (cptr[-1] != '\\')) { /* found the partner */
                    cptr++;     /* move on past */
                    if (!iswhite((unsigned char) *cptr)) /* error, no space after closing quote */
                        return -1;
                    *cptr++ = '\0'; /* delimit */
                    /*
                     * This token is complete.  Without leaving the
                     * loop here the scan would continue into the
                     * following text (and possibly beyond the
                     * terminating \0) and could only end in an error.
                     */
                    break;
                } else if ((*cptr == '\0') || (*cptr == '\n')) /* end of line */
                    return -1;
            }
        } else {                /* not quoted */
            while ((*cptr != '\0') && (!iswhite((unsigned char) *cptr)) && (*cptr != '\n'))
                cptr++;
            if (*cptr != '\0')  /* not end of the line, */
                *cptr++ = '\0'; /* delimit and move to the next */
        }
    }
    return maxtoken;            /* only reached if maxtoken <= 0 */
}
/*
 * Look up `name' in `keywordset' and return the keyword value stored
 * with the first entry whose name compares equal (strcmp).  Return
 * kw_invalid_keyword if `name' is NULL or no entry matches.
 */
enum keyword
get_keyword(char *name, struct keywordset *keywordset)
{
    struct _keywords *table = keywordset->k; /* start of the keyword table */
    int idx = 0;

    if (name == NULL)           /* no parameter, nothing to look up */
        return kw_invalid_keyword;

    while (idx < keywordset->size) {
        if (strcmp(name, table[idx].name) == 0)
            return (enum keyword) table[idx].keyword;
        idx++;
    }
    return kw_invalid_keyword;
}

View File

@ -1,700 +0,0 @@
/*-
* Copyright (c) 1997, 1998
* Cybernet Corporation and Nan Yang Computer Services Limited.
* All rights reserved.
*
* This software was developed as part of the NetMAX project.
*
* Written by Greg Lehey
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Cybernet Corporation
* and Nan Yang Computer Services Limited
* 4. Neither the name of the Companies nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumraid5.c,v 1.23 2003/02/08 03:32:45 grog Exp $
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <dev/vinum/vinumhdr.h>
#include <dev/vinum/request.h>
#include <sys/resourcevar.h>
/*
 * Parameters which describe the current transfer.
 * These are only used for calculation, but they
 * need to be passed to other functions, so it's
 * tidier to put them in a struct.
 *
 * bre5() fills in one of these per stripe and hands
 * it to setrqebounds() for each request element.
 */
struct metrics {
daddr_t stripebase; /* base address of stripe (1st subdisk) */
int stripeoffset; /* offset in stripe */
int stripesectors; /* total sectors to transfer in this stripe */
daddr_t sdbase; /* offset in subdisk of stripe base */
int sdcount; /* number of disks involved in this transfer */
daddr_t diskstart; /* remember where this transfer starts */
int psdno; /* number of parity subdisk */
int badsdno; /* number of down subdisk, if there is one (-1 if none) */
int firstsdno; /* first data subdisk number */
/* These correspond to the fields in rqelement, sort of */
int useroffset; /* offset of the start in the user buffer */
/*
* Initial offset and length values for the first
* data block
*/
int initoffset; /* start address of block to transfer */
/*
* NOTE(review): initlen is a short while datalen,
* which is loaded from it, is an int.  Confirm
* that a stripe size can never exceed the range
* of short.
*/
short initlen; /* length in sectors of data transfer */
/* Define a normal operation */
int dataoffset; /* start address of block to transfer */
int datalen; /* length in sectors of data transfer */
/* Define a group operation */
int groupoffset; /* subdisk offset of group operation */
int grouplen; /* length in sectors of group operation */
/* Define a normal write operation */
int writeoffset; /* subdisk offset of normal write */
int writelen; /* length in sectors of write operation */
enum xferinfo flags; /* to check what we're doing */
int rqcount; /* number of elements in request */
};
/*
 * Prototypes for the functions used in this file.
 *
 * Note that the third parameter of bre5 is called
 * diskstart here but diskaddr in the definition
 * below; it is the same in/out pointer.
 */
enum requeststatus bre5(struct request *rq,
int plexno,
daddr_t * diskstart,
daddr_t diskend);
void complete_raid5_write(struct rqelement *);
enum requeststatus build_rq_buffer(struct rqelement *rqe, struct plex *plex);
void setrqebounds(struct rqelement *rqe, struct metrics *mp);
/*
 * Define the low-level requests needed to perform
 * a high-level I/O operation for a specific plex
 * 'plexno'.
 *
 * Parameters:
 *  rq        the request being built; rq->bp is the
 *            user's buffer.
 *  plexno    index of the plex in PLEX [].
 *  diskaddr  in/out: plex-relative address at which
 *            to start.  It is advanced past each data
 *            block for which a request is built.
 *  diskend   end address of the transfer.
 *
 * Return values:
 *  REQUEST_OK      requests were built for the whole
 *                  range up to diskend.
 *  REQUEST_EOF     *diskaddr is at or beyond the end
 *                  of the plex.
 *  REQUEST_DOWN    a read found a second subdisk
 *                  which is not up.
 *  REQUEST_ENOMEM  allocrqg or build_rq_buffer failed.
 *  On write, if a second bad subdisk is found, the
 *  non-zero status returned by checksdstate is
 *  passed back.
 *
 * On read, return on the second bad subdisk, so
 * that the caller (build_read_request) can try
 * alternatives.
 *
 * On entry to this routine, the prq structures
 * are not assigned. The assignment is performed
 * by expandrq(). Strictly speaking, the elements
 * rqe->sdno of all entries should be set to -1,
 * since 0 (from bzero) is a valid subdisk number.
 * We avoid this problem by initializing the ones
 * we use, and not looking at the others (index >=
 * prq->requests).
 */
enum requeststatus
bre5(struct request *rq,
int plexno,
daddr_t * diskaddr,
daddr_t diskend)
{
struct metrics m; /* most of the information */
struct sd *sd;
struct plex *plex;
struct buf *bp; /* user's bp */
struct rqgroup *rqg; /* the request group that we will create */
struct rqelement *rqe; /* point to this request information */
int rsectors; /* sectors remaining in this stripe */
int mysdno; /* another sd index in loops */
int rqno; /* request number */
rqg = NULL; /* shut up, damn compiler */
m.diskstart = *diskaddr; /* start of transfer */
bp = rq->bp; /* buffer pointer */
plex = &PLEX[plexno]; /* point to the plex */
/* Each iteration of this loop handles the part of the transfer in one stripe. */
while (*diskaddr < diskend) { /* until we get it all sorted out */
if (*diskaddr >= plex->length) /* beyond the end of the plex */
return REQUEST_EOF; /* can't continue */
m.badsdno = -1; /* no bad subdisk yet */
/* Part A: Define the request */
/*
* First, calculate some sizes:
* The offset of the start address from
* the start of the stripe.
*/
m.stripeoffset = *diskaddr % (plex->stripesize * (plex->subdisks - 1));
/*
* The plex-relative address of the
* start of the stripe.
*/
m.stripebase = *diskaddr - m.stripeoffset;
/* subdisk containing the parity stripe */
if (plex->organization == plex_raid5)
m.psdno = plex->subdisks - 1
- (*diskaddr / (plex->stripesize * (plex->subdisks - 1)))
% plex->subdisks;
else /* RAID-4 */
m.psdno = plex->subdisks - 1;
/*
* The number of the subdisk in which
* the start is located.
*/
m.firstsdno = m.stripeoffset / plex->stripesize;
if (m.firstsdno >= m.psdno) /* at or past parity sd */
m.firstsdno++; /* increment it */
/*
* The offset from the beginning of
* the stripe on this subdisk.
*/
m.initoffset = m.stripeoffset % plex->stripesize;
/* The offset of the stripe start relative to this subdisk */
m.sdbase = m.stripebase / (plex->subdisks - 1);
m.useroffset = *diskaddr - m.diskstart; /* The offset of the start in the user buffer */
/*
* The number of sectors to transfer in the
* current (first) subdisk.
*/
m.initlen = min(diskend - *diskaddr, /* the amount remaining to transfer */
plex->stripesize - m.initoffset); /* and the amount left in this block */
/*
* The number of sectors to transfer in this stripe
* is the minimum of the amount remaining to transfer
* and the amount left in this stripe.
*/
m.stripesectors = min(diskend - *diskaddr,
plex->stripesize * (plex->subdisks - 1) - m.stripeoffset);
/* The number of data subdisks involved in this request */
m.sdcount = (m.stripesectors + m.initoffset + plex->stripesize - 1) / plex->stripesize;
/* Part B: decide what kind of transfer this will be.
* start and end addresses of the transfer in
* the current block.
*
* There are a number of different kinds of
* transfer, each of which relates to a
* specific subdisk:
*
* 1. Normal read. All participating subdisks
* are up, and the transfer can be made
* directly to the user buffer. The bounds
* of the transfer are described by
* m.dataoffset and m.datalen. We have
* already calculated m.initoffset and
* m.initlen, which define the parameters
* for the first data block.
*
* 2. Recovery read. One participating
* subdisk is down. To recover data, all
* the other subdisks, including the parity
* subdisk, must be read. The data is
* recovered by exclusive-oring all the
* other blocks. The bounds of the
* transfer are described by m.groupoffset
* and m.grouplen.
*
* 3. A read request may request reading both
* available data (normal read) and
* non-available data (recovery read).
* This can be a problem if the address
* ranges of the two reads do not coincide:
* in this case, the normal read needs to
* be extended to cover the address range
* of the recovery read, and must thus be
* performed out of malloced memory.
*
* 4. Normal write. All the participating
* subdisks are up. The bounds of the
* transfer are described by m.dataoffset
* and m.datalen. Since these values
* differ for each block, we calculate the
* bounds for the parity block
* independently as the maximum of the
* individual blocks and store these values
* in m.writeoffset and m.writelen. This
* write proceeds in four phases:
*
* i. Read the old contents of each block
* and the parity block.
* ii. ``Remove'' the old contents from
* the parity block with exclusive or.
* iii. ``Insert'' the new contents of the
* block in the parity block, again
* with exclusive or.
*
* iv. Write the new contents of the data
* blocks and the parity block. The data
* block transfers can be made directly from
* the user buffer.
*
* 5. Degraded write where the data block is
* not available. The bounds of the
* transfer are described by m.groupoffset
* and m.grouplen. This requires the
* following steps:
*
* i. Read in all the other data blocks,
* excluding the parity block.
*
* ii. Recreate the parity block from the
* other data blocks and the data to be
* written.
*
* iii. Write the parity block.
*
* 6. Parityless write, a write where the
* parity block is not available. This is
* in fact the simplest: just write the
* data blocks. This can proceed directly
* from the user buffer. The bounds of the
* transfer are described by m.dataoffset
* and m.datalen.
*
* 7. Combination of degraded data block write
* and normal write. In this case the
* address ranges of the reads may also
* need to be extended to cover all
* participating blocks.
*
* All requests in a group transfer transfer
* the same address range relative to their
* subdisk. The individual transfers may
* vary, but since our group of requests is
* all in a single slice, we can define a
* range in which they all fall.
*
* In the following code section, we determine
* which kind of transfer we will perform. If
* there is a group transfer, we also decide
* its bounds relative to the subdisks. At
* the end, we have the following values:
*
* m.flags indicates the kinds of transfers
* we will perform.
* m.initoffset indicates the offset of the
* beginning of any data operation relative
* to the beginning of the stripe base.
* m.initlen specifies the length of any data
* operation.
* m.dataoffset contains the same value as
* m.initoffset.
* m.datalen contains the same value as
* m.initlen. Initially dataoffset and
* datalen describe the parameters for the
* first data block; while building the data
* block requests, they are updated for each
* block.
* m.groupoffset indicates the offset of any
* group operation relative to the beginning
* of the stripe base.
* m.grouplen specifies the length of any
* group operation.
* m.writeoffset indicates the offset of a
* normal write relative to the beginning of
* the stripe base. This value differs from
* m.dataoffset in that it applies to the
* entire operation, and not just the first
* block.
* m.writelen specifies the total span of a
* normal write operation. writeoffset and
* writelen are used to define the parity
* block.
*/
m.groupoffset = 0; /* assume no group... */
m.grouplen = 0; /* until we know we have one */
m.writeoffset = m.initoffset; /* start offset of transfer */
m.writelen = 0; /* nothing to write yet */
m.flags = 0; /* no flags yet */
rsectors = m.stripesectors; /* remaining sectors to examine */
m.dataoffset = m.initoffset; /* start at the beginning of the transfer */
m.datalen = m.initlen;
if (m.sdcount > 1) {
plex->multiblock++; /* more than one block for the request */
/*
* If we have two transfers that don't overlap,
* (one at the end of the first block, the other
* at the beginning of the second block),
* it's cheaper to split them.
*/
if (rsectors < plex->stripesize) {
m.sdcount = 1; /* just one subdisk */
m.stripesectors = m.initlen; /* and just this many sectors */
rsectors = m.initlen; /* and in the loop counter */
}
}
if (SD[plex->sdnos[m.psdno]].state < sd_reborn) /* is our parity subdisk down? */
m.badsdno = m.psdno; /* note that it's down */
if (bp->b_iocmd == BIO_READ) { /* read operation */
/* Walk the data subdisks of this stripe, skipping the parity subdisk. */
for (mysdno = m.firstsdno; rsectors > 0; mysdno++) {
if (mysdno == m.psdno) /* ignore parity on read */
mysdno++;
if (mysdno == plex->subdisks) /* wraparound */
mysdno = 0;
if (mysdno == m.psdno) /* parity, */
mysdno++; /* we've given already */
if (SD[plex->sdnos[mysdno]].state < sd_reborn) { /* got a bad subdisk, */
if (m.badsdno >= 0) /* we had one already, */
return REQUEST_DOWN; /* we can't take a second */
m.badsdno = mysdno; /* got the first */
m.groupoffset = m.dataoffset; /* define the bounds */
m.grouplen = m.datalen;
m.flags |= XFR_RECOVERY_READ; /* we need recovery */
plex->recovered_reads++; /* count another one */
} else
m.flags |= XFR_NORMAL_READ; /* normal read */
/* Update the pointers for the next block */
m.dataoffset = 0; /* back to the start of the stripe */
rsectors -= m.datalen; /* remaining sectors to examine */
m.datalen = min(rsectors, plex->stripesize); /* amount that will fit in this block */
}
} else { /* write operation */
for (mysdno = m.firstsdno; rsectors > 0; mysdno++) {
if (mysdno == m.psdno) /* parity stripe, we've dealt with that */
mysdno++;
if (mysdno == plex->subdisks) /* wraparound */
mysdno = 0;
if (mysdno == m.psdno) /* parity, */
mysdno++; /* we've given already */
sd = &SD[plex->sdnos[mysdno]];
if (sd->state != sd_up) {
enum requeststatus s;
s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */
if (s && (m.badsdno >= 0)) { /* second bad disk, */
int sdno;
/*
* If the parity disk is down, there's
* no recovery. We make all involved
* subdisks stale. Otherwise, we
* should be able to recover, but it's
* like pulling teeth. Fix it later.
*/
/*
* NOTE(review): this loop visits the plex's
* subdisk indexes 0 .. m.sdcount - 1, not the
* ones starting at m.firstsdno which take part
* in the transfer -- confirm that this is
* intended.  The inner `sd' also shadows the
* function-level `sd'.
*/
for (sdno = 0; sdno < m.sdcount; sdno++) {
struct sd *sd = &SD[plex->sdnos[sdno]];
if (sd->state >= sd_reborn) /* sort of up, */
set_sd_state(sd->sdno, sd_stale, setstate_force); /* make it stale */
}
return s; /* and crap out */
}
m.badsdno = mysdno; /* note which one is bad */
m.flags |= XFR_DEGRADED_WRITE; /* we need recovery */
plex->degraded_writes++; /* count another one */
m.groupoffset = m.dataoffset; /* define the bounds */
m.grouplen = m.datalen;
} else {
m.flags |= XFR_NORMAL_WRITE; /* normal write operation */
/* Grow [writeoffset, writeoffset + writelen) to cover this block too */
if (m.writeoffset > m.dataoffset) { /* move write operation lower */
m.writelen = max(m.writeoffset + m.writelen,
m.dataoffset + m.datalen)
- m.dataoffset;
m.writeoffset = m.dataoffset;
} else
m.writelen = max(m.writeoffset + m.writelen,
m.dataoffset + m.datalen)
- m.writeoffset;
}
/* Update the pointers for the next block */
m.dataoffset = 0; /* back to the start of the stripe */
rsectors -= m.datalen; /* remaining sectors to examine */
m.datalen = min(rsectors, plex->stripesize); /* amount that will fit in this block */
}
if (m.badsdno == m.psdno) { /* got a bad parity block, */
struct sd *psd = &SD[plex->sdnos[m.psdno]];
if (psd->state == sd_down)
set_sd_state(psd->sdno, sd_obsolete, setstate_force); /* it's obsolete now */
else if (psd->state == sd_crashed)
set_sd_state(psd->sdno, sd_stale, setstate_force); /* it's stale now */
m.flags &= ~XFR_NORMAL_WRITE; /* this write isn't normal, */
m.flags |= XFR_PARITYLESS_WRITE; /* it's parityless */
plex->parityless_writes++; /* count another one */
}
}
/* reset the initial transfer values */
m.dataoffset = m.initoffset; /* start at the beginning of the transfer */
m.datalen = m.initlen;
/* decide how many requests we need */
if (m.flags & (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE))
/* doing a recovery read or degraded write, */
m.rqcount = plex->subdisks; /* all subdisks */
else if (m.flags & XFR_NORMAL_WRITE) /* normal write, */
m.rqcount = m.sdcount + 1; /* all data blocks and the parity block */
else /* parityless write or normal read */
m.rqcount = m.sdcount; /* just the data blocks */
/* Part C: build the requests */
rqg = allocrqg(rq, m.rqcount); /* get a request group */
if (rqg == NULL) { /* malloc failed */
bp->b_error = ENOMEM;
bp->b_ioflags |= BIO_ERROR;
return REQUEST_ENOMEM;
}
rqg->plexno = plexno;
rqg->flags = m.flags;
rqno = 0; /* index in the request group */
/* 1: PARITY BLOCK */
/*
* Are we performing an operation which requires parity? In that case,
* work out the parameters and define the parity block.
* XFR_PARITYOP is XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE
*/
if (m.flags & XFR_PARITYOP) { /* need parity */
rqe = &rqg->rqe[rqno]; /* point to element */
sd = &SD[plex->sdnos[m.psdno]]; /* the subdisk in question */
rqe->rqg = rqg; /* point back to group */
rqe->flags = (m.flags | XFR_PARITY_BLOCK | XFR_MALLOCED) /* always malloc parity block */
&~(XFR_NORMAL_READ | XFR_PARITYLESS_WRITE); /* transfer flags without data op stuff */
setrqebounds(rqe, &m); /* set up the bounds of the transfer */
rqe->sdno = sd->sdno; /* subdisk number */
rqe->driveno = sd->driveno;
/*
* NOTE(review): on failure we return without
* releasing rqg here; presumably it is already
* attached to rq by allocrqg and freed with the
* request -- confirm.
*/
if (build_rq_buffer(rqe, plex)) /* build the buffer */
return REQUEST_ENOMEM; /* can't do it */
rqe->b.b_iocmd = BIO_READ; /* we must read first */
/* From here on m.sdcount is the end index (in rqg->rqe) of the data block requests */
m.sdcount++; /* adjust the subdisk count */
rqno++; /* and point to the next request */
}
/*
* 2: DATA BLOCKS
* Now build up requests for the blocks required
* for individual transfers
*/
for (mysdno = m.firstsdno; rqno < m.sdcount; mysdno++, rqno++) {
if (mysdno == m.psdno) /* parity, */
mysdno++; /* we've given already */
if (mysdno == plex->subdisks) /* got to the end, */
mysdno = 0; /* wrap around */
if (mysdno == m.psdno) /* parity, */
mysdno++; /* we've given already */
rqe = &rqg->rqe[rqno]; /* point to element */
sd = &SD[plex->sdnos[mysdno]]; /* the subdisk in question */
rqe->rqg = rqg; /* point to group */
if (m.flags & XFR_NEEDS_MALLOC) /* we need a malloced buffer first */
rqe->flags = m.flags | XFR_DATA_BLOCK | XFR_MALLOCED; /* transfer flags */
else
rqe->flags = m.flags | XFR_DATA_BLOCK; /* transfer flags */
if (mysdno == m.badsdno) { /* this is the bad subdisk */
rqg->badsdno = rqno; /* note which one */
rqe->flags |= XFR_BAD_SUBDISK; /* note that it's dead */
/*
* we can't read or write from/to it,
* but we don't need to malloc
*/
rqe->flags &= ~(XFR_MALLOCED | XFR_NORMAL_READ | XFR_NORMAL_WRITE);
}
setrqebounds(rqe, &m); /* set up the bounds of the transfer */
rqe->useroffset = m.useroffset; /* offset in user buffer */
rqe->sdno = sd->sdno; /* subdisk number */
rqe->driveno = sd->driveno;
if (build_rq_buffer(rqe, plex)) /* build the buffer */
return REQUEST_ENOMEM; /* can't do it */
/*
* NOTE(review): XFR_BAD_SUBDISK is set in
* rqe->flags above, but this function never sets
* it in m.flags, so the second test below looks
* as if it is always true.  Confirm whether
* rqe->flags was intended.
*/
if ((m.flags & XFR_PARITYOP) /* parity operation, */
&&((m.flags & XFR_BAD_SUBDISK) == 0)) /* and not the bad subdisk, */
rqe->b.b_iocmd = BIO_READ; /* we must read first */
/* Now update pointers for the next block */
*diskaddr += m.datalen; /* skip past what we've done */
m.stripesectors -= m.datalen; /* deduct from what's left */
m.useroffset += m.datalen; /* and move on in the user buffer */
m.datalen = min(m.stripesectors, plex->stripesize); /* and recalculate */
m.dataoffset = 0; /* start at the beginning of next block */
}
/*
* 3: REMAINING BLOCKS FOR RECOVERY
* Finally, if we have a recovery operation, build
* up transfers for the other subdisks. Follow the
* subdisks around until we get to where we started.
* These requests use only the group parameters.
*/
if ((rqno < m.rqcount) /* haven't done them all already */
&&(m.flags & (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE))) {
for (; rqno < m.rqcount; rqno++, mysdno++) {
if (mysdno == m.psdno) /* parity, */
mysdno++; /* we've given already */
if (mysdno == plex->subdisks) /* got to the end, */
mysdno = 0; /* wrap around */
if (mysdno == m.psdno) /* parity, */
mysdno++; /* we've given already */
rqe = &rqg->rqe[rqno]; /* point to element */
sd = &SD[plex->sdnos[mysdno]]; /* the subdisk in question */
rqe->rqg = rqg; /* point to group */
rqe->sdoffset = m.sdbase + m.groupoffset; /* start of transfer */
rqe->dataoffset = 0; /* for tidiness' sake */
rqe->groupoffset = 0; /* group starts at the beginning */
rqe->datalen = 0;
rqe->grouplen = m.grouplen;
rqe->buflen = m.grouplen;
rqe->flags = (m.flags | XFR_MALLOCED) /* transfer flags without data op stuff */
&~XFR_DATAOP;
rqe->sdno = sd->sdno; /* subdisk number */
rqe->driveno = sd->driveno;
if (build_rq_buffer(rqe, plex)) /* build the buffer */
return REQUEST_ENOMEM; /* can't do it */
rqe->b.b_iocmd = BIO_READ; /* we must read first */
}
}
/*
* We need to lock the address range before
* doing anything. We don't have to be
* performing a recovery operation: somebody
* else could be doing so, and the results could
* influence us. Note the fact here, we'll perform
* the lock in launch_requests.
*/
rqg->lockbase = m.stripebase;
if (*diskaddr < diskend) /* didn't finish the request on this stripe */
plex->multistripe++; /* count another one */
}
return REQUEST_OK;
}
/*
 * Helper function for bre5: set the bounds of the
 * transfer described by one request element so
 * that the buffer covers no more than the element
 * needs.
 *
 * An element carries up to two ranges:
 *
 *  - an "individual" range (rqe->dataoffset,
 *    rqe->datalen).  For the parity block of a
 *    normal write it is taken from mp->writeoffset
 *    and mp->writelen; for any other element with
 *    a data operation it is taken from
 *    mp->dataoffset and mp->datalen.
 *  - a "group" range (rqe->groupoffset,
 *    rqe->grouplen), taken from mp->groupoffset and
 *    mp->grouplen, for recovery read and degraded
 *    write.
 *
 * Which ranges apply is decided from rqe->flags
 * alone, so the caller must set the flags before
 * calling.  When both ranges apply, the transfer
 * starts at the lower of the two offsets and the
 * other range is placed relative to that start.
 *
 * On return rqe->sdoffset, dataoffset, datalen,
 * groupoffset, grouplen and buflen are all set;
 * buflen is the larger of the two range ends.
 */
void
setrqebounds(struct rqelement *rqe, struct metrics *mp)
{
    int indoffset;              /* offset of the individual range */
    int indlen;                 /* length of the individual range */
    int both;                   /* group range applies as well */

    if ((rqe->flags & (XFR_NORMAL_WRITE | XFR_PARITY_BLOCK))
        == (XFR_NORMAL_WRITE | XFR_PARITY_BLOCK)) {
        /* parity block of a normal write: spans the whole write */
        indoffset = mp->writeoffset;
        indlen = mp->writelen;
        /*
         * With a combined normal and degraded write, the
         * area of the degraded write is read in as well,
         * even though it will be zeroed out in the second
         * phase: the length of the read is currently the
         * only way to tell the buffer builder the buffer
         * size.
         * FIXME XXX
         */
        both = (rqe->flags & XFR_DEGRADED_WRITE) != 0;
    } else if (rqe->flags & XFR_DATAOP) {
        /* data operation: use this block's own bounds */
        indoffset = mp->dataoffset;
        indlen = mp->datalen;
        both = (rqe->flags & XFR_GROUPOP) != 0;
    } else {
        /* group operation only */
        rqe->sdoffset = mp->sdbase + mp->groupoffset;
        rqe->dataoffset = 0;
        rqe->groupoffset = 0;
        rqe->datalen = 0;
        rqe->grouplen = mp->grouplen;
        rqe->buflen = max(rqe->dataoffset + rqe->datalen,
            rqe->groupoffset + rqe->grouplen);
        return;
    }

    if (!both) {
        /* individual range only, at the start of the buffer */
        rqe->sdoffset = mp->sdbase + indoffset;
        rqe->dataoffset = 0;
        rqe->groupoffset = 0;
        rqe->datalen = indlen;
        rqe->grouplen = 0;
    } else {
        if (mp->groupoffset < indoffset) {
            /* group range starts lower: it begins the buffer */
            rqe->sdoffset = mp->sdbase + mp->groupoffset;
            rqe->dataoffset = indoffset - mp->groupoffset;
            rqe->groupoffset = 0;
        } else {
            /* individual range starts first (or at the same place) */
            rqe->sdoffset = mp->sdbase + indoffset;
            rqe->dataoffset = 0;
            rqe->groupoffset = mp->groupoffset - indoffset;
        }
        rqe->datalen = indlen;
        rqe->grouplen = mp->grouplen;
    }

    /* total buffer length: far enough to hold whichever range ends later */
    rqe->buflen = max(rqe->dataoffset + rqe->datalen,
        rqe->groupoffset + rqe->grouplen);
}
/* Local Variables: */
/* fill-column: 50 */
/* End: */

File diff suppressed because it is too large Load Diff

View File

@ -1,620 +0,0 @@
/*-
* Copyright (c) 1997, 1998, 1999
* Nan Yang Computer Services Limited. All rights reserved.
*
* Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
*
* Written by Greg Lehey
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumrevive.c,v 1.19 2003/05/08 04:34:47 grog Exp grog $
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <dev/vinum/vinumhdr.h>
#include <dev/vinum/request.h>
/*
 * Revive a block of a subdisk. Return an error
 * indication. EAGAIN means successful copy, but
 * that more blocks remain to be copied. EINVAL
 * means that the subdisk isn't associated with a
 * plex (which means a programming error if we get
 * here at all; FIXME).
 *
 * Other return values visible in this function:
 * 0 once the last block has been copied (the
 * subdisk is then forced to sd_up and the
 * configuration is saved), ENOMEM if no buffer
 * could be obtained, and the b_error value of a
 * failed read or write.
 *
 * Each call copies at most one revive block,
 * starting at sd->revived (a sector offset in the
 * subdisk). Data blocks are read via the volume
 * (or via the plex if it is unattached) and
 * written to the subdisk; parity blocks of
 * RAID-4/5 plexes are recomputed by
 * parityrebuild() instead of being read.
 */
int
revive_block(int sdno)
{
int s; /* priority level */
struct sd *sd;
struct plex *plex;
struct volume *vol;
struct buf *bp;
int error = EAGAIN;
int size; /* size of revive block, bytes */
daddr_t plexblkno; /* lblkno in plex */
int psd; /* parity subdisk number */
u_int64_t stripe; /* stripe number */
int paritysd = 0; /* set if this is the parity stripe */
struct rangelock *lock; /* for locking */
daddr_t stripeoffset; /* offset in stripe */
plexblkno = 0; /* to keep the compiler happy */
sd = &SD[sdno];
lock = NULL;
if (sd->plexno < 0) /* no plex? */
return EINVAL;
plex = &PLEX[sd->plexno]; /* point to plex */
/* vol is only ever tested against NULL below */
if (plex->volno >= 0)
vol = &VOL[plex->volno];
else
vol = NULL;
/*
 * Sanitize the block size: fall back to the default
 * if it is unset or not a whole number of sectors,
 * and clamp it to the maximum.
 */
if ((sd->revive_blocksize == 0) /* no block size */
||(sd->revive_blocksize & ((1 << DEV_BSHIFT) - 1))) /* or invalid block size */
sd->revive_blocksize = DEFAULT_REVIVE_BLOCKSIZE;
else if (sd->revive_blocksize > MAX_REVIVE_BLOCKSIZE)
sd->revive_blocksize = MAX_REVIVE_BLOCKSIZE;
/* bytes to copy this call: one block, clipped to what is left of the subdisk */
size = min(sd->revive_blocksize >> DEV_BSHIFT, sd->sectors - sd->revived) << DEV_BSHIFT;
sd->reviver = curproc->p_pid; /* note who last had a bash at it */
/* Now decide where to read from */
switch (plex->organization) {
case plex_concat:
plexblkno = sd->revived + sd->plexoffset; /* corresponding address in plex */
break;
case plex_striped:
stripeoffset = sd->revived % plex->stripesize; /* offset from beginning of stripe */
/* don't let the transfer run past the end of this stripe */
if (stripeoffset + (size >> DEV_BSHIFT) > plex->stripesize)
size = (plex->stripesize - stripeoffset) << DEV_BSHIFT;
plexblkno = sd->plexoffset /* base */
+ (sd->revived - stripeoffset) * plex->subdisks /* offset to beginning of stripe */
+ stripeoffset; /* offset from beginning of stripe */
break;
case plex_raid4:
case plex_raid5:
stripeoffset = sd->revived % plex->stripesize; /* offset from beginning of stripe */
plexblkno = sd->plexoffset /* base */
+ (sd->revived - stripeoffset) * (plex->subdisks - 1) /* offset to beginning of stripe */
+stripeoffset; /* offset from beginning of stripe */
stripe = (sd->revived / plex->stripesize); /* stripe number */
/* Make sure we don't go beyond the end of the band. */
size = min(size, (plex->stripesize - stripeoffset) << DEV_BSHIFT);
/* RAID-4 keeps parity on the last subdisk; RAID-5 rotates it per stripe */
if (plex->organization == plex_raid4)
psd = plex->subdisks - 1; /* parity subdisk for this stripe */
else
psd = plex->subdisks - 1 - stripe % plex->subdisks; /* parity subdisk for this stripe */
paritysd = plex->sdnos[psd] == sdno; /* note if it's the parity subdisk */
/*
 * Now adjust for the strangenesses
 * in RAID-4 and RAID-5 striping.
 */
if (sd->plexsdno > psd) /* beyond the parity stripe, */
plexblkno -= plex->stripesize; /* one stripe less */
else if (paritysd)
plexblkno -= plex->stripesize * sd->plexsdno; /* go back to the beginning of the band */
break;
case plex_disorg: /* to keep the compiler happy */
break; /* to keep the pedants happy */
}
if (paritysd) { /* we're reviving a parity block, */
/* parityrebuild() also takes the range lock and hands it back via &lock */
bp = parityrebuild(plex, sd->revived, size, rebuildparity, &lock, NULL); /* do the grunt work */
if (bp == NULL) /* no buffer space */
return ENOMEM; /* chicken out */
} else { /* data block */
s = splbio();
bp = geteblk(size); /* Get a buffer */
splx(s);
if (bp == NULL)
return ENOMEM;
/*
 * Amount to transfer: block size, unless it
 * would overlap the end.
 */
bp->b_bcount = size;
bp->b_resid = bp->b_bcount;
bp->b_blkno = plexblkno; /* start here */
if (isstriped(plex)) /* we need to lock striped plexes */
lock = lockrange(plexblkno << DEV_BSHIFT, bp, plex); /* lock it */
if (vol != NULL) /* it's part of a volume, */
/*
 * First, read the data from the volume. We
 * don't care which plex, that's bre's job.
 */
bp->b_dev = VOL[plex->volno].dev; /* create the device number */
else /* it's an unattached plex */
bp->b_dev = PLEX[sd->plexno].dev; /* create the device number */
bp->b_iocmd = BIO_READ; /* either way, read it */
bp->b_flags = 0;
vinumstart(bp, 1);
bufwait(bp);
}
/* bp now holds the data (or rebuilt parity) for this block, or an error */
if (bp->b_ioflags & BIO_ERROR) {
error = bp->b_error;
if (lock) /* we took a lock, */
unlockrange(sd->plexno, lock); /* give it back */
} else
/* Now write to the subdisk */
{
/* reuse the same buffer, retargeted at the subdisk being revived */
bp->b_dev = SD[sdno].dev; /* create the device number */
bp->b_flags &= ~B_DONE; /* no longer done */
bp->b_ioflags = 0;
bp->b_iocmd = BIO_WRITE;
bp->b_resid = bp->b_bcount;
bp->b_blkno = sd->revived; /* write it to here */
sdio(bp); /* perform the I/O */
bufwait(bp);
if (bp->b_ioflags & BIO_ERROR)
error = bp->b_error;
else {
sd->revived += bp->b_bcount >> DEV_BSHIFT; /* moved this much further down */
if (sd->revived >= sd->sectors) { /* finished */
sd->revived = 0;
set_sd_state(sdno, sd_up, setstate_force); /* bring the sd up */
log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state));
save_config(); /* and save the updated configuration */
error = 0; /* we're done */
}
}
if (lock) /* we took a lock, */
unlockrange(sd->plexno, lock); /* give it back */
/*
 * Relaunch the requests queued on this subdisk.
 * NOTE(review): ->next is read from the request
 * after it has been passed to launch_requests();
 * confirm the request cannot be freed or requeued
 * before that read.
 */
while (sd->waitlist) { /* we have waiting requests */
#ifdef VINUMDEBUG
struct request *rq = sd->waitlist;
if (debug & DEBUG_REVIVECONFLICT)
log(LOG_DEBUG,
"Relaunch revive conflict sd %d: %p\n%s dev %d.%d, offset 0x%jx, length %ld\n",
rq->sdno,
rq,
rq->bp->b_iocmd == BIO_READ ? "Read" : "Write",
major(rq->bp->b_dev),
minor(rq->bp->b_dev),
(intmax_t) rq->bp->b_blkno,
rq->bp->b_bcount);
#endif
launch_requests(sd->waitlist, 1); /* do them now */
sd->waitlist = sd->waitlist->next; /* and move on to the next */
}
}
/* release the buffer, marked invalid and with the error flag cleared */
if (bp->b_qindex == 0) { /* not on a queue, */
bp->b_flags |= B_INVAL;
bp->b_ioflags &= ~BIO_ERROR;
brelse(bp); /* is this kosher? */
}
return error;
}
/*
 * Check or rebuild the parity blocks of a RAID-4
 * or RAID-5 plex.
 *
 * The variables plex->checkblock and
 * plex->rebuildblock represent the
 * subdisk-relative address of the stripe we're
 * looking at, not the plex-relative address.  We
 * store it in the plex and not as a local
 * variable because this function could be
 * stopped, and we don't want to repeat the part
 * we've already done.  This is also the reason
 * why we don't initialize it here except at the
 * end.  It gets initialized with the plex on
 * creation.
 *
 * Each call to this function processes at most
 * one stripe.  We can't loop in this function,
 * because we're unstoppable, so we have to be
 * called repeatedly from userland.
 *
 * data is the ioctl message: data->index is the
 * plex number, data->op the operation and
 * data->offset the subdisk-relative position to
 * work on.  The same memory is reused for the
 * reply, whose error field is set to:
 *
 *   EAGAIN  the block was processed, more remain
 *   0       the last block has been processed
 *   EINVAL  the plex is not RAID-4 or RAID-5
 *   EIO     the plex is below plex_flaky, or a
 *           parity check found a mismatch (the
 *           offset is reported in reply->msg)
 *   ENOMEM  no buffers could be obtained
 *   other   b_error of a failed transfer
 */
void
parityops(struct vinum_ioctl_msg *data)
{
    int plexno;
    struct plex *plex;
    int size;						    /* I/O transfer size, bytes */
    struct rangelock *lock;				    /* lock on stripe */
    struct _ioctl_reply *reply;
    off_t pstripe;					    /* pointer to our stripe counter */
    struct buf *pbp;
    off_t errorloc;					    /* offset of parity error */
    enum parityop op;					    /* operation to perform */

    plexno = data->index;
    op = data->op;
    reply = (struct _ioctl_reply *) data;		    /* the reply overlays the request */
    reply->error = EAGAIN;				    /* expect to repeat this call */
    plex = &PLEX[plexno];
    if (!isparity(plex)) {				    /* not RAID-4 or RAID-5 */
	reply->error = EINVAL;
	return;
    } else if (plex->state < plex_flaky) {
	reply->error = EIO;
	strcpy(reply->msg, "Plex is not completely accessible\n");
	return;
    }
    pstripe = data->offset;
    size = min(DEFAULT_REVIVE_BLOCKSIZE,		    /* one block at a time */
	plex->stripesize << DEV_BSHIFT);

    /*
     * parityrebuild() works out the stripe and the
     * parity subdisk itself, and returns with the
     * range locked.
     */
    pbp = parityrebuild(plex, pstripe, size, op, &lock, &errorloc); /* do the grunt work */
    if (pbp == NULL) {					    /* no buffer space */
	reply->error = ENOMEM;
	return;						    /* chicken out */
    }
    /*
     * Now we have a result in the data buffer of
     * the parity buffer header, which we have kept.
     * Decide what to do with it.
     */
    reply->msg[0] = '\0';				    /* until shown otherwise */
    if ((pbp->b_ioflags & BIO_ERROR) == 0) {		    /* no error */
	if ((op == rebuildparity)
	    || (op == rebuildandcheckparity)) {
	    pbp->b_iocmd = BIO_WRITE;
	    pbp->b_resid = pbp->b_bcount;
	    sdio(pbp);					    /* write the parity block */
	    bufwait(pbp);
	}
	if (((op == checkparity)
		|| (op == rebuildandcheckparity))
	    && (errorloc != -1)) {
	    /* a mismatch is only an error if we didn't just repair it */
	    if (op == checkparity)
		reply->error = EIO;
	    sprintf(reply->msg,
		"Parity incorrect at offset 0x%jx\n",
		(intmax_t) errorloc);
	}
	if (reply->error == EAGAIN) {			    /* still OK, */
	    plex->checkblock = pstripe + (pbp->b_bcount >> DEV_BSHIFT);	/* moved this much further down */
	    if (plex->checkblock >= SD[plex->sdnos[0]].sectors) { /* finished */
		plex->checkblock = 0;
		reply->error = 0;
	    }
	}
    }
    /* a read error from parityrebuild() or a failed write above wins */
    if (pbp->b_ioflags & BIO_ERROR)
	reply->error = pbp->b_error;
    pbp->b_flags |= B_INVAL;
    pbp->b_ioflags &= ~BIO_ERROR;
    brelse(pbp);
    unlockrange(plexno, lock);
}
/*
 * Rebuild a parity stripe.  Return pointer to
 * parity bp, or NULL if the buffers needed could
 * not be allocated (in which case nothing is
 * locked and nothing needs releasing).  On
 * successful return,
 *
 * 1.  The band is locked.  The caller must unlock
 *     the band and release the buffer header.
 *
 * 2.  All buffer headers except pbp have been
 *     released.  The caller must release pbp.
 *     pbp is addressed at the parity subdisk and
 *     contains the newly calculated parity; it
 *     has not been written.  If any read failed,
 *     BIO_ERROR and b_error are set in pbp.
 *
 * 3.  For checkparity and rebuildandcheckparity,
 *     the parity is compared with the current
 *     parity block.  If it's different, the
 *     offset of the error is returned to
 *     errorloc, otherwise -1.  The caller can set
 *     the value of the pointer to NULL if this is
 *     called for rebuilding parity.
 *
 * pstripe is the subdisk-relative base address of
 * the data to be reconstructed, size is the size
 * of the transfer in bytes.
 */
struct buf *
parityrebuild(struct plex *plex,
    u_int64_t pstripe,
    int size,
    enum parityop op,
    struct rangelock **lockp,
    off_t * errorloc)
{
    int error;
    int s;
    int sdno;
    u_int64_t stripe;					    /* stripe number */
    int *parity_buf;					    /* buffer address for current parity block */
    int *newparity_buf;					    /* and for new parity block */
    int mysize;						    /* I/O transfer size for this transfer */
    int isize;						    /* mysize in ints */
    int i;
    int psd;						    /* parity subdisk number */
    int newpsd;						    /* and "subdisk number" of new parity */
    struct buf **bpp;					    /* pointers to our bps */
    struct buf *pbp;					    /* buffer header for parity stripe */
    int *sbuf;
    int bufcount;					    /* number of buffers we need */
    off_t badloc;					    /* offset of first parity mismatch */

    stripe = pstripe / plex->stripesize;		    /* stripe number */
    /*
     * Use the same parity placement as
     * revive_block(): fixed on the last subdisk
     * for RAID-4, rotating for RAID-5.
     */
    if (plex->organization == plex_raid4)
	psd = plex->subdisks - 1;			    /* parity subdisk for this stripe */
    else
	psd = plex->subdisks - 1 - stripe % plex->subdisks; /* parity subdisk for this stripe */
    parity_buf = NULL;					    /* to keep the compiler happy */
    error = 0;

    /*
     * It's possible that the default transfer size
     * we chose is not a factor of the stripe size.
     * We *must* limit this operation to a single
     * stripe, at least for RAID-5 rebuild, since
     * the parity subdisk changes between stripes,
     * so in this case we need to perform a short
     * transfer.  Set variable mysize to reflect
     * this.
     */
    mysize = min(size, (plex->stripesize * (stripe + 1) - pstripe) << DEV_BSHIFT);
    isize = mysize / (sizeof(int));			    /* number of ints in the buffer */
    bufcount = plex->subdisks + 1;			    /* sd buffers plus result buffer */
    newpsd = plex->subdisks;
    bpp = (struct buf **) Malloc(bufcount * sizeof(struct buf *)); /* array of pointers to bps */
    if (bpp == NULL) {
	printf("vinum: can't allocate buffer space for parity op.\n");
	return NULL;
    }

    /* First, build requests for all subdisks */
    for (sdno = 0; sdno < bufcount; sdno++) {		    /* for each subdisk */
	if ((sdno != psd) || (op != rebuildparity)) {
	    /* Get a buffer header and initialize it. */
	    s = splbio();
	    bpp[sdno] = geteblk(mysize);		    /* Get a buffer */
	    if (bpp[sdno] == NULL) {
		while (sdno-- > 0) {			    /* release the ones we got */
		    /*
		     * When only rebuilding, the slot for
		     * the old parity was skipped above
		     * and holds no buffer.
		     */
		    if ((sdno == psd) && (op == rebuildparity))
			continue;
		    bpp[sdno]->b_flags |= B_INVAL;
		    brelse(bpp[sdno]);			    /* give back our resources */
		}
		splx(s);
		Free(bpp);				    /* don't leak the pointer array */
		printf("vinum: can't allocate buffer space for parity op.\n");
		return NULL;				    /* no bpps */
	    }
	    splx(s);
	    if (sdno == psd)
		parity_buf = (int *) bpp[sdno]->b_data;
	    if (sdno == newpsd)				    /* the new one? */
		bpp[sdno]->b_dev = SD[plex->sdnos[psd]].dev; /* write back to the parity SD */
	    else
		bpp[sdno]->b_dev = SD[plex->sdnos[sdno]].dev; /* device number */
	    bpp[sdno]->b_iocmd = BIO_READ;		    /* either way, read it */
	    bpp[sdno]->b_flags = 0;
	    bpp[sdno]->b_bcount = mysize;
	    bpp[sdno]->b_resid = bpp[sdno]->b_bcount;
	    bpp[sdno]->b_blkno = pstripe;		    /* transfer from here */
	}
    }

    /* Initialize result buffer */
    pbp = bpp[newpsd];
    newparity_buf = (int *) bpp[newpsd]->b_data;
    bzero(newparity_buf, mysize);

    /*
     * Now lock the stripe with the first non-parity
     * bp as locking bp.
     */
    *lockp = lockrange(pstripe * plex->stripesize * (plex->subdisks - 1),
	bpp[psd ? 0 : 1],
	plex);

    /*
     * Then issue requests for all subdisks in
     * parallel.  Don't transfer the parity stripe
     * if we're rebuilding parity, unless we also
     * want to check it.
     */
    for (sdno = 0; sdno < plex->subdisks; sdno++) {	    /* for each real subdisk */
	if ((sdno != psd) || (op != rebuildparity)) {
	    sdio(bpp[sdno]);
	}
    }

    /*
     * Next, wait for the requests to complete.
     * We wait in the order in which they were
     * issued, which isn't necessarily the order in
     * which they complete, but we don't have a
     * convenient way of doing the latter, and the
     * delay is minimal.
     */
    for (sdno = 0; sdno < plex->subdisks; sdno++) {	    /* for each subdisk */
	if ((sdno != psd) || (op != rebuildparity)) {
	    bufwait(bpp[sdno]);
	    if (bpp[sdno]->b_ioflags & BIO_ERROR)	    /* can't read, */
		error = bpp[sdno]->b_error;
	    else if (sdno != psd) {			    /* update parity */
		sbuf = (int *) bpp[sdno]->b_data;
		for (i = 0; i < isize; i++)
		    newparity_buf[i] ^= sbuf[i];	    /* xor in the buffer */
	    }
	}
	if (sdno != psd) {				    /* release all bps except parity */
	    bpp[sdno]->b_flags |= B_INVAL;
	    brelse(bpp[sdno]);				    /* give back our resources */
	}
    }

    /*
     * If we're checking, compare the calculated
     * and the read parity block.  If they're
     * different, return the plex-relative offset;
     * otherwise return -1.
     */
    if ((op == checkparity)
	|| (op == rebuildandcheckparity)) {
	badloc = -1;					    /* no error yet */
	for (i = 0; i < isize; i++) {
	    if (parity_buf[i] != newparity_buf[i]) {
		badloc = (off_t) (pstripe << DEV_BSHIFT) * (plex->subdisks - 1)
		    + i * sizeof(int);
		break;
	    }
	}
	if (errorloc != NULL)				    /* caller wants to know */
	    *errorloc = badloc;
	bpp[psd]->b_flags |= B_INVAL;
	brelse(bpp[psd]);				    /* give back our resources */
    }
    /* release our resources */
    Free(bpp);
    if (error) {
	pbp->b_ioflags |= BIO_ERROR;
	pbp->b_error = error;
    }
    return pbp;
}
/*
 * Initialize a subdisk by writing zeroes to the
 * complete address space.  If verify is set,
 * check each transfer for correctness.
 *
 * Each call to this function writes (and maybe
 * checks) a single block, starting at
 * sd->initialized (a sector offset).  If the
 * read-back is not all zeroes, the block is
 * written and checked again until it is.
 *
 * Returns EAGAIN if the block was written and
 * more remain, 0 when the last block has been
 * written (the subdisk is then set to
 * sd_initialized and the configuration saved),
 * EINVAL if the subdisk doesn't belong to a plex,
 * ENOMEM if no buffer could be obtained, or the
 * b_error of a failed transfer.
 */
int
initsd(int sdno, int verify)
{
    int s;						    /* priority level */
    struct sd *sd;
    struct buf *bp;
    int error;
    int size;						    /* size of init block, bytes */
    int verified;					    /* set when we're happy with what we wrote */

    error = 0;
    sd = &SD[sdno];
    if (sd->plexno < 0)					    /* no plex? */
	return EINVAL;

    /*
     * Validate the block size the same way
     * revive_block() does.  A size of less than a
     * sector would otherwise give a zero-length
     * transfer, and we would never make progress.
     */
    if ((sd->init_blocksize == 0)			    /* no block size */
	|| (sd->init_blocksize & ((1 << DEV_BSHIFT) - 1))) /* or invalid block size */
	sd->init_blocksize = DEFAULT_REVIVE_BLOCKSIZE;
    else if (sd->init_blocksize > MAX_REVIVE_BLOCKSIZE)
	sd->init_blocksize = MAX_REVIVE_BLOCKSIZE;

    size = min(sd->init_blocksize >> DEV_BSHIFT, sd->sectors - sd->initialized) << DEV_BSHIFT;

    verified = 0;
    /*
     * Stop as soon as anything fails: retrying the
     * write only makes sense if the read-back
     * succeeded but showed the wrong contents.
     */
    while ((!verified) && (error == 0)) {		    /* until we're happy with it, */
	s = splbio();
	bp = geteblk(size);				    /* Get a buffer */
	splx(s);
	if (bp == NULL)
	    return ENOMEM;

	bp->b_bcount = size;
	bp->b_resid = bp->b_bcount;
	bp->b_blkno = sd->initialized;			    /* write it to here */
	bzero(bp->b_data, bp->b_bcount);
	bp->b_dev = SD[sdno].dev;			    /* create the device number */
	bp->b_iocmd = BIO_WRITE;
	sdio(bp);					    /* perform the I/O */
	bufwait(bp);
	if (bp->b_ioflags & BIO_ERROR)
	    error = bp->b_error;
	if (bp->b_qindex == 0) {			    /* not on a queue, */
	    bp->b_flags |= B_INVAL;
	    bp->b_ioflags &= ~BIO_ERROR;
	    brelse(bp);					    /* is this kosher? */
	}
	if ((error == 0) && verify) {			    /* check that it got there */
	    s = splbio();
	    bp = geteblk(size);				    /* get a buffer */
	    if (bp == NULL) {
		splx(s);
		error = ENOMEM;
	    } else {
		bp->b_bcount = size;
		bp->b_resid = bp->b_bcount;
		bp->b_blkno = sd->initialized;		    /* read from here */
		bp->b_dev = SD[sdno].dev;		    /* create the device number */
		bp->b_iocmd = BIO_READ;			    /* read it back */
		splx(s);
		sdio(bp);
		bufwait(bp);
		/*
		 * XXX Bug fix code.  This is hopefully no
		 * longer needed (21 February 2000).
		 *
		 * The buffer is all zeroes if the first
		 * byte is zero and every byte equals its
		 * successor.
		 */
		if (bp->b_ioflags & BIO_ERROR)
		    error = bp->b_error;
		else if ((*bp->b_data != 0)		    /* first byte spammed */
		    || (bcmp(bp->b_data, &bp->b_data[1], bp->b_bcount - 1))) { /* or one of the others */
		    printf("vinum: init error on %s, offset 0x%llx sectors\n",
			sd->name,
			(long long) sd->initialized);
		    verified = 0;
		} else
		    verified = 1;
		if (bp->b_qindex == 0) {		    /* not on a queue, */
		    bp->b_flags |= B_INVAL;
		    bp->b_ioflags &= ~BIO_ERROR;
		    brelse(bp);				    /* is this kosher? */
		}
	    }
	} else
	    verified = 1;
    }
    if (error == 0) {					    /* did it, */
	sd->initialized += size >> DEV_BSHIFT;		    /* moved this much further down */
	if (sd->initialized >= sd->sectors) {		    /* finished */
	    sd->initialized = 0;
	    set_sd_state(sdno, sd_initialized, setstate_force);	/* bring the sd up */
	    log(LOG_INFO, "vinum: %s is %s\n", sd->name, sd_state(sd->state));
	    save_config();				    /* and save the updated configuration */
	} else						    /* more to go, */
	    error = EAGAIN;				    /* ya'll come back, see? */
    }
    return error;
}
/* Local Variables: */
/* fill-column: 50 */
/* End: */

File diff suppressed because it is too large Load Diff

View File

@ -1,257 +0,0 @@
/*-
* Copyright (c) 1997, 1998
* Nan Yang Computer Services Limited. All rights reserved.
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $FreeBSD$
*/
/*
* This file gets read by makestatetext to create text files
* with the names of the states, so don't change the file
* format
*/
/*
 * Volume states.  The comments on the first two
 * states follow the name they describe; the
 * comment about being up and functional describes
 * the state that follows it.
 */
enum volumestate {
volume_unallocated,
/* present but unused. Must be 0 */
volume_uninit,
/* mentioned elsewhere but not known to the configuration */
volume_down,
/* The volume is up and functional, but not all plexes may be available */
volume_up,
volume_laststate = volume_up /* last value, for table dimensions */
};
/*
 * Plex states.  Each comment describes the state
 * that follows it.  The order of declaration
 * matters: the states from the first "up" state
 * onwards are grouped together so that code can
 * test for them with a numeric comparison.
 */
enum plexstate {
/* An empty entry, not a plex at all. */
plex_unallocated,
/* The plex has been referenced by a volume */
plex_referenced,
/*
 * The plex has been allocated, but its configuration
 * is not complete
 */
plex_init,
/*
 * A plex which has gone completely down because of
 * I/O errors.
 */
plex_faulty,
/*
 * A plex which has been taken down by the
 * administrator.
 */
plex_down,
/* A plex which is being initialized */
plex_initializing,
/*
 * *** The remaining states represent plexes which are
 * at least partially up. Keep these separate so that
 * they can be checked more easily.
 */
/*
 * A plex entry which is at least partially up. Not
 * all subdisks are available, and an inconsistency
 * has occurred. If no other plex is uncorrupted,
 * the volume is no longer consistent.
 */
plex_corrupt,
plex_firstup = plex_corrupt, /* first "up" state */
/*
 * A RAID-5 plex entry which is accessible, but one
 * subdisk is down, requiring recovery for many
 * I/O requests.
 */
plex_degraded,
/*
 * A plex which is really up, but which has a reborn
 * subdisk which we don't completely trust, and
 * which we don't want to read if we can avoid it
 */
plex_flaky,
/*
 * A plex entry which is completely up. All subdisks
 * are up.
 */
plex_up,
plex_laststate = plex_up /* last value, for table dimensions */
};
/*
 * Subdisk states.  Each comment describes the
 * state that follows it.  The comments starting
 * with *** divide the list into groups (invalid
 * data; valid but inaccessible data; reviving;
 * accessible with valid data), and the reviving
 * comment refers to "numerically lower" states,
 * so the order of declaration is significant.
 */
enum sdstate {
/* An empty entry, not a subdisk at all. */
sd_unallocated,
/*
 * A subdisk entry which has not been created
 * completely. Some fields may be empty.
 */
sd_uninit,
/* The subdisk has been referenced by a plex */
sd_referenced,
/*
 * A subdisk entry which has been created completely.
 * All fields are correct, but the disk hasn't
 * been updated.
 */
sd_init,
/*
 * A subdisk entry which has been created completely.
 * All fields are correct, and the disk has been
 * updated, but there is no data on the disk.
 */
sd_empty,
/*
 * A subdisk entry which has been created completely and
 * which is currently being initialized
 */
sd_initializing,
/*
 * A subdisk entry which has been initialized,
 * but which can't come up because it would
 * cause inconsistencies.
 */
sd_initialized,
/* *** The following states represent invalid data */
/*
 * A subdisk entry which has been created completely.
 * All fields are correct, the config on disk has been
 * updated, and the data was valid, but since then the
 * drive has been taken down, and as a result updates
 * have been missed.
 */
sd_obsolete,
/*
 * A subdisk entry which has been created completely.
 * All fields are correct, the disk has been updated,
 * and the data was valid, but since then the drive
 * has been crashed and updates have been lost.
 */
sd_stale,
/* *** The following states represent valid, inaccessible data */
/*
 * A subdisk entry which has been created completely.
 * All fields are correct, the disk has been updated,
 * and the data was valid, but since then the drive
 * has gone down. No attempt has been made to write
 * to the subdisk since the crash, so the data is valid.
 */
sd_crashed,
/*
 * A subdisk entry which was up, which contained
 * valid data, and which was taken down by the
 * administrator. The data is valid.
 */
sd_down,
/*
 * *** This is invalid data (the subdisk previously had
 * a numerically lower state), but it is currently in the
 * process of being revived. We can write but not read.
 */
sd_reviving,
/*
 * *** The following states represent accessible subdisks
 * with valid data
 */
/*
 * A subdisk entry which has been created completely.
 * All fields are correct, the disk has been updated,
 * and the data was valid, but since then the drive
 * has gone down and up again. No updates were lost,
 * but it is possible that the subdisk has been
 * damaged. We won't read from this subdisk if we
 * have a choice. If this is the only subdisk which
 * covers this address space in the plex, we set its
 * state to sd_up under these circumstances, so this
 * status implies that there is another subdisk to
 * fulfil the request.
 */
sd_reborn,
/*
 * A subdisk entry which has been created completely.
 * All fields are correct, the disk has been updated,
 * and the data is valid.
 */
sd_up,
sd_laststate = sd_up /* last value, for table dimensions */
};
/*
 * Drive states.  In this list each comment
 * follows the state it describes.
 */
enum drivestate {
drive_unallocated,
/* present but unused. Must be 0 */
drive_referenced,
/* just mentioned in some other config entry */
drive_down,
/* not accessible */
drive_up,
/* up and running */
drive_laststate = drive_up /* last value, for table dimensions */
};
/* Local Variables: */
/* fill-column: 50 */
/* End: */

View File

@ -1,311 +0,0 @@
/*-
* Copyright (c) 1997, 1998, 1999
* Nan Yang Computer Services Limited. All rights reserved.
*
* Written by Greg Lehey
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumutil.c,v 1.17 2003/04/28 02:54:43 grog Exp $
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/* This file contains utility routines used both in kernel and user context */
#include <dev/vinum/vinumhdr.h>
#include <dev/vinum/statetexts.h>
#ifndef _KERNEL
#include <stdio.h>
#include <string.h>
extern jmp_buf command_fail; /* return on a failed command */
#endif
/*
 * Scratch buffer in which the state and organization
 * routines below format values they have no text for.
 * It is a single static buffer, so the string returned
 * for an invalid value is overwritten by the next such call.
 */
static char numeric_state[32]; /* temporary buffer for ASCII conversions */
/*
 * Number of entries in the table <x>statetext, where x is
 * drive, vol, plex or sd.  Presumably these tables are the
 * ones declared in statetexts.h -- TODO confirm.
 */
#define STATECOUNT(x) (sizeof (x##statetext) / sizeof (char *))
/*
 * Return the text for a drive state.  For a value outside the
 * table, return a message formatted into the shared static
 * buffer numeric_state instead.
 */
char *
drive_state(enum drivestate state)
{
    unsigned int idx = (unsigned) state;

    if (idx < STATECOUNT(drive))			    /* we have a text for it */
	return drivestatetext[idx];

    sprintf(numeric_state, "Invalid state %d", (int) state);
    return numeric_state;
}
/*
 * Return the text for a volume state.  For a value outside the
 * table, return a message formatted into the shared static
 * buffer numeric_state instead.
 */
char *
volume_state(enum volumestate state)
{
    unsigned int idx = (unsigned) state;

    if (idx < STATECOUNT(vol))				    /* we have a text for it */
	return volstatetext[idx];

    sprintf(numeric_state, "Invalid state %d", (int) state);
    return numeric_state;
}
/*
 * Return the text for a plex state.  For a value outside the
 * table, return a message formatted into the shared static
 * buffer numeric_state instead.
 */
char *
plex_state(enum plexstate state)
{
    unsigned int idx = (unsigned) state;

    if (idx < STATECOUNT(plex))				    /* we have a text for it */
	return plexstatetext[idx];

    sprintf(numeric_state, "Invalid state %d", (int) state);
    return numeric_state;
}
/*
 * Return the name of a plex organization.  For a value which
 * is not a known organization, return a message formatted into
 * the shared static buffer numeric_state instead.
 */
char *
plex_org(enum plexorg org)
{
    char *text;

    switch (org) {
    case plex_disorg:					    /* disorganized */
	text = "disorg";
	break;

    case plex_concat:					    /* concatenated plex */
	text = "concat";
	break;

    case plex_striped:					    /* striped plex */
	text = "striped";
	break;

    case plex_raid4:					    /* RAID-4 plex */
	text = "raid4";
	break;

    case plex_raid5:					    /* RAID-5 plex */
	text = "raid5";
	break;

    default:
	sprintf(numeric_state, "Invalid org %d", (int) org);
	text = numeric_state;
	break;
    }
    return text;
}
/*
 * Return the text for a subdisk state.  For a value outside the
 * table, return a message formatted into the shared static
 * buffer numeric_state instead.
 */
char *
sd_state(enum sdstate state)
{
    unsigned int idx = (unsigned) state;

    if (idx < STATECOUNT(sd))				    /* we have a text for it */
	return sdstatetext[idx];

    sprintf(numeric_state, "Invalid state %d", (int) state);
    return numeric_state;
}
/* Now convert in the other direction */
/*
* These are currently used only internally,
* so we don't do too much error checking
*/
/*
 * Convert a drive state text to its state value by an exact
 * match against drivestatetext.  Returns -1 if there is no match.
 */
enum drivestate
DriveState(char *text)
{
    int idx = 0;

    while (idx < STATECOUNT(drive)) {
	if (strcmp(text, drivestatetext[idx]) == 0)	    /* found it */
	    return (enum drivestate) idx;
	idx++;
    }
    return -1;
}
/*
 * Convert a subdisk state text to its state value by an exact
 * match against sdstatetext.  Returns -1 if there is no match.
 */
enum sdstate
SdState(char *text)
{
    int idx = 0;

    while (idx < STATECOUNT(sd)) {
	if (strcmp(text, sdstatetext[idx]) == 0)	    /* found it */
	    return (enum sdstate) idx;
	idx++;
    }
    return -1;
}
/*
 * Convert a plex state text to its state value by an exact
 * match against plexstatetext.  Returns -1 if there is no match.
 */
enum plexstate
PlexState(char *text)
{
    int idx = 0;

    while (idx < STATECOUNT(plex)) {
	if (strcmp(text, plexstatetext[idx]) == 0)	    /* found it */
	    return (enum plexstate) idx;
	idx++;
    }
    return -1;
}
/*
 * Convert a volume state text to its state value by an exact
 * match against volstatetext.  Returns -1 if there is no match.
 */
enum volumestate
VolState(char *text)
{
    int idx = 0;

    while (idx < STATECOUNT(vol)) {
	if (strcmp(text, volstatetext[idx]) == 0)	    /* found it */
	    return (enum volumestate) idx;
	idx++;
    }
    return -1;
}
/*
 * Convert a decimal number with an optional leading minus sign
 * and an optional scale factor to a number of bytes.
 *
 * The scale factors, in either case, are:
 *
 * s sectors (of 512 bytes)
 * b blocks (of 512 bytes).  This unit is deprecated,
 *   because it's confusing, but maintained to avoid
 *   confusing Veritas users.
 * k kilobytes (1024 bytes)
 * m megabytes (of 1024 * 1024 bytes)
 * g gigabytes (of 1024 * 1024 * 1024 bytes)
 *
 * Only the character directly after the digits is examined.
 * The result is computed in unsigned 64 bit arithmetic, so a
 * negative value comes back in two's complement form.
 *
 * A NULL or malformed specification is reported with
 * throw_rude_remark() in the kernel, and on stderr followed by
 * a longjmp to command_fail in userland.
 */
u_int64_t
sizespec(char *spec)
{
    u_int64_t value = 0;
    u_int64_t scale = 0;				    /* 0: no valid scale factor seen */
    char *cp = spec;
    int sign = 1;					    /* -1 if negative */

    if (cp != NULL) {					    /* we have a parameter */
	if (*cp == '-') {				    /* negative, */
	    sign = -1;
	    cp++;					    /* skip */
	}
	if ((*cp >= '0') && (*cp <= '9')) {		    /* it's numeric */
	    for (; (*cp >= '0') && (*cp <= '9'); cp++)
		value = value * 10 + *cp - '0';		    /* convert it */

	    switch (*cp) {
	    case '\0':
		scale = 1;
		break;

	    case 'B':
	    case 'b':
	    case 'S':
	    case 's':
		scale = 512;
		break;

	    case 'K':
	    case 'k':
		scale = 1024;
		break;

	    case 'M':
	    case 'm':
		scale = (u_int64_t) 1024 * 1024;
		break;

	    case 'G':
	    case 'g':
		scale = (u_int64_t) 1024 * 1024 * 1024;
		break;
	    }
	    if (scale != 0)
		return value * sign * scale;
	}
#ifdef _KERNEL
	throw_rude_remark(EINVAL, "Invalid length specification: %s", spec);
#else
	fprintf(stderr, "Invalid length specification: %s", spec);
	longjmp(command_fail, 1);
#endif
	/* if the report above returns, we also make the one below */
    }
#ifdef _KERNEL
    throw_rude_remark(EINVAL, "Missing length specification");
#else
    fprintf(stderr, "Missing length specification");
    longjmp(command_fail, 1);
#endif
    /* NOTREACHED */
    return -1;
}
/*
 * Type of the device argument taken by Volno(), Plexno() and
 * Sdno() below: a pointer to struct cdev when compiled for the
 * kernel, a dev_t otherwise.
 */
#ifdef _KERNEL
#define FOOTYPE struct cdev *
#else
#define FOOTYPE dev_t
#endif
/*
 * Return the volume number encoded in a device number.
 *
 * The number is assembled from bits 16-29 of the minor number (moved
 * down by 8) and its low-order 8 bits.  Returns -1 if the device is not
 * of volume type, or if the number is that of one of the superdevs
 * (VINUM_SUPERDEV_VOL or VINUM_DAEMON_VOL).
 */
int
Volno(FOOTYPE dev)
{
    int raw = minor(dev);
    int volno;

    if (OBJTYPE(dev) != VINUM_VOLUME_TYPE)
	return -1;

    volno = ((raw & 0x3fff0000) >> 8) | (raw & 0xff);
    if (volno == VINUM_SUPERDEV_VOL || volno == VINUM_DAEMON_VOL)
	return -1;					    /* superdevs aren't volumes */
    return volno;
}
/*
 * Return the plex number encoded in a device number.
 *
 * The major number is not checked, but the object type is: -1 is
 * returned unless it is VINUM_PLEX_TYPE.  The number is assembled from
 * bits 16-29 of the minor number (moved down by 8) and its low-order
 * 8 bits.
 */
int
Plexno(FOOTYPE dev)
{
    int raw = minor(dev);

    if (OBJTYPE(dev) == VINUM_PLEX_TYPE)
	return ((raw & 0x3fff0000) >> 8) | (raw & 0xff);
    return -1;
}
/*
 * Return the subdisk number encoded in a device number.
 *
 * The major number is not checked, but the object type is: -1 is
 * returned for types below VINUM_SD_TYPE.
 */
int
Sdno(FOOTYPE dev)
{
    int raw = minor(dev);

    /*
     * Care: subdisks occupy two type values (VINUM_SD_TYPE and the one
     * above it), so the test is an ordering comparison rather than an
     * equality test.
     *
     * The mask 0x7fff0000 is one bit wider than the one used for
     * plexes and volumes: the returned number includes the low-order
     * bit of the type field, giving twice as many potential subdisks
     * as plexes or volumes.
     */
    if (OBJTYPE(dev) >= VINUM_SD_TYPE)
	return ((raw & 0x7fff0000) >> 8) | (raw & 0xff);
    return -1;
}

View File

@ -1,54 +0,0 @@
/*-
* Copyright (c) 1997, 1998, 1999
* Nan Yang Computer Services Limited. All rights reserved.
*
* Written by Greg Lehey
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumutil.h,v 1.1 2001/05/22 04:07:22 grog Exp grog $
* $FreeBSD$
*/
/*
 * Functions defined in vinumutil.c, which is used both in userland
 * and in the kernel.
 */
/*
 * NOTE(review): judging by their names and signatures these five
 * presumably return the text for a state or organization value;
 * their definitions are not part of this header -- confirm against
 * vinumutil.c.
 */
char *drive_state(enum drivestate);
char *volume_state(enum volumestate);
char *plex_state(enum plexstate);
char *plex_org(enum plexorg);
char *sd_state(enum sdstate);
/*
 * Name-to-state lookups.  Each returns the state whose name equals
 * text, or -1 (converted to the enum type) if no name matches.
 */
enum drivestate DriveState(char *text);
enum sdstate SdState(char *text);
enum plexstate PlexState(char *text);
enum volumestate VolState(char *text);

View File

@ -1,395 +0,0 @@
/*-
* Copyright (c) 1997, 1998, 1999
* Nan Yang Computer Services Limited. All rights reserved.
*
* Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project.
*
* Written by Greg Lehey
*
* This software is distributed under the so-called ``Berkeley
* License'':
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Nan Yang Computer
* Services Limited.
* 4. Neither the name of the Company nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* This software is provided ``as is'', and any express or implied
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose are disclaimed.
* In no event shall the company or contributors be liable for any
* direct, indirect, incidental, special, exemplary, or consequential
* damages (including, but not limited to, procurement of substitute
* goods or services; loss of use, data, or profits; or business
* interruption) however caused and on any theory of liability, whether
* in contract, strict liability, or tort (including negligence or
* otherwise) arising in any way out of the use of this software, even if
* advised of the possibility of such damage.
*
* $Id: vinumvar.h,v 1.33 2003/05/23 01:09:23 grog Exp grog $
* $FreeBSD$
*/
#include <sys/time.h>
#include <dev/vinum/vinumstate.h>
#include <sys/mutex.h>
/* Directory for device nodes. */
#define VINUM_DIR "/dev/vinum"
/*
 * Some configuration maxima. They're an enum because
 * we can't define global constants. Sorry about that.
 *
 * These aren't as bad as they look: most of them are soft limits.
 *
 * Several helper macros are defined in the middle of the enumerator
 * list, next to the constants they use. That is harmless: macros are
 * only expanded where they are used.
 */
#define VINUMROOT
enum constants {
/*
 * Current version of the data structures. This
 * is used to ensure synchronization between
 * kernel module and userland vinum(8).
 */
VINUMVERSION = 1,
VINUM_HEADER = 512, /* size of header on disk */
MAXCONFIGLINE = 1024, /* maximum size of a single config line */
MINVINUMSLICE = 1048576, /* minimum size of a slice */
ROUND_ROBIN_READPOL = -1, /* round robin read policy */
/*
 * Type field in high-order two bits of minor
 * number. Subdisks are in fact both type 2 and
 * type 3, giving twice the number of subdisks.
 * This causes some ugliness in the code.
 */
VINUM_VOLUME_TYPE = 0,
VINUM_PLEX_TYPE = 1,
VINUM_SD_TYPE = 2,
VINUM_SD2_TYPE = 3,
/*
 * Define a minor device number.
 * This is not used directly; instead, it's
 * called by the other macros.
 *
 * o is the object number, t the object type. The low-order 8 bits
 * of o stay in place, bits 8-21 of o move up to bits 16-29, and t is
 * placed at VINUM_TYPE_SHIFT. Both parameters are parenthesized so
 * that expression arguments (e.g. a | b) are evaluated as a unit.
 *
 * NOTE(review): for t >= 2 the shift of a signed int reaches the
 * sign bit; confirm that this is acceptable for all supported
 * compilers.
 */
#define VINUMMINOR(o,t) (((o) & 0xff) | (((o) & 0x3fff00) << 8) | ((t) << VINUM_TYPE_SHIFT))
VINUM_TYPE_SHIFT = 30,
VINUM_MAXVOL = 0x3ffffd, /* highest numbered volume */
/*
 * The super device and the daemon device are
 * magic: they're the two highest-numbered
 * volumes.
 */
VINUM_SUPERDEV_VOL = 0x3ffffe,
VINUM_DAEMON_VOL = 0x3fffff,
VINUM_MAXPLEX = 0x3fffff,
VINUM_MAXSD = 0x7fffff,
#define VINUM_SUPERDEV_MINOR VINUMMINOR (VINUM_SUPERDEV_VOL, VINUM_VOLUME_TYPE)
#define VINUM_DAEMON_MINOR VINUMMINOR (VINUM_DAEMON_VOL, VINUM_VOLUME_TYPE)
/*
 * Mask for the number part of each object.
 * Plexes and volumes are the same, subdisks use
 * the low-order bit of the type field and thus
 * have twice the number.
 *
 * NOTE(review): no mask constant follows this comment; it appears
 * to be left over from an earlier layout.
 */
MAJORDEV_SHIFT = 8,
MAXPLEX = 8, /* maximum number of plexes in a volume */
MAXSD = 256, /* maximum number of subdisks in a plex */
MAXDRIVENAME = 32, /* maximum length of a device name */
MAXSDNAME = 64, /* maximum length of a subdisk name */
MAXPLEXNAME = 64, /* maximum length of a plex name */
MAXVOLNAME = 64, /* maximum length of a volume name */
MAXNAME = 64, /* maximum length of any name */
/* extract object type (the two-bit type field of the minor number) */
#define OBJTYPE(x) ((minor(x) >> VINUM_TYPE_SHIFT) & 3)
/* extract device type (same expression as OBJTYPE) */
#define DEVTYPE(x) ((minor (x) >> VINUM_TYPE_SHIFT) & 3)
#define VINUM_SUPERDEV_NAME VINUM_DIR"/control" /* normal super device */
#define VINUM_DAEMON_DEV_NAME VINUM_DIR"/controld" /* super device for daemon only */
/*
 * the number of object entries to cater for initially, and also the
 * value by which they are incremented. It doesn't take long
 * to extend them, so theoretically we could start with 1 of each, but
 * it's untidy to allocate such small areas. These values are
 * probably too small.
 */
INITIAL_DRIVES = 4,
INITIAL_VOLUMES = 4,
INITIAL_PLEXES = 8,
INITIAL_SUBDISKS = 16,
INITIAL_SUBDISKS_IN_PLEX = 4, /* number of subdisks to allocate to a plex */
INITIAL_SUBDISKS_IN_DRIVE = 4, /* number of subdisks to allocate to a drive */
INITIAL_DRIVE_FREELIST = 16, /* number of entries in drive freelist */
PLEX_REGION_TABLE_SIZE = 8, /* number of entries in plex region tables */
PLEX_LOCKS = 256, /* number of locks to allocate to a plex */
PLEXMUTEXES = 32,
MAX_REVIVE_BLOCKSIZE = MAXPHYS, /* maximum revive block size */
DEFAULT_REVIVE_BLOCKSIZE = 65536, /* default revive block size */
VINUMHOSTNAMELEN = 32, /* host name field in label */
};
/*
* Slice header
*
* Vinum drives start with this structure:
*
*\ Sector
* |--------------------------------------|
* | PDP-11 memorial boot block | 0
* |--------------------------------------|
* | Disk label, maybe | 1
* |--------------------------------------|
* | Slice definition (vinum_hdr) | 8
* |--------------------------------------|
* | |
* | Configuration info, first copy | 9
* | |
* |--------------------------------------|
* | |
* | Configuration info, second copy | 9 + size of config
* | |
* |--------------------------------------|
*/
/*
 * Sizes and offsets of our information.
 *
 * VINUM_CONFIG_OFFSET is VINUM_LABEL_OFFSET + VINUMHEADERLEN, i.e. the
 * first config copy directly follows the label. DATASTART is the end
 * of the second config copy divided by DEV_BSIZE, so unlike the other
 * values it is expressed in units of DEV_BSIZE.
 */
enum {
VINUM_LABEL_OFFSET = 4096, /* offset of vinum label */
VINUMHEADERLEN = 512, /* size of vinum label */
VINUM_CONFIG_OFFSET = 4608, /* offset of first config copy */
MAXCONFIG = 65536, /* and size of config copy */
DATASTART = (MAXCONFIG * 2 + VINUM_CONFIG_OFFSET) / DEV_BSIZE /* this is where the data starts */
};
/*
 * Label identifying a vinum drive; embedded in struct vinum_hdr.
 *
 * hostname is 256 bytes long, but we don't need to shlep
 * multiple copies in vinum. We use the host name just
 * to identify this system, and 32 bytes should be ample
 * for that purpose
 */
struct vinum_label {
char sysname[VINUMHOSTNAMELEN]; /* system name at time of creation */
char name[MAXDRIVENAME]; /* our name of the drive */
struct timeval date_of_birth; /* the time it was created */
struct timeval last_update; /* and the time of last update */
/*
* total size in bytes of the drive. This value
* includes the headers.
*/
off_t drive_size;
};
/*
 * Slice definition: the "Slice definition (vinum_hdr)" entry in the
 * slice header layout. It carries a magic number, the size of each
 * configuration copy and the drive's label.
 */
struct vinum_hdr {
uint64_t magic; /* we're long on magic numbers */
#define VINUM_MAGIC 22322600044678729LL /* should be this */
#define VINUM_NOMAGIC 22322600044678990LL /* becomes this after obliteration */
/*
* Size in bytes of each copy of the
* configuration info. This must be a multiple
* of the sector size.
*/
int config_length;
struct vinum_label label; /* unique label */
};
/*
 * Information returned from read_drive_label: what, if anything, was
 * found when looking for a vinum label on a partition.
 */
enum drive_label_info {
DL_CANT_OPEN, /* invalid partition */
DL_NOT_OURS, /* valid partition, but no vinum label */
DL_DELETED_LABEL, /* valid partition, deleted label found */
DL_WRONG_DRIVE, /* drive name doesn't match */
DL_OURS /* valid partition and label found */
};
/*
 * kinds of plex organization
 *
 * The order of the enumerators matters: isstriped() and isparity()
 * test for "at least plex_striped" and "at least plex_raid4", so the
 * striped and parity organizations must stay last, in this order.
 */
enum plexorg {
plex_disorg, /* disorganized */
plex_concat, /* concatenated plex */
plex_striped, /* striped plex */
plex_raid4, /* RAID4 plex */
plex_raid5 /* RAID5 plex */
};
/*
 * Recognize plex organizations.
 *
 * p is a pointer to an object with an organization member holding an
 * enum plexorg value. Both tests rely on the enumerator order
 * plex_striped < plex_raid4 < plex_raid5. The parameter is
 * parenthesized so that pointer expressions such as base + n work.
 */
#define isstriped(p) ((p)->organization >= plex_striped) /* striped (RAID-0), RAID-4 or RAID-5 */
#define isparity(p) ((p)->organization >= plex_raid4) /* RAID-4 or RAID-5 */
/*
 * Address range definitions, for locking volumes
 *
 * NOTE(review): stripe holds "address + 1", presumably so that 0 can
 * denote an unused entry -- confirm against the locking code.
 */
struct rangelock {
daddr_t stripe; /* address + 1 of the range being locked */
struct buf *bp; /* user's buffer pointer */
};
/*
 * One extent of free space on a drive.
 * NOTE(review): sectors is a length in sectors; offset is presumably
 * in sectors too -- confirm against the users of this structure.
 */
struct drive_freelist { /* sorted list of free space on drive */
u_int64_t offset; /* offset of entry */
u_int64_t sectors; /* and length in sectors */
};
/*
* Include the structure definitions shared
* between userland and kernel.
*/
#ifdef _KERNEL
#include <dev/vinum/vinumobj.h>
#undef _KERNEL
#include <dev/vinum/vinumobj.h>
#define _KERNEL
#else
#include <dev/vinum/vinumobj.h>
#endif
/*
 * Table expansion. Expand table, which contains oldcount
 * entries of type element, by increment entries, and change
 * oldcount accordingly
 *
 * Both variants call expand_table() with the address of table, the
 * old size in bytes and the new size in bytes, and then add increment
 * to oldcount. With VINUMDEBUG defined, the calling file and line are
 * passed to expand_table() as well.
 *
 * The macro expands to a brace-enclosed block, not an expression, and
 * it evaluates oldcount and increment more than once: oldcount must be
 * a modifiable lvalue and neither argument should have side effects.
 */
#ifdef VINUMDEBUG
#define EXPAND(table, element, oldcount, increment) \
{ \
expand_table ((void **) &table, \
oldcount * sizeof (element), \
(oldcount + increment) * sizeof (element), \
__FILE__, \
__LINE__ ); \
oldcount += increment; \
}
#else
#define EXPAND(table, element, oldcount, increment) \
{ \
expand_table ((void **) &table, \
oldcount * sizeof (element), \
(oldcount + increment) * sizeof (element)); \
oldcount += increment; \
}
#endif
/*
 * Information on vinum's memory usage
 *
 * malloced points to a table of struct mc entries.
 */
struct meminfo {
int mallocs; /* number of malloced blocks */
int total_malloced; /* total amount malloced */
int highwater; /* maximum number of mallocs */
struct mc *malloced; /* pointer to kernel table */
};
/* Size of the file name field in struct mc. */
#define MCFILENAMELEN 16
/*
 * Entry of the table that struct meminfo's malloced member points to.
 * NOTE(review): presumably one entry per allocation, recording when
 * (time, seq), how much (size), where (address) and from which source
 * location (file, line) it was made -- confirm against the allocator.
 */
struct mc {
struct timeval time;
int seq;
int size;
short line;
caddr_t address;
char file[MCFILENAMELEN];
};
/*
 * These enums are used by the state transition
 * routines. They're in bit map format:
 *
 * Bit 0: Other plexes in the volume are down
 * Bit 1: Other plexes in the volume are up
 * Bit 2: The current plex is up
 * Maybe they should be local to
 * state.c
 *
 * The enumerators are listed in ascending numeric order (0 to 7), so
 * each one's value is the combination of the three bits above that its
 * comment describes. Do not reorder them.
 */
enum volplexstate {
volplex_onlyusdown = 0, /* 0: we're the only plex, and we're down */
volplex_alldown, /* 1: another plex is down, and so are we */
volplex_otherup, /* 2: another plex is up */
volplex_otherupdown, /* 3: other plexes are up and down */
volplex_onlyus, /* 4: we're up and alone */
volplex_onlyusup, /* 5: only we are up, others are down */
volplex_allup, /* 6: all plexes are up */
volplex_someup /* 7: some plexes are up, including us */
};
/*
 * State map for plex: one bit per subdisk (SD) state. Every value is a
 * distinct power of two, so several of them can be combined in one
 * word.
 * NOTE(review): presumably used to summarize the states of all
 * subdisks of a plex -- confirm against the state transition code.
 */
enum sdstates {
sd_emptystate = 1,
sd_downstate = 2, /* SD is down */
sd_crashedstate = 4, /* SD is crashed */
sd_obsoletestate = 8, /* SD is obsolete */
sd_stalestate = 16, /* SD is stale */
sd_rebornstate = 32, /* SD is reborn */
sd_upstate = 64, /* SD is up */
sd_initstate = 128, /* SD is initializing */
sd_initializedstate = 256, /* SD is initialized */
sd_otherstate = 512, /* SD is in some other state */
};
/*
 * This is really just a parameter to pass to
 * set_<foo>_state, but since it needs to be known
 * in the external definitions, we need to define
 * it here
 *
 * The non-zero values are distinct bits.
 */
enum setstateflags {
setstate_none = 0, /* no flags */
setstate_force = 1, /* force the state change */
setstate_configuring = 2, /* we're currently configuring, don't save */
};
/*
 * Operations for parityops to perform.
 * NOTE(review): going by the names, checkparity verifies parity and
 * rebuildparity rewrites it -- confirm against the parity code.
 */
enum parityop {
checkparity,
rebuildparity,
rebuildandcheckparity, /* rebuildparity with the -v option */
};
/*
 * When doing round-robin reads from a multi-plex volume, switch to the
 * next plex if the difference of the last read sector and the next sector
 * to be read is this many sectors.
 *
 * The value is a sector count: 128 sectors of 512 bytes make 64 kB.
 */
#define ROUNDROBIN_SWITCH 128 /* 64k */
#ifdef VINUMDEBUG
/*
 * Debugging stuff
 *
 * Only compiled in when VINUMDEBUG is defined. Each flag is a distinct
 * power of two, so flags can be combined in a single word.
 */
enum debugflags {
DEBUG_ADDRESSES = 1, /* show buffer information during requests */
DEBUG_NUMOUTPUT = 2, /* show the value of vp->v_numoutput */
DEBUG_RESID = 4, /* go into debugger in complete_rqe */
DEBUG_LASTREQS = 8, /* keep a circular buffer of last requests */
DEBUG_REVIVECONFLICT = 16, /* print info about revive conflicts */
DEBUG_EOFINFO = 32, /* print info about EOF detection */
DEBUG_MEMFREE = 64, /* keep info about Frees */
DEBUG_BIGDRIVE = 128, /* pretend our drives are 100 times the size */
DEBUG_REMOTEGDB = 256, /* go into remote gdb */
DEBUG_WARNINGS = 512, /* log various relatively harmless warnings */
DEBUG_LOCKREQS = 1024, /* log locking requests */
};
/*
 * In debug kernels on i386, every longjmp call that follows this
 * header is redirected to LongJmp, which is defined elsewhere.
 */
#ifdef _KERNEL
#ifdef __i386__
#define longjmp LongJmp /* test our longjmps */
#endif
#endif
#endif
/* Local Variables: */
/* fill-column: 50 */
/* End: */